All of lore.kernel.org
 help / color / mirror / Atom feed
From: Qu Wenruo <quwenruo@cn.fujitsu.com>
To: linux-btrfs@vger.kernel.org
Cc: quwenruo@cn.fujitsu.com
Subject: [PATCH v3 04/17] btrfs: Add threshold workqueue based on kernel workqueue
Date: Thu, 7 Nov 2013 13:51:54 +0800	[thread overview]
Message-ID: <1383803527-23736-5-git-send-email-quwenruo@cn.fujitsu.com> (raw)
In-Reply-To: <1383803527-23736-1-git-send-email-quwenruo@cn.fujitsu.com>

The original btrfs_workers has thresholding functions to dynamically
create or destroy kthreads.

Though there is no such function in kernel workqueue because the worker
is not created manually, we can still use the workqueue_set_max_active
to simulate the behavior, mainly to achieve better HDD performance by
setting a high threshold on submit_workers.
(Sadly, no resource can be saved)

So in this patch, extra workqueue pending counters are introduced to
dynamically change the max active of each btrfs_workqueue_struct, hoping
to restore the behavior of the original thresholding function.

Also, workqueue_set_max_active uses a mutex to protect workqueue_struct
and is not meant to be called too frequently, so a new interval
mechanism is applied that will only call workqueue_set_max_active after
a number of works have been queued, hoping to balance both random and
sequential performance on HDD.

Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
Changelog:
v2->v3:
  - Add a thresholding mechanism to simulate the old btrfs_workers behavior.
  - Do not enable thresholding when thresh is set to a small value.
---
 fs/btrfs/async-thread.c | 131 ++++++++++++++++++++++++++++++++++++++++++++----
 fs/btrfs/async-thread.h |  24 ++++++++-
 2 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 925aa6d..1fde6a2 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -31,6 +31,9 @@
 #define WORK_ORDER_DONE_BIT 2
 #define WORK_HIGH_PRIO_BIT 3
 
+#define NO_THRESHOLD (-1)
+#define DFT_THRESHOLD (32)
+
 /*
  * container for the kthread task pointer and the list of pending work
  * One of these is allocated per thread.
@@ -733,13 +736,31 @@ struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
 						     char *ordered_name,
 						     char *high_name,
 						     int flags,
-						     int max_active)
+						     int max_active,
+						     int thresh)
 {
 	struct btrfs_workqueue_struct *ret = kzalloc(sizeof(*ret), GFP_NOFS);
-	flags |= WQ_UNBOUND;
 	if (unlikely(!ret))
 		return NULL;
-	ret->normal_wq = alloc_workqueue(name, flags, max_active);
+
+	ret->max_active = max_active;
+	ret->pending = 0;
+	/* default threshold */
+	if (thresh == 0)
+		thresh = DFT_THRESHOLD;
+	/* for a low threshold, no threshold is a better choice */
+	if (thresh < DFT_THRESHOLD) {
+		ret->current_max = max_active;
+		ret->thresh = NO_THRESHOLD;
+	} else {
+		ret->current_max = 1;
+		ret->thresh = thresh;
+	}
+	flags |= WQ_UNBOUND;
+
+	/* The max_active of the workqueue will change between [1, max_active],
+	 * or it behaves as a normal workqueue if NO_THRESHOLD is set */
+	ret->normal_wq = alloc_workqueue(name, flags, ret->current_max);
 	if (unlikely(!ret->normal_wq)) {
 		kfree(ret);
 		return NULL;
@@ -757,7 +778,7 @@ struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
 	if (high_name) {
 		ret->high_wq = alloc_workqueue(high_name,
 					       flags | WQ_HIGHPRI,
-					       max_active);
+					       ret->current_max);
 		if (unlikely(!ret->high_wq)) {
 			destroy_workqueue(ret->normal_wq);
 			if (ret->ordered_wq)
@@ -767,16 +788,101 @@ struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
 		}
 	}
 
+	spin_lock_init(&ret->thresh_lock);
 	spin_lock_init(&ret->insert_lock);
 	return ret;
 }
 
+/*
+ * Hook for threshold which will be called in btrfs_queue_work.
+ * This hook WILL be called in IRQ context,
+ * so workqueue_set_max_active MUST NOT be called in this hook
+ */
+static inline void thresh_queue_hook(struct btrfs_workqueue_struct *wq)
+{
+	unsigned long flags;
+	/* Skip if no threshold is set */
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+	/* Since the hook may be executed in an IRQ handler, we need to
+	 * disable IRQs */
+	spin_lock_irqsave(&wq->thresh_lock, flags);
+	wq->pending++;
+	spin_unlock_irqrestore(&wq->thresh_lock, flags);
+}
+
+/*
+ * Hook for threshold which will be called before executing the work,
+ * This hook is called in kthread context.
+ * So workqueue_set_max_active is called here.
+ */
+static inline void thresh_exec_hook(struct btrfs_workqueue_struct *wq)
+{
+	int new_max_active;
+	int need_change = 0;
+	unsigned long flags;
+
+	/* Skip if no threshold is set */
+	if (wq->thresh == NO_THRESHOLD)
+		return;
+
+	spin_lock_irqsave(&wq->thresh_lock, flags);
+	wq->pending--;
+	wq->count++;
+
+	/* Use 1/4 of the thresh as the interval to change the max active.
+	 * Too high a value (like 2x) will make the max active change too
+	 * slowly, which will bring a performance drop for random IO on HDD.
+	 * Too small a value (like 1/8, or a static value) will make
+	 * workqueue_set_max_active be called too frequently, where its mutex
+	 * can slow down the whole work. */
+	wq->count %= (wq->thresh / 4);
+	if (!wq->count)
+		goto  out;
+	/* Recalculate the current max */
+	new_max_active = wq->current_max;
+	if (wq->pending > wq->thresh)
+		new_max_active++;
+	if (wq->pending < wq->thresh / 2)
+		new_max_active--;
+	new_max_active = clamp_val(new_max_active, 1, wq->max_active);
+	if (new_max_active != wq->current_max)  {
+		need_change = 1;
+		wq->current_max = new_max_active;
+	}
+	/*
+	 * Workqueue is using mutex to set max active,
+	 * so we should not call it with a spinlock hold.
+	 *
+	 * Also workqueue_set_max_active is somewhat expensive,
+	 * we should not call it too frequently.
+	 */
+out:
+	spin_unlock_irqrestore(&wq->thresh_lock, flags);
+
+	if (need_change) {
+		workqueue_set_max_active(wq->normal_wq, wq->current_max);
+		if (wq->high_wq)
+			workqueue_set_max_active(wq->high_wq, wq->current_max);
+	}
+}
+
 static void normal_work_helper(struct work_struct *arg)
 {
 	struct btrfs_work_struct *work;
+	/*
+	 * Since some work may free the whole btrfs_work_struct in func,
+	 * we should not access the completion which may be freed.
+	 * But if it has ordered_func, that will be safe.
+	 */
+	int need_complete = 0;
 	work = container_of(arg, struct btrfs_work_struct, normal_work);
+	if (work->ordered_func)
+		need_complete = 1;
+	thresh_exec_hook(work->wq);
 	work->func(work);
-	complete(&work->normal_completion);
+	if (need_complete)
+		complete(&work->normal_completion);
 }
 
 static void ordered_work_helper(struct work_struct *arg)
@@ -798,9 +904,10 @@ void btrfs_init_work(struct btrfs_work_struct *work,
 	work->ordered_func = ordered_func;
 	work->ordered_free = ordered_free;
 	INIT_WORK(&work->normal_work, normal_work_helper);
-	if (work->ordered_func)
+	if (work->ordered_func) {
 		INIT_WORK(&work->ordered_work, ordered_work_helper);
-	init_completion(&work->normal_completion);
+		init_completion(&work->normal_completion);
+	}
 }
 
 void btrfs_queue_work(struct btrfs_workqueue_struct *wq,
@@ -808,10 +915,13 @@ void btrfs_queue_work(struct btrfs_workqueue_struct *wq,
 {
 	unsigned long flags;
 	struct workqueue_struct *dest_wq;
+	work->wq = wq;
 	if (work->high && wq->high_wq)
 		dest_wq = wq->high_wq;
 	else
 		dest_wq = wq->normal_wq;
+
+	thresh_queue_hook(wq);
 	spin_lock_irqsave(&wq->insert_lock, flags);
 	queue_work(dest_wq, &work->normal_work);
 	if (wq->ordered_wq && work->ordered_func)
@@ -830,7 +940,8 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue_struct *wq)
 
 void btrfs_workqueue_set_max(struct btrfs_workqueue_struct *wq, int max)
 {
-	workqueue_set_max_active(wq->normal_wq, max);
-	if (wq->high_wq)
-		workqueue_set_max_active(wq->high_wq, max);
+	unsigned long flags;
+	spin_lock_irqsave(&wq->thresh_lock, flags);
+	wq->max_active = max;
+	spin_unlock_irqrestore(&wq->thresh_lock, flags);
 }
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 4863c38..44942b1 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -131,6 +131,26 @@ struct btrfs_workqueue_struct {
 	 * which will reduce the ordered_work waiting time and disk head moves.
 	 */
 	spinlock_t insert_lock;
+
+	/*
+	 * extra variables to implement the threshold workqueue
+	 */
+	int pending;		/* How many works are pending */
+	int max_active;		/* Up limit of max running actives */
+	int current_max;	/* Current allowed number of running works */
+	int thresh;		/* Threshold,
+				   when pending > threshold, add current_max
+				   when pending < threshold/2, decrease
+				   current max. When set to 0, default value
+				   will be 32.
+				   Also, if thresh < DFT_THRESHOLD (32),
+				   thresholding will be disabled */
+	unsigned int count;	/* Counter for preventing changing max active
+				   too frequently; current_max is reevaluated
+				   once every thresh/4 executed works */
+	spinlock_t thresh_lock; /* Used to protect the above threshold-related
+				   variables. Don't reuse insert_lock, to
+				   reduce contention on the same lock */
 };
 
 struct btrfs_work_struct {
@@ -142,6 +162,7 @@ struct btrfs_work_struct {
 	struct work_struct normal_work;
 	struct work_struct ordered_work;
 	struct completion normal_completion;
+	struct btrfs_workqueue_struct *wq;
 	int high;
 };
 
@@ -157,7 +178,8 @@ struct btrfs_workqueue_struct *btrfs_alloc_workqueue(char *name,
 						     char *ordered_name,
 						     char *high_name,
 						     int flags,
-						     int max_active);
+						     int max_active,
+						     int thresh);
 void btrfs_init_work(struct btrfs_work_struct *work,
 		     void (*func)(struct btrfs_work_struct *),
 		     void (*ordered_func)(struct btrfs_work_struct *),
-- 
1.8.4.2


  parent reply	other threads:[~2013-11-07  5:51 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-07  5:51 [PATCH v3 00/17] Replace btrfs_workers with kernel workqueue based btrfs_workqueue_struct Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 01/17] btrfs: Cleanup the unused struct async_sched Qu Wenruo
2013-11-07 17:24   ` Josef Bacik
2013-11-07  5:51 ` [PATCH v3 02/17] btrfs: Added btrfs_workqueue_struct implemented ordered execution based on kernel workqueue Qu Wenruo
2013-11-07  9:33   ` Stefan Behrens
2013-11-07 16:05     ` David Sterba
2013-11-08  0:32       ` Qu Wenruo
2013-11-07 18:08   ` Josef Bacik
2013-11-07 18:09     ` Josef Bacik
2013-11-08  0:58       ` Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 03/17] btrfs: Add high priority workqueue support for btrfs_workqueue_struct Qu Wenruo
2013-11-07 16:41   ` David Sterba
2013-11-08  0:53     ` Qu Wenruo
2013-11-12 16:59       ` David Sterba
2013-11-13  0:53         ` Qu Wenruo
2013-11-07  5:51 ` Qu Wenruo [this message]
2013-11-07  5:51 ` [PATCH v3 05/17] btrfs: Replace fs_info->workers with btrfs_workqueue Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 06/17] btrfs: Replace fs_info->delalloc_workers " Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 07/17] btrfs: Replace fs_info->submit_workers " Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 08/17] btrfs: Replace fs_info->flush_workers " Qu Wenruo
2013-11-07  5:51 ` [PATCH v3 09/17] btrfs: Replace fs_info->endio_* workqueue " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 10/17] btrfs: Replace fs_info->rmw_workers " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 11/17] btrfs: Replace fs_info->cache_workers " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 12/17] btrfs: Replace fs_info->readahead_workers " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 13/17] btrfs: Replace fs_info->fixup_workers " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 14/17] btrfs: Replace fs_info->delayed_workers " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 15/17] btrfs: Replace fs_info->qgroup_rescan_worker " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 16/17] btrfs: Replace fs_info->scrub_* " Qu Wenruo
2013-11-07  5:52 ` [PATCH v3 17/17] btrfs: Cleanup the old btrfs_worker Qu Wenruo
2013-11-07 17:52 ` [PATCH v3 00/17] Replace btrfs_workers with kernel workqueue based btrfs_workqueue_struct David Sterba
2013-11-08  0:55   ` Qu Wenruo
2013-11-07 17:54 ` Chris Mason
2013-11-08  0:56   ` Qu Wenruo
2013-11-26  1:39   ` Qu Wenruo
2013-11-26  7:31     ` Liu Bo
2013-11-26  8:33       ` Qu Wenruo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1383803527-23736-5-git-send-email-quwenruo@cn.fujitsu.com \
    --to=quwenruo@cn.fujitsu.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.