linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mingming Cao <cmm@us.ibm.com>
To: Andrew Morton <akpm@osdl.org>
Cc: "Stephen C. Tweedie" <sct@redhat.com>,
	Ingo Molnar <mingo@elte.hu>, Lee Revell <rlrevell@joe-job.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	ext2-devel <ext2-devel@lists.sourceforge.net>
Subject: [PATCH] Reduce ext3 allocate-with-reservation lock latencies
Date: Thu, 28 Apr 2005 23:28:27 -0700	[thread overview]
Message-ID: <1114756107.2518.38.camel@localhost.localdomain> (raw)
In-Reply-To: <1114207837.7339.50.camel@localhost.localdomain>


Currently in ext3 block reservation code, the global filesystem
reservation tree lock (rsv_block) is hold during the process of
searching for a space to make a new reservation window, including while
scaning the block bitmap to verify if the avalible window has a free
block. Holding the lock during bitmap scan is unnecessary and could
possibly cause scalability issue and latency issues.

This patch trying to address this by dropping the lock before scan the
bitmap. Before that we need to reserve the open window in case someone
else is targetting at the same window. Question was should we reserve
the whole free reservable space or just the window size we need.
Reserve the whole free reservable space will possibly force other
threads which intended to do block allocation nearby move to another
block group(cause bad layout).  In this patch,  we just reserve the
desired size before drop the lock and scan the block bitmap. This patch
fixed a ext3 reservation latency issue seen on a cvs check out test.
Patch is tested with many fsx, tiobench, dbench and untar a kernel test.

Signed-Off-By: Mingming Cao <cmm@us.ibm.com>




---

 linux-2.6.11-ming/fs/ext3/balloc.c |  135 +++++++++++++++++--------------------
 linux-2.6.11-ming/fs/ext3/file.c   |    4 +
 2 files changed, 68 insertions(+), 71 deletions(-)

diff -puN fs/ext3/balloc.c~ext3-reduce-reservation-lock-latency fs/ext3/balloc.c
--- linux-2.6.11/fs/ext3/balloc.c~ext3-reduce-reservation-lock-latency	2005-04-22 10:49:04.000000000 -0700
+++ linux-2.6.11-ming/fs/ext3/balloc.c	2005-04-27 22:34:19.491331405 -0700
@@ -749,24 +749,24 @@ fail_access:
  * 	to find a free region that is of my size and has not
  * 	been reserved.
  *
- *	on succeed, it returns the reservation window to be appended to.
- *	failed, return NULL.
  */
-static struct ext3_reserve_window_node *find_next_reservable_window(
+static int find_next_reservable_window(
 				struct ext3_reserve_window_node *search_head,
-				unsigned long size, int *start_block,
+				struct ext3_reserve_window_node *my_rsv,
+				struct super_block * sb, int start_block,
 				int last_block)
 {
 	struct rb_node *next;
 	struct ext3_reserve_window_node *rsv, *prev;
 	int cur;
+	int size = my_rsv->rsv_goal_size;
 
 	/* TODO: make the start of the reservation window byte-aligned */
 	/* cur = *start_block & ~7;*/
-	cur = *start_block;
+	cur = start_block;
 	rsv = search_head;
 	if (!rsv)
-		return NULL;
+		return -1;
 
 	while (1) {
 		if (cur <= rsv->rsv_end)
@@ -782,7 +782,7 @@ static struct ext3_reserve_window_node *
 		 * space with expected-size (or more)...
 		 */
 		if (cur > last_block)
-			return NULL;		/* fail */
+			return -1;		/* fail */
 
 		prev = rsv;
 		next = rb_next(&rsv->rsv_node);
@@ -813,8 +813,26 @@ static struct ext3_reserve_window_node *
 	 * return the reservation window that we could append to.
 	 * succeed.
 	 */
-	*start_block = cur;
-	return prev;
+
+	if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window)))
+		rsv_window_remove(sb, my_rsv);
+
+	/* let's book the whole avaliable window for now
+	 * we will check the
+	 * disk bitmap later and then, if there are free block
+	 * then we adjust the window size if the it's
+	 * larger than requested.
+	 * Otherwise, we will remove this node from the tree next time
+	 * call find_next_reservable_window.
+	 */
+	my_rsv->rsv_start = cur;
+	my_rsv->rsv_end = cur + size - 1;
+	my_rsv->rsv_alloc_hit = 0;
+
+	if (prev != my_rsv)
+		ext3_rsv_window_add(sb, my_rsv);
+
+	return 0;
 }
 
 /**
@@ -852,6 +870,7 @@ static struct ext3_reserve_window_node *
  *	@sb: the super block
  *	@group: the group we are trying to allocate in
  *	@bitmap_bh: the block group block bitmap
+ *
  */
 static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
 		int goal, struct super_block *sb,
@@ -860,10 +879,10 @@ static int alloc_new_reservation(struct 
 	struct ext3_reserve_window_node *search_head;
 	int group_first_block, group_end_block, start_block;
 	int first_free_block;
-	int reservable_space_start;
-	struct ext3_reserve_window_node *prev_rsv;
 	struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root;
 	unsigned long size;
+	int ret;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 
 	group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
 				group * EXT3_BLOCKS_PER_GROUP(sb);
@@ -875,6 +894,7 @@ static int alloc_new_reservation(struct 
 		start_block = goal + group_first_block;
 
 	size = my_rsv->rsv_goal_size;
+
 	if (!rsv_is_empty(&my_rsv->rsv_window)) {
 		/*
 		 * if the old reservation is cross group boundary
@@ -908,6 +928,8 @@ static int alloc_new_reservation(struct 
 			my_rsv->rsv_goal_size= size;
 		}
 	}
+
+	spin_lock(rsv_lock);
 	/*
 	 * shift the search start to the window near the goal block
 	 */
@@ -921,11 +943,16 @@ static int alloc_new_reservation(struct 
 	 * need to check the bitmap after we found a reservable window.
 	 */
 retry:
-	prev_rsv = find_next_reservable_window(search_head, size,
-						&start_block, group_end_block);
-	if (prev_rsv == NULL)
-		goto failed;
-	reservable_space_start = start_block;
+	ret = find_next_reservable_window(search_head, my_rsv, sb,
+						start_block, group_end_block);
+
+	if (ret == -1) {
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;
+	}
+
 	/*
 	 * On success, find_next_reservable_window() returns the
 	 * reservation window where there is a reservable space after it.
@@ -937,8 +964,9 @@ retry:
 	 * block. Search start from the start block of the reservable space
 	 * we just found.
 	 */
+	spin_unlock(rsv_lock);
 	first_free_block = bitmap_search_next_usable_block(
-			reservable_space_start - group_first_block,
+			my_rsv->rsv_start - group_first_block,
 			bitmap_bh, group_end_block - group_first_block + 1);
 
 	if (first_free_block < 0) {
@@ -946,54 +974,30 @@ retry:
 		 * no free block left on the bitmap, no point
 		 * to reserve the space. return failed.
 		 */
-		goto failed;
+		spin_lock(rsv_lock);
+		if (!rsv_is_empty(&my_rsv->rsv_window))
+			rsv_window_remove(sb, my_rsv);
+		spin_unlock(rsv_lock);
+		return -1;		/* failed */
 	}
+
 	start_block = first_free_block + group_first_block;
 	/*
 	 * check if the first free block is within the
-	 * free space we just found
+	 * free space we just reserved
 	 */
-	if ((start_block >= reservable_space_start) &&
-	  (start_block < reservable_space_start + size))
-		goto found_rsv_window;
+	if ((start_block >= my_rsv->rsv_start) &&
+		  (start_block < my_rsv->rsv_end))
+		return 0;		/* succeed */
 	/*
 	 * if the first free bit we found is out of the reservable space
-	 * this means there is no free block on the reservable space
-	 * we should continue search for next reservable space,
+	 * continue search for next reservable space,
 	 * start from where the free block is,
 	 * we also shift the list head to where we stopped last time
 	 */
-	search_head = prev_rsv;
+	search_head = my_rsv;
+	spin_lock(rsv_lock);
 	goto retry;
-
-found_rsv_window:
-	/*
-	 * great! the reservable space contains some free blocks.
-	 * if the search returns that we should add the new
-	 * window just next to where the old window, we don't
- 	 * need to remove the old window first then add it to the
-	 * same place, just update the new start and new end.
-	 */
-	if (my_rsv != prev_rsv)  {
-		if (!rsv_is_empty(&my_rsv->rsv_window))
-			rsv_window_remove(sb, my_rsv);
-	}
-	my_rsv->rsv_start = reservable_space_start;
-	my_rsv->rsv_end = my_rsv->rsv_start + size - 1;
-	my_rsv->rsv_alloc_hit = 0;
-	if (my_rsv != prev_rsv)  {
-		ext3_rsv_window_add(sb, my_rsv);
-	}
-	return 0;		/* succeed */
-failed:
-	/*
-	 * failed to find a new reservation window in the current
-	 * group, remove the current(stale) reservation window
-	 * if there is any
-	 */
-	if (!rsv_is_empty(&my_rsv->rsv_window))
-		rsv_window_remove(sb, my_rsv);
-	return -1;		/* failed */
 }
 
 /*
@@ -1023,7 +1027,6 @@ ext3_try_to_allocate_with_rsv(struct sup
 			int goal, struct ext3_reserve_window_node * my_rsv,
 			int *errp)
 {
-	spinlock_t *rsv_lock;
 	unsigned long group_first_block;
 	int ret = 0;
 	int fatal;
@@ -1052,7 +1055,6 @@ ext3_try_to_allocate_with_rsv(struct sup
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL);
 		goto out;
 	}
-	rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
 	/*
 	 * goal is a group relative block number (if there is a goal)
 	 * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb)
@@ -1078,30 +1080,21 @@ ext3_try_to_allocate_with_rsv(struct sup
 	 * then we could go to allocate from the reservation window directly.
 	 */
 	while (1) {
-		struct ext3_reserve_window rsv_copy;
-
-		rsv_copy._rsv_start = my_rsv->rsv_start;
-		rsv_copy._rsv_end = my_rsv->rsv_end;
-
-		if (rsv_is_empty(&rsv_copy) || (ret < 0) ||
-			!goal_in_my_reservation(&rsv_copy, goal, group, sb)) {
-			spin_lock(rsv_lock);
+		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+			!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb)) {
 			ret = alloc_new_reservation(my_rsv, goal, sb,
 							group, bitmap_bh);
-			rsv_copy._rsv_start = my_rsv->rsv_start;
-			rsv_copy._rsv_end = my_rsv->rsv_end;
-			spin_unlock(rsv_lock);
 			if (ret < 0)
 				break;			/* failed */
 
-			if (!goal_in_my_reservation(&rsv_copy, goal, group, sb))
+			if (!goal_in_my_reservation(&my_rsv->rsv_window, goal, group, sb))
 				goal = -1;
 		}
-		if ((rsv_copy._rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
-		    || (rsv_copy._rsv_end < group_first_block))
+		if ((my_rsv->rsv_start >= group_first_block + EXT3_BLOCKS_PER_GROUP(sb))
+		    || (my_rsv->rsv_end < group_first_block))
 			BUG();
 		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal,
-					   &rsv_copy);
+					   &my_rsv->rsv_window);
 		if (ret >= 0) {
 			my_rsv->rsv_alloc_hit++;
 			break;				/* succeed */
diff -puN fs/ext3/file.c~ext3-reduce-reservation-lock-latency fs/ext3/file.c
--- linux-2.6.11/fs/ext3/file.c~ext3-reduce-reservation-lock-latency	2005-04-22 10:49:04.000000000 -0700
+++ linux-2.6.11-ming/fs/ext3/file.c	2005-04-26 11:09:56.000000000 -0700
@@ -36,7 +36,11 @@ static int ext3_release_file (struct ino
 	/* if we are the last writer on the inode, drop the block reservation */
 	if ((filp->f_mode & FMODE_WRITE) &&
 			(atomic_read(&inode->i_writecount) == 1))
+	{
+		down(&EXT3_I(inode)->truncate_sem);
 		ext3_discard_reservation(inode);
+		up(&EXT3_I(inode)->truncate_sem); 
+	}
 	if (is_dx(inode) && filp->private_data)
 		ext3_htree_free_dir_info(filp->private_data);
 

_



      parent reply	other threads:[~2005-04-29 16:42 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-05  3:51 ext3 allocate-with-reservation latencies Lee Revell
2005-04-05  4:13 ` Ingo Molnar
2005-04-05  6:10   ` Mingming Cao
2005-04-05 16:38     ` Lee Revell
2005-04-06  5:35   ` Mingming Cao
2005-04-06  9:51     ` Stephen C. Tweedie
2005-04-06 16:53       ` Mingming Cao
2005-04-06 18:22         ` Stephen C. Tweedie
2005-04-06 19:03           ` Mingming Cao
2005-04-07  8:14     ` Ingo Molnar
2005-04-07 13:08       ` Stephen C. Tweedie
2005-04-07 19:16         ` Mingming Cao
2005-04-07 23:37         ` Mingming Cao
2005-04-08 14:40           ` Stephen C. Tweedie
2005-04-08 16:06             ` Arjan van de Ven
2005-04-08 18:10             ` Mingming Cao
2005-04-08 18:12               ` Lee Revell
2005-04-11 11:48               ` Stephen C. Tweedie
2005-04-11 18:38                 ` Mingming Cao
2005-04-11 19:12                   ` Lee Revell
2005-04-11 19:22                     ` Lee Revell
2005-04-11 19:57                   ` Stephen C. Tweedie
2005-04-12  6:41                     ` Mingming Cao
2005-04-12 11:18                       ` Stephen C. Tweedie
2005-04-12 23:27                         ` Mingming Cao
2005-04-13 10:29                           ` Stephen C. Tweedie
     [not found]                             ` <1113597161.3899.80.camel@localhost.localdomain>
2005-04-18 18:00                               ` Stephen C. Tweedie
2005-04-18 21:56                                 ` [Ext2-devel] " Mingming Cao
2005-04-22 22:10                             ` [RFC][PATCH] Reduce ext3 allocate-with-reservation lock latencies Mingming Cao
2005-04-28  3:45                               ` Lee Revell
2005-04-28  7:37                                 ` Mingming Cao
2005-04-28 16:12                                   ` Lee Revell
2005-04-28 18:34                                     ` Mingming Cao
2005-04-29  6:18                                       ` Mingming Cao
2005-04-28 19:14                                 ` [RFC] Adding multiple block allocation to current ext3 Mingming Cao
2005-04-29 13:52                                   ` [Ext2-devel] [RFC] Adding multiple block allocation Suparna Bhattacharya
2005-04-29 17:10                                     ` Mingming Cao
2005-04-29 19:42                                       ` Mingming Cao
2005-04-29 20:57                                         ` Andrew Morton
2005-04-29 21:12                                           ` Mingming Cao
2005-04-29 21:34                                             ` Andrew Morton
2005-04-30 16:00                                       ` Suparna Bhattacharya
2005-04-29 18:45                                     ` Badari Pulavarty
2005-04-29 23:22                                       ` Mingming Cao
2005-04-30 16:10                                         ` Suparna Bhattacharya
2005-04-30 17:11                                           ` Suparna Bhattacharya
2005-04-30 18:07                                             ` Mingming Cao
2005-05-02  4:46                                               ` Suparna Bhattacharya
2005-04-30 16:52                                       ` Suparna Bhattacharya
2005-04-30  0:33                                     ` Mingming Cao
2005-04-30  0:44                                     ` Mingming Cao
2005-04-30 17:03                                       ` Suparna Bhattacharya
2006-01-10 23:26                                   ` [PATCH 0/5] multiple block allocation to current ext3 Mingming Cao
2006-01-11  5:25                                     ` Andrew Morton
2006-01-11 19:17                                       ` Mingming Cao
2006-01-11 19:43                                         ` Andrew Morton
2006-01-11 21:31                                           ` Mingming Cao
2006-01-14  1:12                                           ` Fall back io scheduler for 2.6.15? Mingming Cao
2006-01-14  1:49                                             ` Andrew Morton
2006-01-14  5:22                                               ` Dave Jones
2006-01-16  8:43                                               ` Jens Axboe
2006-01-16 19:45                                                 ` [PATCH] Fall back io scheduler ( Re: [Ext2-devel] Re: Fall back io scheduler for 2.6.15?) Mingming Cao
2006-01-16 19:49                                                   ` Jens Axboe
2006-01-16 19:57                                                     ` Mingming Cao
2006-01-19 19:37                                                 ` Fall back io scheduler for 2.6.15? Nate Diller
2006-01-20  8:10                                                   ` Jens Axboe
2006-01-16 19:38                                               ` [Ext2-devel] " Mingming Cao
2005-04-29  6:28                               ` Mingming Cao [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1114756107.2518.38.camel@localhost.localdomain \
    --to=cmm@us.ibm.com \
    --cc=akpm@osdl.org \
    --cc=ext2-devel@lists.sourceforge.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rlrevell@joe-job.com \
    --cc=sct@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).