linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] remove BKL from ext2_get_block() version 2
@ 2002-03-06  0:23 Dave Hansen
  2002-03-06  0:40 ` Alexander Viro
  2002-03-06  0:50 ` Alan Cox
  0 siblings, 2 replies; 8+ messages in thread
From: Dave Hansen @ 2002-03-06  0:23 UTC (permalink / raw)
  To: Marcelo Tosatti, Alan Cox; +Cc: linux-kernel, Alexander Viro

I posted the initial version of this last week.  There was no discussion about the patch itself.  I can only hope that no news is good news :)

My first version of the patch reinitialized i_meta_lock for every read_inode call.  This is not a correct way to do it.  However, 2.4 does not have the capability to have fs-specific init_inode_once() functions.  This probably meant altering the sb_ops structure if I wanted ext2_inode_info-specific initialization.  To make the patch less intrusive, I put i_meta_lock right into the inode structure.  I think that this is a good compromize between keeping ext2 code separated from VFS and patch simplicity.  The lock is now initialized once per inode in fs/inode.c's init_once(). 

I noticed the extra lock initializations because I was using lockmeter and kernprof while running dbench.  I was seeing a large throughput decrease when my patch was applied, and kernprof told me that a big chunk of CPU time was being spent in lockmeter's rwlock_alloc.  I have the feeling that I've hit a bottleneck in lockmeter's code spinning on locks which protect lockmeter's data structures.  

The patch is against 2.4.19-pre2.  The patch significantly lowers BKL contention (50%) on a 2-way PII-300 running dbench 4.  Dbench throughput is not significantly affected, but that is probably a function of my puny little machine more than the effectiveness of the patch.  I'll have some results on a much more beefy 8-way PIII tomorrow.  Earlier versions of the patch reduced BKL contention during dbench by 60% on the 8-way.  CPU utilization spinning on the BKL has been as high as 40%.

--
Dave Hansen
haveblue@us.ibm.com


diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/ext2/balloc.c linux/fs/ext2/balloc.c
--- linux-2.4.19-pre2-clean/fs/ext2/balloc.c	Tue Mar  5 15:18:38 2002
+++ linux/fs/ext2/balloc.c	Tue Mar  5 15:16:57 2002
@@ -539,6 +539,7 @@
 	 */
 #ifdef EXT2_PREALLOCATE
 	/* Writer: ->i_prealloc* */
+	write_lock(&inode->i_meta_lock);
 	if (prealloc_count && !*prealloc_count) {
 		int	prealloc_goal;
 		unsigned long next_block = tmp + 1;
@@ -576,8 +577,8 @@
 		ext2_debug ("Preallocated a further %lu bits.\n",
 			       (k - 1));
 	}
+	write_unlock(&inode->i_meta_lock);
 #endif
-
 	j = tmp;
 
 	mark_buffer_dirty(bh);
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/ext2/inode.c linux/fs/ext2/inode.c
--- linux-2.4.19-pre2-clean/fs/ext2/inode.c	Tue Mar  5 15:18:38 2002
+++ linux/fs/ext2/inode.c	Tue Mar  5 15:16:57 2002
@@ -51,8 +51,6 @@
  */
 void ext2_delete_inode (struct inode * inode)
 {
-	lock_kernel();
-
 	if (is_bad_inode(inode) ||
 	    inode->i_ino == EXT2_ACL_IDX_INO ||
 	    inode->i_ino == EXT2_ACL_DATA_INO)
@@ -60,6 +58,8 @@
 	inode->u.ext2_i.i_dtime	= CURRENT_TIME;
 	mark_inode_dirty(inode);
 	ext2_update_inode(inode, IS_SYNC(inode));
+	
+	lock_kernel();
 	inode->i_size = 0;
 	if (inode->i_blocks)
 		ext2_truncate (inode);
@@ -68,24 +68,26 @@
 	unlock_kernel();
 	return;
 no_delete:
-	unlock_kernel();
 	clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
 void ext2_discard_prealloc (struct inode * inode)
 {
 #ifdef EXT2_PREALLOCATE
-	lock_kernel();
+	write_lock(&inode->i_meta_lock);
 	/* Writer: ->i_prealloc* */
 	if (inode->u.ext2_i.i_prealloc_count) {
 		unsigned short total = inode->u.ext2_i.i_prealloc_count;
 		unsigned long block = inode->u.ext2_i.i_prealloc_block;
 		inode->u.ext2_i.i_prealloc_count = 0;
 		inode->u.ext2_i.i_prealloc_block = 0;
+		write_unlock(&inode->i_meta_lock);
 		/* Writer: end */
 		ext2_free_blocks (inode, block, total);
+	} else {
+		write_unlock(&inode->i_meta_lock);
 	}
-	unlock_kernel();
+		
 #endif
 }
 
@@ -99,6 +101,7 @@
 
 #ifdef EXT2_PREALLOCATE
 	/* Writer: ->i_prealloc* */
+	write_lock(&inode->i_meta_lock);
 	if (inode->u.ext2_i.i_prealloc_count &&
 	    (goal == inode->u.ext2_i.i_prealloc_block ||
 	     goal + 1 == inode->u.ext2_i.i_prealloc_block))
@@ -106,9 +109,11 @@
 		result = inode->u.ext2_i.i_prealloc_block++;
 		inode->u.ext2_i.i_prealloc_count--;
 		/* Writer: end */
+		write_unlock(&inode->i_meta_lock);
 		ext2_debug ("preallocation hit (%lu/%lu).\n",
 			    ++alloc_hits, ++alloc_attempts);
 	} else {
+		write_unlock(&inode->i_meta_lock);
 		ext2_discard_prealloc (inode);
 		ext2_debug ("preallocation miss (%lu/%lu).\n",
 			    alloc_hits, ++alloc_attempts);
@@ -253,9 +258,11 @@
 		if (!bh)
 			goto failure;
 		/* Reader: pointers */
+		read_lock(&inode->i_meta_lock);	
 		if (!verify_chain(chain, p))
 			goto changed;
 		add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+		read_unlock(&inode->i_meta_lock);
 		/* Reader: end */
 		if (!p->key)
 			goto no_block;
@@ -263,6 +270,7 @@
 	return NULL;
 
 changed:
+	read_unlock(&inode->i_meta_lock);
 	*err = -EAGAIN;
 	goto no_block;
 failure:
@@ -328,6 +336,8 @@
 				 unsigned long *goal)
 {
 	/* Writer: ->i_next_alloc* */
+	
+	write_lock(&inode->i_meta_lock);
 	if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
 		inode->u.ext2_i.i_next_alloc_block++;
 		inode->u.ext2_i.i_next_alloc_goal++;
@@ -343,9 +353,11 @@
 			*goal = inode->u.ext2_i.i_next_alloc_goal;
 		if (!*goal)
 			*goal = ext2_find_near(inode, partial);
+		write_unlock(&inode->i_meta_lock);
 		return 0;
 	}
 	/* Reader: end */
+	write_unlock(&inode->i_meta_lock);
 	return -EAGAIN;
 }
 
@@ -451,6 +463,7 @@
 
 	/* Verify that place we are splicing to is still there and vacant */
 
+	write_lock(&inode->i_meta_lock);
 	/* Writer: pointers, ->i_next_alloc* */
 	if (!verify_chain(chain, where-1) || *where->p)
 		/* Writer: end */
@@ -461,7 +474,7 @@
 	*where->p = where->key;
 	inode->u.ext2_i.i_next_alloc_block = block;
 	inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
-
+	write_unlock(&inode->i_meta_lock);
 	/* Writer: end */
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -484,6 +497,7 @@
 	return 0;
 
 changed:
+	write_unlock(&inode->i_meta_lock);
 	for (i = 1; i < num; i++)
 		bforget(where[i].bh);
 	for (i = 0; i < num; i++)
@@ -517,7 +531,6 @@
 	if (depth == 0)
 		goto out;
 
-	lock_kernel();
 reread:
 	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 
@@ -539,7 +552,6 @@
 			brelse(partial->bh);
 			partial--;
 		}
-		unlock_kernel();
 out:
 		return err;
 	}
@@ -1149,9 +1161,7 @@
 
 void ext2_write_inode (struct inode * inode, int wait)
 {
-	lock_kernel();
 	ext2_update_inode (inode, wait);
-	unlock_kernel();
 }
 
 int ext2_sync_inode (struct inode *inode)
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/inode.c linux/fs/inode.c
--- linux-2.4.19-pre2-clean/fs/inode.c	Fri Dec 21 09:41:55 2001
+++ linux/fs/inode.c	Fri Mar  1 14:47:32 2002
@@ -110,6 +110,7 @@
 		sema_init(&inode->i_sem, 1);
 		sema_init(&inode->i_zombie, 1);
 		spin_lock_init(&inode->i_data.i_shared_lock);
+		rwlock_init(&inode->i_meta_lock);
 	}
 }
 
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/include/linux/fs.h linux/include/linux/fs.h
--- linux-2.4.19-pre2-clean/include/linux/fs.h	Tue Mar  5 15:19:42 2002
+++ linux/include/linux/fs.h	Tue Mar  5 15:17:47 2002
@@ -452,6 +452,7 @@
 	unsigned long		i_version;
 	struct semaphore	i_sem;
 	struct semaphore	i_zombie;
+	rwlock_t		i_meta_lock;
 	struct inode_operations	*i_op;
 	struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
 	struct super_block	*i_sb;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:23 [PATCH] remove BKL from ext2_get_block() version 2 Dave Hansen
@ 2002-03-06  0:40 ` Alexander Viro
  2002-03-06  0:51   ` Dave Hansen
  2002-03-06  6:57   ` Dave Hansen
  2002-03-06  0:50 ` Alan Cox
  1 sibling, 2 replies; 8+ messages in thread
From: Alexander Viro @ 2002-03-06  0:40 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Marcelo Tosatti, Alan Cox, linux-kernel



On Tue, 5 Mar 2002, Dave Hansen wrote:

First of all, learn to use fscking line breaks.

> I posted the initial version of this last week.  There was no discussion about the patch itself.  I can only hope that no news is good news :)
> 
> My first version of the patch reinitialized i_meta_lock for every read_inode call.  This is not a correct way to do it.  However, 2.4 does not have the capability to have fs-specific init_inode_once() functions.  This probably meant altering the sb_ops structure if I wanted ext2_inode_info-specific initialization.  To make the patch less intrusive, I put i_meta_lock right into the inode structure.  I think that this is a good compromize between keeping ext2 code separated from VFS and patch simplicity.  The lock is now initialized once per inode in fs/inode.c's init_once(). 

Denied.  You can trivially do that in ext2_read_inode() and ext2_new_inode().

> I noticed the extra lock initializations because I was using lockmeter and kernprof while running dbench.  I was seeing a large throughput decrease when my patch was applied, and kernprof told me that a big chunk of CPU time was being spent in lockmeter's rwlock_alloc.  I have the feeling that I've hit a bottleneck in lockmeter's code spinning on locks which protect lockmeter's data structures.  
> 
> The patch is against 2.4.19-pre2.  The patch significantly lowers BKL contention (50%) on a 2-way PII-300 running dbench 4.  Dbench throughput is not significantly affected, but that is probably a function of my puny little machine more than the effectiveness of the patch.  I'll have some results on a much more beefy 8-way PIII tomorrow.  Earlier versions of the patch reduced BKL contention during dbench by 60% on the 8-way.  CPU utilization spinning on the BKL has been as high as 40%.

ext2 patches _MUST_ get testing in 2.5 before they can go into 2.4.  At
the very least a month, preferably - two.  Until then consider them vetoed
for 2.4, no matter how BKL brigade feels about their crusade.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:23 [PATCH] remove BKL from ext2_get_block() version 2 Dave Hansen
  2002-03-06  0:40 ` Alexander Viro
@ 2002-03-06  0:50 ` Alan Cox
  2002-03-06  1:17   ` Dave Hansen
       [not found]   ` <20020306004432.GC5538@matchmail.com>
  1 sibling, 2 replies; 8+ messages in thread
From: Alan Cox @ 2002-03-06  0:50 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Marcelo Tosatti, Alan Cox, linux-kernel, Alexander Viro

> The patch is against 2.4.19-pre2.  The patch significantly lowers BKL contention (50%) on a 2-way PII-300 running dbench 4.  Dbench throughput is not significantly affected, but that is probably a function of my puny little machine more than the effectiveness of the patch.  I'll have some results on a much more beefy 8-way PIII tomorrow.  Earlier versions of the patch reduced BKL contention during dbench by 60% on the 8-way.  CPU utilization spinning on the BKL has been as high as 40%.

I certainly am not interested in it. 2.4 locking changes for very big boxes
strike me as a little dangerous.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:40 ` Alexander Viro
@ 2002-03-06  0:51   ` Dave Hansen
  2002-03-06  1:19     ` Andrew Morton
  2002-03-06  6:57   ` Dave Hansen
  1 sibling, 1 reply; 8+ messages in thread
From: Dave Hansen @ 2002-03-06  0:51 UTC (permalink / raw)
  To: Alexander Viro; +Cc: linux-kernel

Alexander Viro wrote:
> Denied.  You can trivially do that in ext2_read_inode() and ext2_new_inode().
Good point.  That makes it much easier.

> ext2 patches _MUST_ get testing in 2.5 before they can go into 2.4.  At
> the very least a month, preferably - two.  Until then consider them vetoed
> for 2.4, no matter how BKL brigade feels about their crusade.
ChangeSet@1.290  2002-02-11 21:26:50-08:00  viro@psu.edu

So, it has been almost a month.  Do you plan to port these changes back 
to 2.4 yourself?



-- 
Dave Hansen
haveblue@us.ibm.com


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:50 ` Alan Cox
@ 2002-03-06  1:17   ` Dave Hansen
       [not found]   ` <20020306004432.GC5538@matchmail.com>
  1 sibling, 0 replies; 8+ messages in thread
From: Dave Hansen @ 2002-03-06  1:17 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel, Alexander Viro

Alan Cox wrote:
> I certainly am not interested in it. 2.4 locking changes for very big boxes
> strike me as a little dangerous.

I think that I may have presented the patch in the wrong way.  The 
primary reason I'm doing this is BKL removal.  The ext2 code just 
happened to be one of the worst offenders that I'd run into.  I've been 
using the 8-way with dbench because it produces a the worst-case 
scenario I can think of.  I also like watching it compile kernels in 
just over a minute.  :)

This patch was also a backport of an Al Viro 2.5 change, so I consider 
it pretty safe.  Only time and testing can tell, but I've tested it 
about as much as I can.

All of this becomes pretty academic if Al decides that he will backport 
the 2.5 changes, which we all want to see.

-- 
Dave Hansen
haveblue@us.ibm.com


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:51   ` Dave Hansen
@ 2002-03-06  1:19     ` Andrew Morton
  0 siblings, 0 replies; 8+ messages in thread
From: Andrew Morton @ 2002-03-06  1:19 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Alexander Viro, linux-kernel

Dave Hansen wrote:
> 
> Alexander Viro wrote:
> > Denied.  You can trivially do that in ext2_read_inode() and ext2_new_inode().
> Good point.  That makes it much easier.
> 
> > ext2 patches _MUST_ get testing in 2.5 before they can go into 2.4.  At
> > the very least a month, preferably - two.  Until then consider them vetoed
> > for 2.4, no matter how BKL brigade feels about their crusade.
> ChangeSet@1.290  2002-02-11 21:26:50-08:00  viro@psu.edu
> 
> So, it has been almost a month.  Do you plan to port these changes back
> to 2.4 yourself?
> 

2.5.5's ext2 has an SMP race, probably in get_block, which results in
the freeing of already-free buffers.   That has not yet been found and
fixed.

-

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
       [not found]   ` <20020306004432.GC5538@matchmail.com>
@ 2002-03-06  1:20     ` Dave Hansen
  0 siblings, 0 replies; 8+ messages in thread
From: Dave Hansen @ 2002-03-06  1:20 UTC (permalink / raw)
  To: Mike Fedyk; +Cc: linux-kernel

Mike Fedyk wrote:
 > Also, there seems to be a plan already in progress to do this on
 > 2.5.  BLK has been moved around in several filesystems to ease
 > transition to another lock.

As I indicated elsewhere in the thread, this _is_ a 2.5 backport
chosen because of the tremendous impact on BKL contention.

-- 
Dave Hansen
haveblue@us.ibm.com


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] remove BKL from ext2_get_block() version 2
  2002-03-06  0:40 ` Alexander Viro
  2002-03-06  0:51   ` Dave Hansen
@ 2002-03-06  6:57   ` Dave Hansen
  1 sibling, 0 replies; 8+ messages in thread
From: Dave Hansen @ 2002-03-06  6:57 UTC (permalink / raw)
  To: Alexander Viro; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 322 bytes --]

Alexander Viro wrote:
 > Denied.  You can trivially do that in ext2_read_inode() and
 > ext2_new_inode().

Is this the right way to do it?  I put the lock back in the 
ext2_inode_info structure, and moved the initialization from init_once() 
to ext2_read_inode() and ext2_new_inode().

-- 
Dave Hansen
haveblue@us.ibm.com

[-- Attachment #2: ext2_get_block-bkl-removal-2.4.19-pre2.3.patch --]
[-- Type: text/plain, Size: 6275 bytes --]

diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/ext2/balloc.c linux/fs/ext2/balloc.c
--- linux-2.4.19-pre2-clean/fs/ext2/balloc.c	Tue Mar  5 15:18:38 2002
+++ linux/fs/ext2/balloc.c	Tue Mar  5 17:04:32 2002
@@ -539,6 +539,7 @@
 	 */
 #ifdef EXT2_PREALLOCATE
 	/* Writer: ->i_prealloc* */
+	write_lock(&inode->u.ext2_i.i_meta_lock);
 	if (prealloc_count && !*prealloc_count) {
 		int	prealloc_goal;
 		unsigned long next_block = tmp + 1;
@@ -576,8 +577,8 @@
 		ext2_debug ("Preallocated a further %lu bits.\n",
 			       (k - 1));
 	}
+	write_unlock(&inode->u.ext2_i.i_meta_lock);
 #endif
-
 	j = tmp;
 
 	mark_buffer_dirty(bh);
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/ext2/ialloc.c linux/fs/ext2/ialloc.c
--- linux-2.4.19-pre2-clean/fs/ext2/ialloc.c	Tue Mar  5 15:18:38 2002
+++ linux/fs/ext2/ialloc.c	Tue Mar  5 17:05:29 2002
@@ -392,6 +392,7 @@
 	inode->u.ext2_i.i_block_group = group;
 	if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL)
 		inode->i_flags |= S_SYNC;
+	rwlock_init(&inode->u.ext2_i.i_meta_lock);
 	insert_inode_hash(inode);
 	inode->i_generation = event++;
 	mark_inode_dirty(inode);
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/fs/ext2/inode.c linux/fs/ext2/inode.c
--- linux-2.4.19-pre2-clean/fs/ext2/inode.c	Tue Mar  5 15:18:38 2002
+++ linux/fs/ext2/inode.c	Tue Mar  5 19:41:56 2002
@@ -51,8 +51,6 @@
  */
 void ext2_delete_inode (struct inode * inode)
 {
-	lock_kernel();
-
 	if (is_bad_inode(inode) ||
 	    inode->i_ino == EXT2_ACL_IDX_INO ||
 	    inode->i_ino == EXT2_ACL_DATA_INO)
@@ -60,6 +58,8 @@
 	inode->u.ext2_i.i_dtime	= CURRENT_TIME;
 	mark_inode_dirty(inode);
 	ext2_update_inode(inode, IS_SYNC(inode));
+	
+	lock_kernel();
 	inode->i_size = 0;
 	if (inode->i_blocks)
 		ext2_truncate (inode);
@@ -68,24 +68,26 @@
 	unlock_kernel();
 	return;
 no_delete:
-	unlock_kernel();
 	clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
 void ext2_discard_prealloc (struct inode * inode)
 {
 #ifdef EXT2_PREALLOCATE
-	lock_kernel();
+	write_lock(&inode->u.ext2_i.i_meta_lock);
 	/* Writer: ->i_prealloc* */
 	if (inode->u.ext2_i.i_prealloc_count) {
 		unsigned short total = inode->u.ext2_i.i_prealloc_count;
 		unsigned long block = inode->u.ext2_i.i_prealloc_block;
 		inode->u.ext2_i.i_prealloc_count = 0;
 		inode->u.ext2_i.i_prealloc_block = 0;
+		write_unlock(&inode->u.ext2_i.i_meta_lock);
 		/* Writer: end */
 		ext2_free_blocks (inode, block, total);
+	} else {
+		write_unlock(&inode->u.ext2_i.i_meta_lock);
 	}
-	unlock_kernel();
+		
 #endif
 }
 
@@ -99,6 +101,7 @@
 
 #ifdef EXT2_PREALLOCATE
 	/* Writer: ->i_prealloc* */
+	write_lock(&inode->u.ext2_i.i_meta_lock);
 	if (inode->u.ext2_i.i_prealloc_count &&
 	    (goal == inode->u.ext2_i.i_prealloc_block ||
 	     goal + 1 == inode->u.ext2_i.i_prealloc_block))
@@ -106,9 +109,11 @@
 		result = inode->u.ext2_i.i_prealloc_block++;
 		inode->u.ext2_i.i_prealloc_count--;
 		/* Writer: end */
+		write_unlock(&inode->u.ext2_i.i_meta_lock);
 		ext2_debug ("preallocation hit (%lu/%lu).\n",
 			    ++alloc_hits, ++alloc_attempts);
 	} else {
+		write_unlock(&inode->u.ext2_i.i_meta_lock);
 		ext2_discard_prealloc (inode);
 		ext2_debug ("preallocation miss (%lu/%lu).\n",
 			    alloc_hits, ++alloc_attempts);
@@ -253,9 +258,11 @@
 		if (!bh)
 			goto failure;
 		/* Reader: pointers */
+		read_lock(&inode->u.ext2_i.i_meta_lock);	
 		if (!verify_chain(chain, p))
 			goto changed;
 		add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
+		read_unlock(&inode->u.ext2_i.i_meta_lock);
 		/* Reader: end */
 		if (!p->key)
 			goto no_block;
@@ -263,6 +270,7 @@
 	return NULL;
 
 changed:
+	read_unlock(&inode->u.ext2_i.i_meta_lock);
 	*err = -EAGAIN;
 	goto no_block;
 failure:
@@ -328,6 +336,8 @@
 				 unsigned long *goal)
 {
 	/* Writer: ->i_next_alloc* */
+	
+	write_lock(&inode->u.ext2_i.i_meta_lock);
 	if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
 		inode->u.ext2_i.i_next_alloc_block++;
 		inode->u.ext2_i.i_next_alloc_goal++;
@@ -343,9 +353,11 @@
 			*goal = inode->u.ext2_i.i_next_alloc_goal;
 		if (!*goal)
 			*goal = ext2_find_near(inode, partial);
+		write_unlock(&inode->u.ext2_i.i_meta_lock);
 		return 0;
 	}
 	/* Reader: end */
+	write_unlock(&inode->u.ext2_i.i_meta_lock);
 	return -EAGAIN;
 }
 
@@ -451,6 +463,7 @@
 
 	/* Verify that place we are splicing to is still there and vacant */
 
+	write_lock(&inode->u.ext2_i.i_meta_lock);
 	/* Writer: pointers, ->i_next_alloc* */
 	if (!verify_chain(chain, where-1) || *where->p)
 		/* Writer: end */
@@ -461,7 +474,7 @@
 	*where->p = where->key;
 	inode->u.ext2_i.i_next_alloc_block = block;
 	inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
-
+	write_unlock(&inode->u.ext2_i.i_meta_lock);
 	/* Writer: end */
 
 	/* We are done with atomic stuff, now do the rest of housekeeping */
@@ -484,6 +497,7 @@
 	return 0;
 
 changed:
+	write_unlock(&inode->u.ext2_i.i_meta_lock);
 	for (i = 1; i < num; i++)
 		bforget(where[i].bh);
 	for (i = 0; i < num; i++)
@@ -517,7 +531,6 @@
 	if (depth == 0)
 		goto out;
 
-	lock_kernel();
 reread:
 	partial = ext2_get_branch(inode, depth, offsets, chain, &err);
 
@@ -539,7 +552,6 @@
 			brelse(partial->bh);
 			partial--;
 		}
-		unlock_kernel();
 out:
 		return err;
 	}
@@ -942,6 +954,7 @@
 	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
 	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
 	inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
+	rwlock_init(&inode->u.ext2_i.i_meta_lock);
 	/* We now have enough fields to check if the inode was active or not.
 	 * This is needed because nfsd might try to access dead inodes
 	 * the test is that same one that e2fsck uses
@@ -1149,9 +1162,7 @@
 
 void ext2_write_inode (struct inode * inode, int wait)
 {
-	lock_kernel();
 	ext2_update_inode (inode, wait);
-	unlock_kernel();
 }
 
 int ext2_sync_inode (struct inode *inode)
diff -ur -X 2.5/exclude linux-2.4.19-pre2-clean/include/linux/ext2_fs_i.h linux/include/linux/ext2_fs_i.h
--- linux-2.4.19-pre2-clean/include/linux/ext2_fs_i.h	Mon Sep 17 13:16:30 2001
+++ linux/include/linux/ext2_fs_i.h	Tue Mar  5 16:57:25 2002
@@ -36,6 +36,7 @@
 	__u32	i_prealloc_count;
 	__u32	i_dir_start_lookup;
 	int	i_new_inode:1;	/* Is a freshly allocated inode */
+	rwlock_t	i_meta_lock;
 };
 
 #endif	/* _LINUX_EXT2_FS_I */

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2002-03-06  6:57 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-03-06  0:23 [PATCH] remove BKL from ext2_get_block() version 2 Dave Hansen
2002-03-06  0:40 ` Alexander Viro
2002-03-06  0:51   ` Dave Hansen
2002-03-06  1:19     ` Andrew Morton
2002-03-06  6:57   ` Dave Hansen
2002-03-06  0:50 ` Alan Cox
2002-03-06  1:17   ` Dave Hansen
     [not found]   ` <20020306004432.GC5538@matchmail.com>
2002-03-06  1:20     ` Dave Hansen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).