linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH] minor optimization for EXT3
  2003-07-10 14:49 [PATCH] minor optimization for EXT3 Alex Tomas
@ 2003-07-10 11:20 ` Andrew Morton
  2003-07-10 15:33   ` bzzz
  2003-07-10 15:56   ` Alex Tomas
  0 siblings, 2 replies; 12+ messages in thread
From: Andrew Morton @ 2003-07-10 11:20 UTC (permalink / raw)
  To: Alex Tomas; +Cc: linux-kernel, ext2-devel

Alex Tomas <bzzz@tmi.comex.ru> wrote:
>
> Andreas Dilger proposed do not read inode's block during inode updating
>  if we have enough data to fill that block. here is the patch.

ok, thanks.  Could you please redo it for the current kernel?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] minor optimization for EXT3
@ 2003-07-10 14:49 Alex Tomas
  2003-07-10 11:20 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Tomas @ 2003-07-10 14:49 UTC (permalink / raw)
  To: linux-kernel; +Cc: ext2-devel, Andrew Morton, Alex Tomas


hi!

Andreas Dilger proposed do not read inode's block during inode updating
if we have enough data to fill that block. here is the patch.


ext3_get_inode_loc() read inode's block only if:
  1) this inode has no copy in memory
  2) inode's block has another valid inode(s)

this optimization allows to avoid needless I/O in two cases:
1) just allocated inode is first in the inode's block
2) kernel wants to write inode, but buffer in which inode
   belongs to gets freed by VM




diff -puN fs/ext3/inode.c~ext3-noread-inode fs/ext3/inode.c
--- linux-2.5.73/fs/ext3/inode.c~ext3-noread-inode	Thu Jul 10 12:03:52 2003
+++ linux-2.5.73-alexey/fs/ext3/inode.c	Thu Jul 10 14:40:51 2003
@@ -2286,7 +2286,7 @@ out_stop:
  * inode's underlying buffer_head on success. 
  */
 
-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc, int in_mem)
 {
 	struct buffer_head *bh = 0;
 	unsigned long block;
@@ -2328,12 +2328,69 @@ int ext3_get_inode_loc (struct inode *in
 		EXT3_INODE_SIZE(inode->i_sb);
 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
 		(offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
-	if (!(bh = sb_bread(inode->i_sb, block))) {
+	if (!(bh = sb_getblk(inode->i_sb, block))) {
 		ext3_error (inode->i_sb, "ext3_get_inode_loc",
 			    "unable to read inode block - "
 			    "inode=%lu, block=%lu", inode->i_ino, block);
 		goto bad_inode;
 	}
+	if (!buffer_uptodate(bh)) {
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			/* someone has already initialized buffer */
+			unlock_buffer(bh);
+			goto has_buffer;
+		}
+
+		/* we can't skip I/O if inode is on a disk only */
+		if (in_mem) {
+			struct buffer_head *bitmap_bh; 
+			int inodes_per_buffer;
+			int inode_offset, i;
+			int start;
+
+			/*
+			 * if this inode is only valid in buffer we need not I/O
+			 */
+			inodes_per_buffer = bh->b_size /
+				EXT3_INODE_SIZE(inode->i_sb);
+			inode_offset = ((inode->i_ino - 1) %
+					EXT3_INODES_PER_GROUP(inode->i_sb));
+			start = inode_offset & ~(inodes_per_buffer - 1);
+			bitmap_bh = read_inode_bitmap(inode->i_sb, block_group);
+			for (i = start; i < start + inodes_per_buffer; i++) {
+				if (i == inode_offset)
+					continue;
+				if (ext3_test_bit(i, bitmap_bh->b_data))
+					break;
+			}
+			brelse(bitmap_bh);
+			if (i == start + inodes_per_buffer) {
+				/* all inodes (but our) are free. so, we skip I/O */
+				memset(bh->b_data, 0, bh->b_size);
+				set_buffer_uptodate(bh);
+				unlock_buffer(bh);
+				goto has_buffer;
+			}
+		}
+
+		/*
+		 * No, there are another valid inodes in the buffer
+		 * so, to preserve them we have to read buffer from
+		 * the disk
+		 */
+		get_bh(bh);
+		bh->b_end_io = end_buffer_io_sync;
+		submit_bh(READ, bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			ext3_error (inode->i_sb, "ext3_get_inode_loc",
+					"unable to read inode block - "
+					"inode=%lu, block=%lu", inode->i_ino, block);
+			goto bad_inode;
+		}
+	}
+  has_buffer:
 	offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
 
 	iloc->bh = bh;
@@ -2376,7 +2433,7 @@ void ext3_read_inode(struct inode * inod
 	ei->i_acl = EXT3_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
-	if (ext3_get_inode_loc(inode, &iloc))
+	if (ext3_get_inode_loc(inode, &iloc, 0))
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = iloc.raw_inode;
@@ -2781,7 +2838,7 @@ ext3_reserve_inode_write(handle_t *handl
 {
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, iloc);
+		err = ext3_get_inode_loc(inode, iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc->bh, "get_write_access");
 			err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -2879,7 +2936,7 @@ ext3_pin_inode(handle_t *handle, struct 
 
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, &iloc);
+		err = ext3_get_inode_loc(inode, &iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
diff -puN fs/ext3/ialloc.c~ext3-noread-inode fs/ext3/ialloc.c
--- linux-2.5.73/fs/ext3/ialloc.c~ext3-noread-inode	Thu Jul 10 13:05:37 2003
+++ linux-2.5.73-alexey/fs/ext3/ialloc.c	Thu Jul 10 13:06:12 2003
@@ -50,7 +50,7 @@
  *
  * Return buffer_head of bitmap on success or NULL.
  */
-static struct buffer_head *
+struct buffer_head *
 read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 {
 	struct ext3_group_desc *desc;
diff -puN include/linux/ext3_fs.h~ext3-noread-inode include/linux/ext3_fs.h
--- linux-2.5.73/include/linux/ext3_fs.h~ext3-noread-inode	Thu Jul 10 13:41:59 2003
+++ linux-2.5.73-alexey/include/linux/ext3_fs.h	Thu Jul 10 14:40:13 2003
@@ -717,6 +717,8 @@ extern unsigned long ext3_count_free_ino
 extern unsigned long ext3_count_dirs (struct super_block *);
 extern void ext3_check_inodes_bitmap (struct super_block *);
 extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+extern struct buffer_head * read_inode_bitmap(struct super_block *, unsigned long);
+
 
 
 /* inode.c */
@@ -724,7 +726,7 @@ extern int ext3_forget(handle_t *, int, 
 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 
-extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
+extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *, int);
 extern void ext3_read_inode (struct inode *);
 extern void ext3_write_inode (struct inode *, int);
 extern int  ext3_setattr (struct dentry *, struct iattr *);

_


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 11:20 ` Andrew Morton
@ 2003-07-10 15:33   ` bzzz
  2003-07-10 15:51     ` Andrew Morton
  2003-07-10 15:56   ` Alex Tomas
  1 sibling, 1 reply; 12+ messages in thread
From: bzzz @ 2003-07-10 15:33 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alex Tomas, linux-kernel, ext2-devel

>>>>> Andrew Morton (AM) writes:

 AM> Alex Tomas <bzzz@tmi.comex.ru> wrote:
 >> 
 >> Andreas Dilger proposed do not read inode's block during inode updating
 >> if we have enough data to fill that block. here is the patch.

 AM> ok, thanks.  Could you please redo it for the current kernel?

hmmm. it was against 2.5.72. I just tried it on 2.5.74 and all is OK.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 15:33   ` bzzz
@ 2003-07-10 15:51     ` Andrew Morton
  2003-07-10 20:46       ` Alex Tomas
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2003-07-10 15:51 UTC (permalink / raw)
  To: bzzz; +Cc: linux-kernel, ext2-devel

bzzz@tmi.comex.ru wrote:
>
> >>>>> Andrew Morton (AM) writes:
> 
>  AM> Alex Tomas <bzzz@tmi.comex.ru> wrote:
>  >> 
>  >> Andreas Dilger proposed do not read inode's block during inode updating
>  >> if we have enough data to fill that block. here is the patch.
> 
>  AM> ok, thanks.  Could you please redo it for the current kernel?
> 
> hmmm. it was against 2.5.72. I just tried it on 2.5.74 and all is OK.
> 

2.5.74 is some crufty ancient old thing.

ext3_read_inode() got reorganised.  Your latest patch will not apply to
current kernels.

The diff for the current devel kernel is always available at
http://www.kernel.org/pub/linux/kernel/v2.5/testing/cset/

The "gzipped full patch".  You should use that when generating and testing
changess.

There is even a nice patch-script for it:

	linus-patch http://www.kernel.org/pub/linux/kernel/v2.5/testing/cset/cset-20030710_0516.txt.gz




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 11:20 ` Andrew Morton
  2003-07-10 15:33   ` bzzz
@ 2003-07-10 15:56   ` Alex Tomas
  1 sibling, 0 replies; 12+ messages in thread
From: Alex Tomas @ 2003-07-10 15:56 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alex Tomas, linux-kernel, ext2-devel


here is more optimized vertion of the patch. if inode bitmap isn't in
cache then we do not try to use it and read inode from disk to
prevent double I/O.

patch is against 2.5.7[34]


ext3_get_inode_loc() read inode's block only if:
  1) this inode has no copy in memory
  2) inode's block has another valid inode(s)

this optimization allows to avoid needless I/O in two cases:
1) just allocated inode is first valid in the inode's block
2) kernel wants to write inode, but buffer in which inode
   belongs to gets freed by VM




diff -puN fs/ext3/inode.c~ext3-noread-inode fs/ext3/inode.c
--- linux-2.5.73/fs/ext3/inode.c~ext3-noread-inode	Thu Jul 10 12:03:52 2003
+++ linux-2.5.73-alexey/fs/ext3/inode.c	Thu Jul 10 15:52:59 2003
@@ -2286,7 +2286,7 @@ out_stop:
  * inode's underlying buffer_head on success. 
  */
 
-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc, int in_mem)
 {
 	struct buffer_head *bh = 0;
 	unsigned long block;
@@ -2328,12 +2328,88 @@ int ext3_get_inode_loc (struct inode *in
 		EXT3_INODE_SIZE(inode->i_sb);
 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
 		(offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
-	if (!(bh = sb_bread(inode->i_sb, block))) {
+	if (!(bh = sb_getblk(inode->i_sb, block))) {
 		ext3_error (inode->i_sb, "ext3_get_inode_loc",
 			    "unable to read inode block - "
 			    "inode=%lu, block=%lu", inode->i_ino, block);
 		goto bad_inode;
 	}
+	if (!buffer_uptodate(bh)) {
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			/* someone has already initialized buffer */
+			unlock_buffer(bh);
+			goto has_buffer;
+		}
+
+		/* we can't skip I/O if inode is on a disk only */
+		if (in_mem) {
+			struct buffer_head *bitmap_bh; 
+			struct ext3_group_desc *desc;
+			int inodes_per_buffer;
+			int inode_offset, i;
+			int start;
+
+			/*
+			 * if this inode is only valid in buffer we need not I/O
+			 */
+			inodes_per_buffer = bh->b_size /
+				EXT3_INODE_SIZE(inode->i_sb);
+			inode_offset = ((inode->i_ino - 1) %
+					EXT3_INODES_PER_GROUP(inode->i_sb));
+			start = inode_offset & ~(inodes_per_buffer - 1);
+
+			/* check is inode bitmap is in cache? */
+			desc = ext3_get_group_desc(inode->i_sb, block_group, NULL);
+			if (!desc)
+				goto make_io;
+
+			bitmap_bh = sb_getblk(inode->i_sb, le32_to_cpu(desc->bg_inode_bitmap));
+			if (!bitmap_bh)
+				goto make_io;
+
+			/* 
+			 * if inode bitmap isn't in cache then we may end up by 2 reads
+			 * instead of 1 read before optimizing. skip it
+			 */
+			if (!buffer_uptodate(bitmap_bh)) {
+				brelse(bitmap_bh);
+				goto make_io;
+			}
+			for (i = start; i < start + inodes_per_buffer; i++) {
+				if (i == inode_offset)
+					continue;
+				if (ext3_test_bit(i, bitmap_bh->b_data))
+					break;
+			}
+			brelse(bitmap_bh);
+			if (i == start + inodes_per_buffer) {
+				/* all inodes (but our) are free. so, we skip I/O */
+				memset(bh->b_data, 0, bh->b_size);
+				set_buffer_uptodate(bh);
+				unlock_buffer(bh);
+				goto has_buffer;
+			}
+		}
+
+make_io:
+		/*
+		 * No, there are another valid inodes in the buffer
+		 * so, to preserve them we have to read buffer from
+		 * the disk
+		 */
+		get_bh(bh);
+		bh->b_end_io = end_buffer_io_sync;
+		submit_bh(READ, bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			ext3_error (inode->i_sb, "ext3_get_inode_loc",
+					"unable to read inode block - "
+					"inode=%lu, block=%lu", inode->i_ino, block);
+			goto bad_inode;
+		}
+	}
+  has_buffer:
 	offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
 
 	iloc->bh = bh;
@@ -2376,7 +2452,7 @@ void ext3_read_inode(struct inode * inod
 	ei->i_acl = EXT3_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
-	if (ext3_get_inode_loc(inode, &iloc))
+	if (ext3_get_inode_loc(inode, &iloc, 0))
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = iloc.raw_inode;
@@ -2781,7 +2857,7 @@ ext3_reserve_inode_write(handle_t *handl
 {
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, iloc);
+		err = ext3_get_inode_loc(inode, iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc->bh, "get_write_access");
 			err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -2879,7 +2955,7 @@ ext3_pin_inode(handle_t *handle, struct 
 
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, &iloc);
+		err = ext3_get_inode_loc(inode, &iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
diff -puN fs/ext3/ialloc.c~ext3-noread-inode fs/ext3/ialloc.c
--- linux-2.5.73/fs/ext3/ialloc.c~ext3-noread-inode	Thu Jul 10 13:05:37 2003
+++ linux-2.5.73-alexey/fs/ext3/ialloc.c	Thu Jul 10 13:06:12 2003
@@ -50,7 +50,7 @@
  *
  * Return buffer_head of bitmap on success or NULL.
  */
-static struct buffer_head *
+struct buffer_head *
 read_inode_bitmap(struct super_block * sb, unsigned long block_group)
 {
 	struct ext3_group_desc *desc;
diff -puN include/linux/ext3_fs.h~ext3-noread-inode include/linux/ext3_fs.h
--- linux-2.5.73/include/linux/ext3_fs.h~ext3-noread-inode	Thu Jul 10 13:41:59 2003
+++ linux-2.5.73-alexey/include/linux/ext3_fs.h	Thu Jul 10 14:40:13 2003
@@ -717,6 +717,8 @@ extern unsigned long ext3_count_free_ino
 extern unsigned long ext3_count_dirs (struct super_block *);
 extern void ext3_check_inodes_bitmap (struct super_block *);
 extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+extern struct buffer_head * read_inode_bitmap(struct super_block *, unsigned long);
+
 
 
 /* inode.c */
@@ -724,7 +726,7 @@ extern int ext3_forget(handle_t *, int, 
 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 
-extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
+extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *, int);
 extern void ext3_read_inode (struct inode *);
 extern void ext3_write_inode (struct inode *, int);
 extern int  ext3_setattr (struct dentry *, struct iattr *);

_


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 20:46       ` Alex Tomas
@ 2003-07-10 17:01         ` Andrew Morton
  2003-07-10 21:09           ` Alex Tomas
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2003-07-10 17:01 UTC (permalink / raw)
  To: Alex Tomas; +Cc: linux-kernel, ext2-devel

Alex Tomas <bzzz@tmi.comex.ru> wrote:
>
> OK. fixed version:

Looks nice.  Now, Andreas did mention a while back that the locking rework
added an additional complexity to this optimization.  Perhaps he can remind
us of the details there?


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 21:09           ` Alex Tomas
@ 2003-07-10 19:14             ` Andreas Dilger
  0 siblings, 0 replies; 12+ messages in thread
From: Andreas Dilger @ 2003-07-10 19:14 UTC (permalink / raw)
  To: Alex Tomas; +Cc: Andrew Morton, linux-kernel, ext2-devel

On Jul 10, 2003  21:09 +0000, Alex Tomas wrote:
> >>>>> Andrew Morton (AM) writes:
> 
>  AM> Alex Tomas <bzzz@tmi.comex.ru> wrote:
>  >> 
>  >> OK. fixed version:
> 
>  AM> Looks nice.  Now, Andreas did mention a while back that the locking
>  AM> rework added an additional complexity to this optimization.  Perhaps
>  AM> he can remind us of the details there?
> 
> he meant than 2.5 don't use lock_sb() for inode allocation. this patch is
> safe from this point of view.

I sent a private email to Alex mentioning this also, before I noticed this
patch was posted here also.  Basically, the issue is that the inode selection
in ext3_new_inode() is using the atomic bitops to pick inodes which are free.

The itable reading code locks the buffer head, and then checks the bitmap,
but there is nothing to prevent another thread from marking another inode
for that itable block in-use while the first thread is still checking the
bitmap.  That would prevent the no-read optimization from happening, but
would not otherwise cause an error.

I think the race window is not huge, between the ext3_set_bit_atomic()
near the start of ext3_new_inode(), and the ext3_mark_inode_dirty() at the
end.  It could be made a lot smaller by moving the ext3_mark_inode_dirty()
call up, and splitting it into ext3_get_inode_iloc() and ext3_mark_iloc_dirty()
so that we can grab the itable buffer lock and decide whether we want to
read it or zero it, without having any unnecessary blocking in between.

Alternately, if we wanted to eliminate the race window entirely, we could
do the ext3_get_inode_iloc() after we find a candidate inode/itable block
with ext3_find_first_zero_bit(), but before we call ext3_set_bit_atomic().
That means we would grab the buffer lock and (maybe) zero the itable block
under lock before any thread could set a bit in the bitmap for that itable
block.  This isn't a scalability issue, since we essentially get one lock
per 32 inodes, and even though we are "blocking" other threads we are in
fact speeding things up because we won't be doing a read.

Cheers, Andreas
--
Andreas Dilger
http://sourceforge.net/projects/ext2resize/
http://www-mddsp.enel.ucalgary.ca/People/adilger/


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 15:51     ` Andrew Morton
@ 2003-07-10 20:46       ` Alex Tomas
  2003-07-10 17:01         ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Tomas @ 2003-07-10 20:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: bzzz, linux-kernel, ext2-devel

>>>>> Andrew Morton (AM) writes:


 AM> ext3_read_inode() got reorganised.  Your latest patch will not apply to
 AM> current kernels.

OK. fixed version:



ext3_get_inode_loc() read inode's block only if:
  1) this inode has no copy in memory
  2) inode's block has another valid inode(s)

this optimization allows to avoid needless I/O in two cases:
1) just allocated inode is first valid in the inode's block
2) kernel wants to write inode, but buffer in which inode
   belongs to gets freed by VM




diff -puN fs/ext3/inode.c~ext3-noread-inode fs/ext3/inode.c
--- linux-2.5.74/fs/ext3/inode.c~ext3-noread-inode	Thu Jul 10 20:05:25 2003
+++ linux-2.5.74-alexey/fs/ext3/inode.c	Thu Jul 10 20:25:24 2003
@@ -2339,24 +2339,106 @@ static unsigned long ext3_get_inode_bloc
 /* 
  * ext3_get_inode_loc returns with an extra refcount against the
  * inode's underlying buffer_head on success. 
+ * in_mem arg enables optimization
  */
 
-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc, int in_mem)
 {
 	unsigned long block;
-
+	struct buffer_head *bh;
+	
 	block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc);
-	if (block) {
-		struct buffer_head *bh = sb_bread(inode->i_sb, block);
-		if (bh) {
-			iloc->bh = bh;
-			return 0;
-		}
+	if (!block)
+		return -EIO;
+
+	bh = sb_getblk(inode->i_sb, block);
+	if (!bh) {
 		ext3_error (inode->i_sb, "ext3_get_inode_loc",
-			    "unable to read inode block - "
-			    "inode=%lu, block=%lu", inode->i_ino, block);
+				"unable to read inode block - "
+				"inode=%lu, block=%lu", inode->i_ino, block);
+		return -EIO;
+	}
+	if (!buffer_uptodate(bh)) {
+		lock_buffer(bh);
+		if (buffer_uptodate(bh)) {
+			/* someone has already initialized buffer */
+			unlock_buffer(bh);
+			goto has_buffer;
+		}
+
+		/* we can't skip I/O if inode is on a disk only */
+		if (in_mem) {
+			struct buffer_head *bitmap_bh; 
+			struct ext3_group_desc *desc;
+			int inodes_per_buffer;
+			int inode_offset, i;
+			int block_group;
+			int start;
+
+			/*
+			 * if this inode is only valid in buffer we need not I/O
+			 */
+			block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
+			inodes_per_buffer = bh->b_size /
+				EXT3_INODE_SIZE(inode->i_sb);
+			inode_offset = ((inode->i_ino - 1) %
+					EXT3_INODES_PER_GROUP(inode->i_sb));
+			start = inode_offset & ~(inodes_per_buffer - 1);
+
+			/* check is inode bitmap is in cache? */
+			desc = ext3_get_group_desc(inode->i_sb, block_group, NULL);
+			if (!desc)
+				goto make_io;
+
+			bitmap_bh = sb_getblk(inode->i_sb, le32_to_cpu(desc->bg_inode_bitmap));
+			if (!bitmap_bh)
+				goto make_io;
+
+			/* 
+			 * if inode bitmap isn't in cache then we may end up by 2 reads
+			 * instead of 1 read before optimizing. skip it
+			 */
+			if (!buffer_uptodate(bitmap_bh)) {
+				brelse(bitmap_bh);
+				goto make_io;
+			}
+			for (i = start; i < start + inodes_per_buffer; i++) {
+				if (i == inode_offset)
+					continue;
+				if (ext3_test_bit(i, bitmap_bh->b_data))
+					break;
+			}
+			brelse(bitmap_bh);
+			if (i == start + inodes_per_buffer) {
+				/* all inodes (but our) are free. so, we skip I/O */
+				memset(bh->b_data, 0, bh->b_size);
+				set_buffer_uptodate(bh);
+				unlock_buffer(bh);
+				goto has_buffer;
+			}
+		}
+
+make_io:
+		/*
+		 * No, there are another valid inodes in the buffer
+		 * so, to preserve them we have to read buffer from
+		 * the disk
+		 */
+		get_bh(bh);
+		bh->b_end_io = end_buffer_io_sync;
+		submit_bh(READ, bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			ext3_error (inode->i_sb, "ext3_get_inode_loc",
+					"unable to read inode block - "
+					"inode=%lu, block=%lu", inode->i_ino, block);
+			brelse(bh);
+			return -EIO;
+		}
 	}
-	return -EIO;
+has_buffer:
+	iloc->bh = bh;
+	return 0;
 }
 
 void ext3_set_inode_flags(struct inode *inode)
@@ -2389,7 +2471,7 @@ void ext3_read_inode(struct inode * inod
 	ei->i_acl = EXT3_ACL_NOT_CACHED;
 	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
 #endif
-	if (ext3_get_inode_loc(inode, &iloc))
+	if (ext3_get_inode_loc(inode, &iloc, 0))
 		goto bad_inode;
 	bh = iloc.bh;
 	raw_inode = ext3_raw_inode(&iloc);
@@ -2793,7 +2875,7 @@ ext3_reserve_inode_write(handle_t *handl
 {
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, iloc);
+		err = ext3_get_inode_loc(inode, iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc->bh, "get_write_access");
 			err = ext3_journal_get_write_access(handle, iloc->bh);
@@ -2891,7 +2973,7 @@ ext3_pin_inode(handle_t *handle, struct 
 
 	int err = 0;
 	if (handle) {
-		err = ext3_get_inode_loc(inode, &iloc);
+		err = ext3_get_inode_loc(inode, &iloc, 1);
 		if (!err) {
 			BUFFER_TRACE(iloc.bh, "get_write_access");
 			err = journal_get_write_access(handle, iloc.bh);
diff -puN include/linux/ext3_fs.h~ext3-noread-inode include/linux/ext3_fs.h
--- linux-2.5.74/include/linux/ext3_fs.h~ext3-noread-inode	Thu Jul 10 20:24:39 2003
+++ linux-2.5.74-alexey/include/linux/ext3_fs.h	Thu Jul 10 20:25:08 2003
@@ -721,7 +721,7 @@ extern int ext3_forget(handle_t *, int, 
 extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
 extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
 
-extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
+extern int  ext3_get_inode_loc (struct inode *, struct ext3_iloc *, int);
 extern void ext3_read_inode (struct inode *);
 extern void ext3_write_inode (struct inode *, int);
 extern int  ext3_setattr (struct dentry *, struct iattr *);

_





^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 17:01         ` Andrew Morton
@ 2003-07-10 21:09           ` Alex Tomas
  2003-07-10 19:14             ` Andreas Dilger
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Tomas @ 2003-07-10 21:09 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alex Tomas, linux-kernel, ext2-devel

>>>>> Andrew Morton (AM) writes:

 AM> Alex Tomas <bzzz@tmi.comex.ru> wrote:
 >> 
 >> OK. fixed version:

 AM> Looks nice.  Now, Andreas did mention a while back that the locking rework
 AM> added an additional complexity to this optimization.  Perhaps he can remind
 AM> us of the details there?

he meant than 2.5 don't use lock_sb() for inode allocation. this patch is safe
from this point of view.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 12:43     ` Andi Kleen
@ 2003-07-10 16:51       ` Alex Tomas
  2003-07-10 12:55         ` Andi Kleen
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Tomas @ 2003-07-10 16:51 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Alex Tomas, akpm, linux-kernel, ext2-devel

>>>>> Andi Kleen (AK) writes:

 AK> Alex Tomas <bzzz@tmi.comex.ru> writes:
 >> +			if (i == start + inodes_per_buffer) {
 >> +				/* all inodes (but our) are free. so, we skip I/O */

 AK> Won't this make undeletion a lot harder? Deleted inodes will now be trashed
 AK> at will, so you cannot use their contents anymore. 

AFAIK ext3 doesn't support undeletion at all

 AK> Also dtimes in free inodes  can be now lost, can't they? Did you check 
 AK> if that causes problems in fsck?  [my understanding was that ext2/3 fsck relies on the 
 AK> dtime to make some heuristics when recovering files work better]

freed inodes will be lost. I've checked filesystem by fsck after lots of creations/removals.
it seems OK.

 AK> Maybe it should be an mount option so that users can trade performance against
 AK> better recoverability.

well, I'm not sure we really need it


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
  2003-07-10 16:51       ` Alex Tomas
@ 2003-07-10 12:55         ` Andi Kleen
  0 siblings, 0 replies; 12+ messages in thread
From: Andi Kleen @ 2003-07-10 12:55 UTC (permalink / raw)
  To: Alex Tomas; +Cc: Andi Kleen, akpm, linux-kernel, ext2-devel

On Thu, Jul 10, 2003 at 04:51:30PM +0000, Alex Tomas wrote:
>  AK> Also dtimes in free inodes  can be now lost, can't they? Did you check 
>  AK> if that causes problems in fsck?  [my understanding was that ext2/3 fsck relies on the 
>  AK> dtime to make some heuristics when recovering files work better]
> 
> freed inodes will be lost. I've checked filesystem by fsck after lots of creations/removals.
> it seems OK.

iirc the dtime is used so that fsck can relink all non deleted inodes to lost+found
when a directory is corrupted. Without dtime there is no way to distingush
deleted and non deleted files, and just relinking everything would be quite
nasty.

With your patch this heuristic would lose some files.

That's more important for ext2 than ext3 of course, but even on ext3 
you could get a corrupted directory when you're unlucky (e.g. io error or similar)

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] minor optimization for EXT3
       [not found]   ` <87y8z6gyt3.fsf@gw.home.net.suse.lists.linux.kernel>
@ 2003-07-10 12:43     ` Andi Kleen
  2003-07-10 16:51       ` Alex Tomas
  0 siblings, 1 reply; 12+ messages in thread
From: Andi Kleen @ 2003-07-10 12:43 UTC (permalink / raw)
  To: Alex Tomas; +Cc: akpm, linux-kernel, ext2-devel

Alex Tomas <bzzz@tmi.comex.ru> writes:
> +			if (i == start + inodes_per_buffer) {
> +				/* all inodes (but our) are free. so, we skip I/O */

Won't this make undeletion a lot harder? Deleted inodes will now be trashed
at will, so you cannot use their contents anymore. 

Also dtimes in free inodes  can be now lost, can't they? Did you check 
if that causes problems in fsck?  [my understanding was that ext2/3 fsck relies on the 
dtime to make some heuristics when recovering files work better]

Maybe it should be an mount option so that users can trade performance against
better recoverability.

-Andi

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2003-07-10 19:00 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-07-10 14:49 [PATCH] minor optimization for EXT3 Alex Tomas
2003-07-10 11:20 ` Andrew Morton
2003-07-10 15:33   ` bzzz
2003-07-10 15:51     ` Andrew Morton
2003-07-10 20:46       ` Alex Tomas
2003-07-10 17:01         ` Andrew Morton
2003-07-10 21:09           ` Alex Tomas
2003-07-10 19:14             ` Andreas Dilger
2003-07-10 15:56   ` Alex Tomas
     [not found] <87smpeigio.fsf@gw.home.net.suse.lists.linux.kernel>
     [not found] ` <20030710042016.1b12113b.akpm@osdl.org.suse.lists.linux.kernel>
     [not found]   ` <87y8z6gyt3.fsf@gw.home.net.suse.lists.linux.kernel>
2003-07-10 12:43     ` Andi Kleen
2003-07-10 16:51       ` Alex Tomas
2003-07-10 12:55         ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).