linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
@ 2005-03-05  0:02 Badari Pulavarty
  2005-03-05  0:23 ` Andrew Morton
  0 siblings, 1 reply; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05  0:02 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Andrew Morton

[-- Attachment #1: Type: text/plain, Size: 210 bytes --]

Hi Andrew,

Here is the 2.6.11-mm1 patch to add "nobh" support to ext3 
writeback mode. Can you please add this to -mm ?

BTW, this patch depends on add-writepage patch, which is
in -mm tree.

Thanks,
Badari



[-- Attachment #2: ext3-writeback-nobh.patch4 --]
[-- Type: text/x-patch, Size: 5329 bytes --]

diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/inode.c linux-2.6.11.new/fs/ext3/inode.c
--- linux-2.6.11/fs/ext3/inode.c	2005-03-04 16:43:22.536143072 -0800
+++ linux-2.6.11.new/fs/ext3/inode.c	2005-03-04 16:48:08.532664992 -0800
@@ -20,6 +20,7 @@
  * 	(jj@sunsite.ms.mff.cuni.cz)
  *
  *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ *  Add "nobh" support for ext3 writeback mode - pbadari@us.ibm.com
  */
 
 #include <linux/module.h>
@@ -1016,7 +1017,10 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_prepare_write(page, from, to, ext3_get_block);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	else
+		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
 		goto prepare_write_failed;
 
@@ -1100,7 +1104,12 @@ static int ext3_writeback_commit_write(s
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
-	ret = generic_commit_write(file, page, from, to);
+
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_commit_write(file, page, from, to);
+	else
+		ret = generic_commit_write(file, page, from, to);
+
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1385,7 +1394,11 @@ static int ext3_writeback_writepage(stru
 		goto out_fail;
 	}
 
-	ret = block_write_full_page(page, ext3_get_block, wbc);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_writepage(page, ext3_get_block, wbc);
+	else
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
@@ -1646,13 +1659,34 @@ static int ext3_block_truncate_page(hand
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
-	int err;
+	int err = 0;
 	void *kaddr;
 
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
+	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
+		if (!PageUptodate(page)) {
+			err = mpage_readpage(page, ext3_get_block);
+			if ((err == 0) && !PageUptodate(page))
+				wait_on_page_locked(page);
+			lock_page(page);
+		}
+		if (err == 0 && PageUptodate(page)) {
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + offset, 0, length);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+			set_page_dirty(page);
+			goto unlock;
+		}
+		/* 
+		 * Well, we tried to work without buffers and failed.
+		 * Fallback to creating buffers
+		 */
+	}
+	
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/super.c linux-2.6.11.new/fs/ext3/super.c
--- linux-2.6.11/fs/ext3/super.c	2005-03-01 23:38:38.000000000 -0800
+++ linux-2.6.11.new/fs/ext3/super.c	2005-03-04 16:45:22.038975880 -0800
@@ -576,7 +576,7 @@ enum {
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-	Opt_reservation, Opt_noreservation, Opt_noload,
+	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -611,6 +611,7 @@ static match_table_t tokens = {
 	{Opt_reservation, "reservation"},
 	{Opt_noreservation, "noreservation"},
 	{Opt_noload, "noload"},
+	{Opt_nobh, "nobh"},
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
@@ -924,6 +925,9 @@ clear_qf_name:
 			match_int(&args[0], &option);
 			*n_blocks_count = option;
 			break;
+		case Opt_nobh:
+			set_opt(sbi->s_mount_opt, NOBH);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT3-fs: Unrecognized mount option \"%s\" "
@@ -1563,6 +1567,19 @@ static int ext3_fill_super (struct super
 		break;
 	}
 
+	if (test_opt(sb, NOBH)) {
+		if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
+				"since filesystem blocksize doesn't match "
+				"pagesize\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+				"its supported only with writeback mode\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+	}
 	/*
 	 * The journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
diff -Naurp -Xdontdiff linux-2.6.11/include/linux/ext3_fs.h linux-2.6.11.new/include/linux/ext3_fs.h
--- linux-2.6.11/include/linux/ext3_fs.h	2005-03-01 23:38:10.000000000 -0800
+++ linux-2.6.11.new/include/linux/ext3_fs.h	2005-03-04 16:46:29.101780784 -0800
@@ -357,6 +357,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
+#define EXT3_MOUNT_NOBH		0x40000 /* No bufferheads */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:02 [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode Badari Pulavarty
@ 2005-03-05  0:23 ` Andrew Morton
  2005-03-05  0:29   ` Badari Pulavarty
  2005-03-05  0:31   ` Andrew Morton
  0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2005-03-05  0:23 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: linux-kernel

Badari Pulavarty <pbadari@us.ibm.com> wrote:
>
> @@ -1646,13 +1659,34 @@ static int ext3_block_truncate_page(hand
>  	unsigned blocksize, iblock, length, pos;
>  	struct inode *inode = mapping->host;
>  	struct buffer_head *bh;
> -	int err;
> +	int err = 0;
>  	void *kaddr;
>  
>  	blocksize = inode->i_sb->s_blocksize;
>  	length = blocksize - (offset & (blocksize - 1));
>  	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
>  
> +	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
> +		if (!PageUptodate(page)) {
> +			err = mpage_readpage(page, ext3_get_block);
> +			if ((err == 0) && !PageUptodate(page))
> +				wait_on_page_locked(page);
> +			lock_page(page);
> +		}
> +		if (err == 0 && PageUptodate(page)) {
> +			kaddr = kmap_atomic(page, KM_USER0);
> +			memset(kaddr + offset, 0, length);
> +			flush_dcache_page(page);
> +			kunmap_atomic(kaddr, KM_USER0);
> +			set_page_dirty(page);
> +			goto unlock;
> +		}
> +		/* 

What's all this doing?  (It needs comments please - it's very unobvious).

Dropping and reacquiring the page lock in the middle of the truncate there
is a bit of a worry - need to think about that.

Err, yes, it's buggy - page reclaim can come in, grab the page lock and
whip the page off the mapping.  We end up with an anonymous page and we
then start trying to write it into the file and all sorts of things.


This bit:

			if ((err == 0) && !PageUptodate(page))
				wait_on_page_locked(page);

can simply be removed.  We're about to lock the page anyway.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:23 ` Andrew Morton
@ 2005-03-05  0:29   ` Badari Pulavarty
  2005-03-05  0:45     ` Andrew Morton
  2005-03-05  0:31   ` Andrew Morton
  1 sibling, 1 reply; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05  0:29 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel Mailing List

On Fri, 2005-03-04 at 16:23, Andrew Morton wrote:
> Badari Pulavarty <pbadari@us.ibm.com> wrote:
> >
> > @@ -1646,13 +1659,34 @@ static int ext3_block_truncate_page(hand
> >  	unsigned blocksize, iblock, length, pos;
> >  	struct inode *inode = mapping->host;
> >  	struct buffer_head *bh;
> > -	int err;
> > +	int err = 0;
> >  	void *kaddr;
> >  
> >  	blocksize = inode->i_sb->s_blocksize;
> >  	length = blocksize - (offset & (blocksize - 1));
> >  	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
> >  
> > +	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
> > +		if (!PageUptodate(page)) {
> > +			err = mpage_readpage(page, ext3_get_block);
> > +			if ((err == 0) && !PageUptodate(page))
> > +				wait_on_page_locked(page);
> > +			lock_page(page);
> > +		}
> > +		if (err == 0 && PageUptodate(page)) {
> > +			kaddr = kmap_atomic(page, KM_USER0);
> > +			memset(kaddr + offset, 0, length);
> > +			flush_dcache_page(page);
> > +			kunmap_atomic(kaddr, KM_USER0);
> > +			set_page_dirty(page);
> > +			goto unlock;
> > +		}
> > +		/* 
> 
> What's all this doing?  (It needs comments please - it's very unobvious).

All its trying to do is - to make sure the page is uptodate so that
it can zero out the portion thats needed. 

I can do getblock() and ll_rw_block(READ) instead. I was
trying to re-use the code and mpage_readpage() drops the lock.
What do you think ?


> 
> Dropping and reacquiring the page lock in the middle of the truncate there
> is a bit of a worry - need to think about that.
> 
> Err, yes, it's buggy - page reclaim can come in, grab the page lock and
> whip the page off the mapping.  We end up with an anonymous page and we
> then start trying to write it into the file and all sorts of things.
> 
> 
> This bit:
> 
> 			if ((err == 0) && !PageUptodate(page))
> 				wait_on_page_locked(page);
> 
> can simply be removed.  We're about to lock the page anyway.


Thanks,
Badari


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:23 ` Andrew Morton
  2005-03-05  0:29   ` Badari Pulavarty
@ 2005-03-05  0:31   ` Andrew Morton
  2005-03-05  0:46     ` Badari Pulavarty
  1 sibling, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2005-03-05  0:31 UTC (permalink / raw)
  To: pbadari, linux-kernel

Andrew Morton <akpm@osdl.org> wrote:
>
> page reclaim can come in, grab the page lock and
> whip the page off the mapping.

No it can't - we hold an additional ref on the page, so reclaim will back
off.  Still, it feels a bit flakey.

And we're not supposed to take lock_page() inside journal_start, because
that's a ranking violation.  Probably i_sem will prevent an actual deadlock
occurring, however.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:29   ` Badari Pulavarty
@ 2005-03-05  0:45     ` Andrew Morton
  2005-03-05  0:49       ` Badari Pulavarty
  2005-03-05  1:02       ` Badari Pulavarty
  0 siblings, 2 replies; 10+ messages in thread
From: Andrew Morton @ 2005-03-05  0:45 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: linux-kernel

Badari Pulavarty <pbadari@us.ibm.com> wrote:
>
> > What's all this doing?  (It needs comments please - it's very unobvious).
> 
> All its trying to do is - to make sure the page is uptodate so that
> it can zero out the portion thats needed. 

OK.

> I can do getblock() and ll_rw_block(READ) instead. I was
> trying to re-use the code and mpage_readpage() drops the lock.
> What do you think ?

Can you just call ->prepare_write, as nobh_truncate_page() does?  That'll
cause a nested transaction, but that's legal.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:31   ` Andrew Morton
@ 2005-03-05  0:46     ` Badari Pulavarty
  0 siblings, 0 replies; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05  0:46 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 560 bytes --]

On Fri, 2005-03-04 at 16:31, Andrew Morton wrote:
> Andrew Morton <akpm@osdl.org> wrote:
> >
> > page reclaim can come in, grab the page lock and
> > whip the page off the mapping.
> 
> No it can't - we hold an additional ref on the page, so reclaim will back
> off.  Still, it feels a bit flakey.
> 
> And we're not supposed to take lock_page() inside journal_start, because
> that's a ranking violation.  Probably i_sem will prevent an actual deadlock
> occurring, however.
> 

Here is the new patch to deal with locking, with more code :(

Thanks,
Badari



[-- Attachment #2: ext3-writeback-nobh.patch5 --]
[-- Type: text/x-patch, Size: 5619 bytes --]

diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/inode.c linux-2.6.11.new/fs/ext3/inode.c
--- linux-2.6.11/fs/ext3/inode.c	2005-03-04 16:43:22.536143072 -0800
+++ linux-2.6.11.new/fs/ext3/inode.c	2005-03-04 17:43:16.719744112 -0800
@@ -20,6 +20,7 @@
  * 	(jj@sunsite.ms.mff.cuni.cz)
  *
  *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ *  Add "nobh" support for ext3 writeback mode - pbadari@us.ibm.com
  */
 
 #include <linux/module.h>
@@ -1016,7 +1017,10 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_prepare_write(page, from, to, ext3_get_block);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	else
+		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
 		goto prepare_write_failed;
 
@@ -1100,7 +1104,12 @@ static int ext3_writeback_commit_write(s
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
-	ret = generic_commit_write(file, page, from, to);
+
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_commit_write(file, page, from, to);
+	else
+		ret = generic_commit_write(file, page, from, to);
+
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1385,7 +1394,11 @@ static int ext3_writeback_writepage(stru
 		goto out_fail;
 	}
 
-	ret = block_write_full_page(page, ext3_get_block, wbc);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_writepage(page, ext3_get_block, wbc);
+	else
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
@@ -1646,13 +1659,46 @@ static int ext3_block_truncate_page(hand
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
-	int err;
+	int err = 0;
 	void *kaddr;
 
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
+	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
+		if (!PageUptodate(page)) {
+			struct buffer_head map_bh;
+			bh = &map_bh;
+			bh->b_state = 0;
+			clear_buffer_mapped(bh);
+			ext3_get_block(inode, iblock, bh, 0);
+			if (!buffer_mapped(bh)) 
+				goto unlock;
+			err = -EIO;
+			set_bh_page(bh, page, 0);
+			bh->b_this_page = 0;
+			bh->b_size = 1 << inode->i_blkbits;
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!buffer_uptodate(bh))
+				goto unlock;
+			SetPageMappedToDisk(page);
+		}
+		if (err == 0 && PageUptodate(page)) {
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + offset, 0, length);
+			flush_dcache_page(page);
+			kunmap_atomic(kaddr, KM_USER0);
+			set_page_dirty(page);
+			goto unlock;
+		}
+		/* 
+		 * Well, we tried to work without buffers and failed.
+		 * Fallback to creating buffers
+		 */
+	}
+	
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/super.c linux-2.6.11.new/fs/ext3/super.c
--- linux-2.6.11/fs/ext3/super.c	2005-03-01 23:38:38.000000000 -0800
+++ linux-2.6.11.new/fs/ext3/super.c	2005-03-04 16:45:22.038975880 -0800
@@ -576,7 +576,7 @@ enum {
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-	Opt_reservation, Opt_noreservation, Opt_noload,
+	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -611,6 +611,7 @@ static match_table_t tokens = {
 	{Opt_reservation, "reservation"},
 	{Opt_noreservation, "noreservation"},
 	{Opt_noload, "noload"},
+	{Opt_nobh, "nobh"},
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
@@ -924,6 +925,9 @@ clear_qf_name:
 			match_int(&args[0], &option);
 			*n_blocks_count = option;
 			break;
+		case Opt_nobh:
+			set_opt(sbi->s_mount_opt, NOBH);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT3-fs: Unrecognized mount option \"%s\" "
@@ -1563,6 +1567,19 @@ static int ext3_fill_super (struct super
 		break;
 	}
 
+	if (test_opt(sb, NOBH)) {
+		if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
+				"since filesystem blocksize doesn't match "
+				"pagesize\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+				"its supported only with writeback mode\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+	}
 	/*
 	 * The journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
diff -Naurp -Xdontdiff linux-2.6.11/include/linux/ext3_fs.h linux-2.6.11.new/include/linux/ext3_fs.h
--- linux-2.6.11/include/linux/ext3_fs.h	2005-03-01 23:38:10.000000000 -0800
+++ linux-2.6.11.new/include/linux/ext3_fs.h	2005-03-04 16:46:29.101780784 -0800
@@ -357,6 +357,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
+#define EXT3_MOUNT_NOBH			0x40000 /* No bufferheads */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:45     ` Andrew Morton
@ 2005-03-05  0:49       ` Badari Pulavarty
  2005-03-05  1:02       ` Badari Pulavarty
  1 sibling, 0 replies; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05  0:49 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel Mailing List

On Fri, 2005-03-04 at 16:45, Andrew Morton wrote:
> Badari Pulavarty <pbadari@us.ibm.com> wrote:
> >
> > > What's all this doing?  (It needs comments please - it's very unobvious).
> > 
> > All its trying to do is - to make sure the page is uptodate so that
> > it can zero out the portion thats needed. 
> 
> OK.
> 
> > I can do getblock() and ll_rw_block(READ) instead. I was
> > trying to re-use the code and mpage_readpage() drops the lock.
> > What do you think ?
> 
> Can you just call ->prepare_write, as nobh_truncate_page() does?  That'll
> cause a nested transaction, but that's legal.
> 

Thats what I tried earlier, but never got it working. Lots of journal
asserts :( 

I guess I really need to learn journaling code someday.

Thanks,
Badari


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  0:45     ` Andrew Morton
  2005-03-05  0:49       ` Badari Pulavarty
@ 2005-03-05  1:02       ` Badari Pulavarty
  2005-03-05  1:16         ` Andrew Morton
  1 sibling, 1 reply; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05  1:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Linux Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 710 bytes --]

On Fri, 2005-03-04 at 16:45, Andrew Morton wrote:
> Badari Pulavarty <pbadari@us.ibm.com> wrote:
> >
> > > What's all this doing?  (It needs comments please - it's very unobvious).
> > 
> > All its trying to do is - to make sure the page is uptodate so that
> > it can zero out the portion thats needed. 
> 
> OK.
> 
> > I can do getblock() and ll_rw_block(READ) instead. I was
> > trying to re-use the code and mpage_readpage() drops the lock.
> > What do you think ?
> 
> Can you just call ->prepare_write, as nobh_truncate_page() does?  That'll
> cause a nested transaction, but that's legal.
> 

Please ignore my previous patch. I forgot to clear "err" value.

Here is the updated patch.

Thanks,
Badari



[-- Attachment #2: ext3-writeback-nobh.patch6 --]
[-- Type: text/x-patch, Size: 5473 bytes --]

diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/inode.c linux-2.6.11.new/fs/ext3/inode.c
--- linux-2.6.11/fs/ext3/inode.c	2005-03-04 16:43:22.536143072 -0800
+++ linux-2.6.11.new/fs/ext3/inode.c	2005-03-04 18:02:13.418939568 -0800
@@ -20,6 +20,7 @@
  * 	(jj@sunsite.ms.mff.cuni.cz)
  *
  *  Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ *  Add "nobh" support for ext3 writeback mode - pbadari@us.ibm.com
  */
 
 #include <linux/module.h>
@@ -1016,7 +1017,10 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_prepare_write(page, from, to, ext3_get_block);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_prepare_write(page, from, to, ext3_get_block);
+	else
+		ret = block_prepare_write(page, from, to, ext3_get_block);
 	if (ret)
 		goto prepare_write_failed;
 
@@ -1100,7 +1104,12 @@ static int ext3_writeback_commit_write(s
 	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
-	ret = generic_commit_write(file, page, from, to);
+
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_commit_write(file, page, from, to);
+	else
+		ret = generic_commit_write(file, page, from, to);
+
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
@@ -1385,7 +1394,11 @@ static int ext3_writeback_writepage(stru
 		goto out_fail;
 	}
 
-	ret = block_write_full_page(page, ext3_get_block, wbc);
+	if (test_opt(inode->i_sb, NOBH))
+		ret = nobh_writepage(page, ext3_get_block, wbc);
+	else
+		ret = block_write_full_page(page, ext3_get_block, wbc);
+
 	err = ext3_journal_stop(handle);
 	if (!ret)
 		ret = err;
@@ -1646,13 +1659,41 @@ static int ext3_block_truncate_page(hand
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
 	struct buffer_head *bh;
-	int err;
+	int err = 0;
 	void *kaddr;
 
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
+	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
+		if (!PageUptodate(page)) {
+			struct buffer_head map_bh;
+			bh = &map_bh;
+			bh->b_state = 0;
+			clear_buffer_mapped(bh);
+			ext3_get_block(inode, iblock, bh, 0);
+			if (!buffer_mapped(bh)) 
+				goto unlock;
+			err = -EIO;
+			set_bh_page(bh, page, 0);
+			bh->b_this_page = 0;
+			bh->b_size = 1 << inode->i_blkbits;
+			ll_rw_block(READ, 1, &bh);
+			wait_on_buffer(bh);
+			if (!buffer_uptodate(bh))
+				goto unlock;
+			SetPageMappedToDisk(page);
+		}
+		kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + offset, 0, length);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+		set_page_dirty(page);
+		err = 0;
+		goto unlock;
+	}
+	
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
 
diff -Naurp -Xdontdiff linux-2.6.11/fs/ext3/super.c linux-2.6.11.new/fs/ext3/super.c
--- linux-2.6.11/fs/ext3/super.c	2005-03-01 23:38:38.000000000 -0800
+++ linux-2.6.11.new/fs/ext3/super.c	2005-03-04 16:45:22.038975880 -0800
@@ -576,7 +576,7 @@ enum {
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 	Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
-	Opt_reservation, Opt_noreservation, Opt_noload,
+	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum,
 	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -611,6 +611,7 @@ static match_table_t tokens = {
 	{Opt_reservation, "reservation"},
 	{Opt_noreservation, "noreservation"},
 	{Opt_noload, "noload"},
+	{Opt_nobh, "nobh"},
 	{Opt_commit, "commit=%u"},
 	{Opt_journal_update, "journal=update"},
 	{Opt_journal_inum, "journal=%u"},
@@ -924,6 +925,9 @@ clear_qf_name:
 			match_int(&args[0], &option);
 			*n_blocks_count = option;
 			break;
+		case Opt_nobh:
+			set_opt(sbi->s_mount_opt, NOBH);
+			break;
 		default:
 			printk (KERN_ERR
 				"EXT3-fs: Unrecognized mount option \"%s\" "
@@ -1563,6 +1567,19 @@ static int ext3_fill_super (struct super
 		break;
 	}
 
+	if (test_opt(sb, NOBH)) {
+		if (sb->s_blocksize_bits != PAGE_CACHE_SHIFT) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option "
+				"since filesystem blocksize doesn't match "
+				"pagesize\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
+			printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - "
+				"its supported only with writeback mode\n");
+			clear_opt(sbi->s_mount_opt, NOBH);
+		}
+	}
 	/*
 	 * The journal_load will have done any necessary log recovery,
 	 * so we can safely mount the rest of the filesystem now.
diff -Naurp -Xdontdiff linux-2.6.11/include/linux/ext3_fs.h linux-2.6.11.new/include/linux/ext3_fs.h
--- linux-2.6.11/include/linux/ext3_fs.h	2005-03-01 23:38:10.000000000 -0800
+++ linux-2.6.11.new/include/linux/ext3_fs.h	2005-03-04 16:46:29.101780784 -0800
@@ -357,6 +357,7 @@ struct ext3_inode {
 #define EXT3_MOUNT_POSIX_ACL		0x08000	/* POSIX Access Control Lists */
 #define EXT3_MOUNT_RESERVATION		0x10000	/* Preallocation */
 #define EXT3_MOUNT_BARRIER		0x20000 /* Use block barriers */
+#define EXT3_MOUNT_NOBH			0x40000 /* No bufferheads */
 
 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
 #ifndef _LINUX_EXT2_FS_H

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  1:02       ` Badari Pulavarty
@ 2005-03-05  1:16         ` Andrew Morton
  2005-03-05 17:39           ` Badari Pulavarty
  0 siblings, 1 reply; 10+ messages in thread
From: Andrew Morton @ 2005-03-05  1:16 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: linux-kernel

Badari Pulavarty <pbadari@us.ibm.com> wrote:
>
>  	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

It would still be nice to add a comment in here...

> +	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
> +		if (!PageUptodate(page)) {
> +			struct buffer_head map_bh;
> +			bh = &map_bh;
> +			bh->b_state = 0;
> +			clear_buffer_mapped(bh);
> +			ext3_get_block(inode, iblock, bh, 0);
> +			if (!buffer_mapped(bh)) 
> +				goto unlock;
> +			err = -EIO;
> +			set_bh_page(bh, page, 0);
> +			bh->b_this_page = 0;
> +			bh->b_size = 1 << inode->i_blkbits;
> +			ll_rw_block(READ, 1, &bh);
> +			wait_on_buffer(bh);
> +			if (!buffer_uptodate(bh))
> +				goto unlock;
> +			SetPageMappedToDisk(page);
> +		}
> +		kaddr = kmap_atomic(page, KM_USER0);
> +		memset(kaddr + offset, 0, length);
> +		flush_dcache_page(page);
> +		kunmap_atomic(kaddr, KM_USER0);
> +		set_page_dirty(page);
> +		err = 0;
> +		goto unlock;
> +	}
> +	
>  	if (!page_has_buffers(page))
>  		create_empty_buffers(page, blocksize, 0);

Given that we're about to go add buffers to the page, why not do that
first, and use the page's own buffer_head rather than cooking up a local
one?  Then we can simply use sb_bread().


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode
  2005-03-05  1:16         ` Andrew Morton
@ 2005-03-05 17:39           ` Badari Pulavarty
  0 siblings, 0 replies; 10+ messages in thread
From: Badari Pulavarty @ 2005-03-05 17:39 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

Andrew Morton wrote:
> Badari Pulavarty <pbadari@us.ibm.com> wrote:
> 
>> 	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
> 
> 
> It would still be nice to add a comment in here...
> 
> 
>>+	if (test_opt(inode->i_sb, NOBH) && !page_has_buffers(page)) {
>>+		if (!PageUptodate(page)) {
>>+			struct buffer_head map_bh;
>>+			bh = &map_bh;
>>+			bh->b_state = 0;
>>+			clear_buffer_mapped(bh);
>>+			ext3_get_block(inode, iblock, bh, 0);
>>+			if (!buffer_mapped(bh)) 
>>+				goto unlock;
>>+			err = -EIO;
>>+			set_bh_page(bh, page, 0);
>>+			bh->b_this_page = 0;
>>+			bh->b_size = 1 << inode->i_blkbits;
>>+			ll_rw_block(READ, 1, &bh);
>>+			wait_on_buffer(bh);
>>+			if (!buffer_uptodate(bh))
>>+				goto unlock;
>>+			SetPageMappedToDisk(page);
>>+		}
>>+		kaddr = kmap_atomic(page, KM_USER0);
>>+		memset(kaddr + offset, 0, length);
>>+		flush_dcache_page(page);
>>+		kunmap_atomic(kaddr, KM_USER0);
>>+		set_page_dirty(page);
>>+		err = 0;
>>+		goto unlock;
>>+	}
>>+	
>> 	if (!page_has_buffers(page))
>> 		create_empty_buffers(page, blocksize, 0);
> 
> 
> Given that we're about to go add buffers to the page, why not do that
> first, and use the page's own buffer_head rather than cooking up a local
> one?  Then we can simply use sb_bread().
> 
> 

Only reason for cooking up the local one is not to attach the buffer to
the page forever. That will end up forcing other routines (like 
writepage/writepages) go thro "confused" code. I was hoping not to
attach buffers if they are not really needed.

But again, doing it the way you suggested will make the code more
readable.

Thanks,
Badari

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2005-03-05 17:39 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-03-05  0:02 [PATCH] 2.6.11-mm1 "nobh" support for ext3 writeback mode Badari Pulavarty
2005-03-05  0:23 ` Andrew Morton
2005-03-05  0:29   ` Badari Pulavarty
2005-03-05  0:45     ` Andrew Morton
2005-03-05  0:49       ` Badari Pulavarty
2005-03-05  1:02       ` Badari Pulavarty
2005-03-05  1:16         ` Andrew Morton
2005-03-05 17:39           ` Badari Pulavarty
2005-03-05  0:31   ` Andrew Morton
2005-03-05  0:46     ` Badari Pulavarty

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).