linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes.
@ 2002-08-28 15:45 Stephen Tweedie
  2002-08-28 16:18 ` Christoph Hellwig
  0 siblings, 1 reply; 4+ messages in thread
From: Stephen Tweedie @ 2002-08-28 15:45 UTC (permalink / raw)
  To: Marcelo Tosatti, linux-kernel; +Cc: Stephen Tweedie

ext3 has its own code which marks buffers dirty, in addition to the setting
done by the core generic_commit_write code.  However, the core code does

                        if (!atomic_set_buffer_dirty(bh)) {
                                __mark_dirty(bh);
                                buffer_insert_inode_queue(bh, inode);

so if ext3 marks the buffer dirty itself, the core fails to put it on the
per-inode list of dirty buffers.  Hence, fsync_inode_buffers() misses it.

The fix is to let ext3 put the buffer on the inode queue manually when
walking the page's buffer lists in its page write code.

--- linux-ext3-2.4merge/fs/ext3/inode.c.=K0006=.orig	Tue Aug 27 23:19:57 2002
+++ linux-ext3-2.4merge/fs/ext3/inode.c	Tue Aug 27 23:19:57 2002
@@ -949,11 +949,13 @@
 }
 
 static int walk_page_buffers(	handle_t *handle,
+				struct inode *inode,
 				struct buffer_head *head,
 				unsigned from,
 				unsigned to,
 				int *partial,
 				int (*fn)(	handle_t *handle,
+						struct inode *inode,
 						struct buffer_head *bh))
 {
 	struct buffer_head *bh;
@@ -971,7 +973,7 @@
 				*partial = 1;
 			continue;
 		}
-		err = (*fn)(handle, bh);
+		err = (*fn)(handle, inode, bh);
 		if (!ret)
 			ret = err;
 	}
@@ -1004,7 +1006,7 @@
  * write.  
  */
 
-static int do_journal_get_write_access(handle_t *handle, 
+static int do_journal_get_write_access(handle_t *handle, struct inode *inode,
 				       struct buffer_head *bh)
 {
 	return ext3_journal_get_write_access(handle, bh);
@@ -1030,7 +1032,7 @@
 		goto prepare_write_failed;
 
 	if (ext3_should_journal_data(inode)) {
-		ret = walk_page_buffers(handle, page->buffers,
+		ret = walk_page_buffers(handle, inode, page->buffers,
 				from, to, NULL, do_journal_get_write_access);
 		if (ret) {
 			/*
@@ -1051,24 +1053,32 @@
 	return ret;
 }
 
-static int journal_dirty_sync_data(handle_t *handle, struct buffer_head *bh)
+static int journal_dirty_sync_data(handle_t *handle, struct inode *inode,
+				   struct buffer_head *bh)
 {
-	return ext3_journal_dirty_data(handle, bh, 0);
+	int ret = ext3_journal_dirty_data(handle, bh, 0);
+	if (bh->b_inode != inode)
+		buffer_insert_inode_data_queue(bh, inode);
+	return ret;
 }
 
 /*
  * For ext3_writepage().  We also brelse() the buffer to account for
  * the bget() which ext3_writepage() performs.
  */
-static int journal_dirty_async_data(handle_t *handle, struct buffer_head *bh)
+static int journal_dirty_async_data(handle_t *handle, struct inode *inode, 
+				    struct buffer_head *bh)
 {
 	int ret = ext3_journal_dirty_data(handle, bh, 1);
+	if (bh->b_inode != inode)
+		buffer_insert_inode_data_queue(bh, inode);
 	__brelse(bh);
 	return ret;
 }
 
 /* For commit_write() in data=journal mode */
-static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
+static int commit_write_fn(handle_t *handle, struct inode *inode, 
+			   struct buffer_head *bh)
 {
 	set_bit(BH_Uptodate, &bh->b_state);
 	return ext3_journal_dirty_metadata(handle, bh);
@@ -1103,7 +1113,7 @@
 		int partial = 0;
 		loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
-		ret = walk_page_buffers(handle, page->buffers,
+		ret = walk_page_buffers(handle, inode, page->buffers,
 			from, to, &partial, commit_write_fn);
 		if (!partial)
 			SetPageUptodate(page);
@@ -1113,7 +1123,7 @@
 		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
 	} else {
 		if (ext3_should_order_data(inode)) {
-			ret = walk_page_buffers(handle, page->buffers,
+			ret = walk_page_buffers(handle, inode, page->buffers,
 				from, to, NULL, journal_dirty_sync_data);
 		}
 		/* Be careful here if generic_commit_write becomes a
@@ -1195,7 +1205,8 @@
 	return generic_block_bmap(mapping,block,ext3_get_block);
 }
 
-static int bget_one(handle_t *handle, struct buffer_head *bh)
+static int bget_one(handle_t *handle, struct inode *inode, 
+		    struct buffer_head *bh)
 {
 	atomic_inc(&bh->b_count);
 	return 0;
@@ -1294,7 +1305,7 @@
 			create_empty_buffers(page,
 				inode->i_dev, inode->i_sb->s_blocksize);
 		page_buffers = page->buffers;
-		walk_page_buffers(handle, page_buffers, 0,
+		walk_page_buffers(handle, inode, page_buffers, 0,
 				PAGE_CACHE_SIZE, NULL, bget_one);
 	}
 
@@ -1312,7 +1323,7 @@
 
 	/* And attach them to the current transaction */
 	if (order_data) {
-		err = walk_page_buffers(handle, page_buffers,
+		err = walk_page_buffers(handle, inode, page_buffers,
 			0, PAGE_CACHE_SIZE, NULL, journal_dirty_async_data);
 		if (!ret)
 			ret = err;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes.
  2002-08-28 15:45 [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes Stephen Tweedie
@ 2002-08-28 16:18 ` Christoph Hellwig
  2002-08-28 16:26   ` Stephen C. Tweedie
  0 siblings, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2002-08-28 16:18 UTC (permalink / raw)
  To: Stephen Tweedie; +Cc: Marcelo Tosatti, linux-kernel

On Wed, Aug 28, 2002 at 04:45:20PM +0100, Stephen Tweedie wrote:
> ext3 has its own code which marks buffers dirty, in addition to the setting
> done by the core generic_commit_write code.  However, the core code does
> 
>                         if (!atomic_set_buffer_dirty(bh)) {
>                                 __mark_dirty(bh);
>                                 buffer_insert_inode_queue(bh, inode);
> 
> so if ext3 marks the buffer dirty itself, the core fails to put it on the
> per-inode list of dirty buffers.  Hence, fsync_inode_buffers() misses it.
> 
> The fix is to let ext3 put the buffer on the inode queue manually when
> walking the page's buffer lists in its page write code.

This patch conflicts with the b_inode as bool patch you recently ACKed..


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes.
  2002-08-28 16:18 ` Christoph Hellwig
@ 2002-08-28 16:26   ` Stephen C. Tweedie
  2002-08-28 16:28     ` Christoph Hellwig
  0 siblings, 1 reply; 4+ messages in thread
From: Stephen C. Tweedie @ 2002-08-28 16:26 UTC (permalink / raw)
  To: Christoph Hellwig, Stephen Tweedie, Marcelo Tosatti, linux-kernel

Hi,

On Wed, Aug 28, 2002 at 05:18:13PM +0100, Christoph Hellwig wrote:
> On Wed, Aug 28, 2002 at 04:45:20PM +0100, Stephen Tweedie wrote:
> > The fix is to let ext3 put the buffer on the inode queue manually when
> > walking the page's buffer lists in its page write code.
> 
> This patch conflicts with the b_inode as bool patch you recently ACKed..

Do you have a pointer to the most recent version of that patch you
were going to submit?  I've got one final batch of ext3-related things
to submit, containing no bug-fixes but only tweaks and new features
(eg. allowing you to specify the commit interval per-filesystem).  I
can merge the b_inode diff for that batch if you want.

--Stephen

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes.
  2002-08-28 16:26   ` Stephen C. Tweedie
@ 2002-08-28 16:28     ` Christoph Hellwig
  0 siblings, 0 replies; 4+ messages in thread
From: Christoph Hellwig @ 2002-08-28 16:28 UTC (permalink / raw)
  To: Stephen C. Tweedie; +Cc: Marcelo Tosatti, linux-kernel

On Wed, Aug 28, 2002 at 05:26:42PM +0100, Stephen C. Tweedie wrote:
> Do you have a pointer to the most recent version of that patch you
> were going to submit?  I've got one final batch of ext3-related things
> to submit, containing no bug-fixes but only tweaks and new features
> (eg. allowing you to specify the commit interval per-filesystem).  I
> can merge the b_inode diff for that batch if you want.

Somewhere on lkml or below:


diff -uNr -Xdontdiff -p linux-2.4.20-pre4/fs/buffer.c linux/fs/buffer.c
--- linux-2.4.20-pre4/fs/buffer.c	Tue Aug 13 15:56:00 2002
+++ linux/fs/buffer.c	Sun Aug 25 19:28:55 2002
@@ -583,37 +583,29 @@ struct buffer_head * get_hash_table(kdev
 	return bh;
 }
 
-void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode)
+void buffer_insert_list(struct buffer_head *bh, struct list_head *list)
 {
 	spin_lock(&lru_list_lock);
-	if (bh->b_inode)
+	if (buffer_attached(bh))
 		list_del(&bh->b_inode_buffers);
-	bh->b_inode = inode;
-	list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers);
+	set_buffer_attached(bh);
+	list_add(&bh->b_inode_buffers, list);
 	spin_unlock(&lru_list_lock);
 }
 
-void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode)
-{
-	spin_lock(&lru_list_lock);
-	if (bh->b_inode)
-		list_del(&bh->b_inode_buffers);
-	bh->b_inode = inode;
-	list_add(&bh->b_inode_buffers, &inode->i_dirty_data_buffers);
-	spin_unlock(&lru_list_lock);
-}
-
-/* The caller must have the lru_list lock before calling the 
-   remove_inode_queue functions.  */
+/*
+ * The caller must have the lru_list lock before calling the 
+ * remove_inode_queue functions.
+ */
 static void __remove_inode_queue(struct buffer_head *bh)
 {
-	bh->b_inode = NULL;
 	list_del(&bh->b_inode_buffers);
+	clear_buffer_attached(bh);
 }
 
 static inline void remove_inode_queue(struct buffer_head *bh)
 {
-	if (bh->b_inode)
+	if (buffer_attached(bh))
 		__remove_inode_queue(bh);
 }
 
@@ -827,10 +819,10 @@ inline void set_buffer_async_io(struct b
 int fsync_buffers_list(struct list_head *list)
 {
 	struct buffer_head *bh;
-	struct inode tmp;
+	struct list_head tmp;
 	int err = 0, err2;
 	
-	INIT_LIST_HEAD(&tmp.i_dirty_buffers);
+	INIT_LIST_HEAD(&tmp);
 	
 	spin_lock(&lru_list_lock);
 
@@ -838,10 +830,10 @@ int fsync_buffers_list(struct list_head 
 		bh = BH_ENTRY(list->next);
 		list_del(&bh->b_inode_buffers);
 		if (!buffer_dirty(bh) && !buffer_locked(bh))
-			bh->b_inode = NULL;
+			clear_buffer_attached(bh);
 		else {
-			bh->b_inode = &tmp;
-			list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers);
+			set_buffer_attached(bh);
+			list_add(&bh->b_inode_buffers, &tmp);
 			if (buffer_dirty(bh)) {
 				get_bh(bh);
 				spin_unlock(&lru_list_lock);
@@ -861,8 +853,8 @@ int fsync_buffers_list(struct list_head 
 		}
 	}
 
-	while (!list_empty(&tmp.i_dirty_buffers)) {
-		bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
+	while (!list_empty(&tmp)) {
+		bh = BH_ENTRY(tmp.prev);
 		remove_inode_queue(bh);
 		get_bh(bh);
 		spin_unlock(&lru_list_lock);
@@ -1134,7 +1126,7 @@ struct buffer_head * bread(kdev_t dev, i
  */
 static void __put_unused_buffer_head(struct buffer_head * bh)
 {
-	if (bh->b_inode)
+	if (unlikely(buffer_attached(bh)))
 		BUG();
 	if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
 		kmem_cache_free(bh_cachep, bh);
diff -uNr -Xdontdiff -p linux-2.4.20-pre4/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c
--- linux-2.4.20-pre4/fs/reiserfs/inode.c	Tue Aug 13 15:56:01 2002
+++ linux/fs/reiserfs/inode.c	Tue Aug 20 11:39:48 2002
@@ -102,9 +102,9 @@ inline void make_le_item_head (struct it
 }
 
 static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
-    struct inode *jinode = &(SB_JOURNAL(inode->i_sb)->j_dummy_inode) ;
+    struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
 
-    buffer_insert_inode_queue(bh, jinode) ;
+    buffer_insert_list(bh, &j->j_dirty_buffers) ;
 }
 
 //
diff -uNr -Xdontdiff -p linux-2.4.20-pre4/fs/reiserfs/journal.c linux/fs/reiserfs/journal.c
--- linux-2.4.20-pre4/fs/reiserfs/journal.c	Sat Aug 17 14:54:39 2002
+++ linux/fs/reiserfs/journal.c	Tue Aug 20 11:39:48 2002
@@ -1937,7 +1937,7 @@ int journal_init(struct super_block *p_s
   memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */
 
   INIT_LIST_HEAD(&SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ;
-  INIT_LIST_HEAD(&(SB_JOURNAL(p_s_sb)->j_dummy_inode.i_dirty_buffers)) ;
+  INIT_LIST_HEAD(&SB_JOURNAL(p_s_sb)->j_dirty_buffers) ;
   reiserfs_allocate_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap, 
                                  SB_BMAP_NR(p_s_sb)) ;
   allocate_bitmap_nodes(p_s_sb) ;
@@ -2933,7 +2933,7 @@ printk("journal-2020: do_journal_end: BA
   SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ;
 
   /* write any buffers that must hit disk before this commit is done */
-  fsync_inode_buffers(&(SB_JOURNAL(p_s_sb)->j_dummy_inode)) ;
+  fsync_buffers_list(&(SB_JOURNAL(p_s_sb)->j_dirty_buffers)) ;
 
   /* honor the flush and async wishes from the caller */
   if (flush) {
diff -uNr -Xdontdiff -p linux-2.4.20-pre4/include/linux/fs.h linux/include/linux/fs.h
--- linux-2.4.20-pre4/include/linux/fs.h	Tue Aug 20 11:37:00 2002
+++ linux/include/linux/fs.h	Sun Aug 25 19:20:22 2002
@@ -219,6 +219,7 @@ enum bh_state_bits {
 	BH_Async,	/* 1 if the buffer is under end_buffer_io_async I/O */
 	BH_Wait_IO,	/* 1 if we should write out this buffer */
 	BH_Launder,	/* 1 if we can throttle on this buffer */
+	BH_Attached,	/* 1 if b_inode_buffers is linked into a list */
 	BH_JBD,		/* 1 if it has an attached journal_head */
 
 	BH_PrivateStart,/* not a state bit, but the first bit available
@@ -266,7 +267,6 @@ struct buffer_head {
 	unsigned long b_rsector;	/* Real buffer location on disk */
 	wait_queue_head_t b_wait;
 
-	struct inode *	     b_inode;
 	struct list_head     b_inode_buffers;	/* doubly linked list of inode dirty buffers */
 };
 
@@ -1167,8 +1167,18 @@ static inline void mark_buffer_clean(str
 extern void FASTCALL(__mark_dirty(struct buffer_head *bh));
 extern void FASTCALL(__mark_buffer_dirty(struct buffer_head *bh));
 extern void FASTCALL(mark_buffer_dirty(struct buffer_head *bh));
-extern void FASTCALL(buffer_insert_inode_queue(struct buffer_head *, struct inode *));
-extern void FASTCALL(buffer_insert_inode_data_queue(struct buffer_head *, struct inode *));
+
+extern void FASTCALL(buffer_insert_list(struct buffer_head *, struct list_head *));
+
+static inline void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode)
+{
+	buffer_insert_list(bh, &inode->i_dirty_buffers);
+}
+
+static inline void buffer_insert_inode_data_queue(struct buffer_head *bh, struct inode *inode)
+{
+	buffer_insert_list(bh, &inode->i_dirty_data_buffers);
+}
 
 static inline int atomic_set_buffer_dirty(struct buffer_head *bh)
 {
@@ -1183,6 +1193,21 @@ static inline void mark_buffer_async(str
 		clear_bit(BH_Async, &bh->b_state);
 }
 
+static inline void set_buffer_attached(struct buffer_head *bh)
+{
+	__set_bit(BH_Attached, &bh->b_state);
+}
+
+static inline void clear_buffer_attached(struct buffer_head *bh)
+{
+	clear_bit(BH_Attached, &bh->b_state);
+}
+
+static inline int buffer_attached(struct buffer_head *bh)
+{
+	return test_bit(BH_Attached, &bh->b_state);
+}
+
 /*
  * If an error happens during the make_request, this function
  * has to be recalled. It marks the buffer as clean and not
diff -uNr -Xdontdiff -p linux-2.4.20-pre4/include/linux/reiserfs_fs_sb.h linux/include/linux/reiserfs_fs_sb.h
--- linux-2.4.20-pre4/include/linux/reiserfs_fs_sb.h	Tue Aug 13 15:56:04 2002
+++ linux/include/linux/reiserfs_fs_sb.h	Tue Aug 20 15:26:38 2002
@@ -312,7 +312,7 @@ struct reiserfs_journal {
   int j_free_bitmap_nodes ;
   int j_used_bitmap_nodes ;
   struct list_head j_bitmap_nodes ;
-  struct inode j_dummy_inode ;
+  struct list_head j_dirty_buffers ;
   struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ;	/* array of bitmaps to record the deleted blocks */
   struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ;	    /* array of all the journal lists */
   struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; 	    /* hash table for real buffer heads in current trans */ 
diff -uNr -Xdontdiff -p linux-2.4.20-pre4/kernel/ksyms.c linux/kernel/ksyms.c
--- linux-2.4.20-pre4/kernel/ksyms.c	Tue Aug 13 15:56:05 2002
+++ linux/kernel/ksyms.c	Sun Aug 25 19:32:04 2002
@@ -525,8 +527,7 @@ EXPORT_SYMBOL(get_hash_table);
 EXPORT_SYMBOL(get_empty_inode);
 EXPORT_SYMBOL(insert_inode_hash);
 EXPORT_SYMBOL(remove_inode_hash);
-EXPORT_SYMBOL(buffer_insert_inode_queue);
-EXPORT_SYMBOL(buffer_insert_inode_data_queue);
+EXPORT_SYMBOL(buffer_insert_list);
 EXPORT_SYMBOL(make_bad_inode);
 EXPORT_SYMBOL(is_bad_inode);
 EXPORT_SYMBOL(event);

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2002-08-28 16:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-08-28 15:45 [Patch 5/8] 2.4.20-pre4/ext3: Fix O_SYNC for non-data-journaled modes Stephen Tweedie
2002-08-28 16:18 ` Christoph Hellwig
2002-08-28 16:26   ` Stephen C. Tweedie
2002-08-28 16:28     ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).