linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cleanup reiserfs direct->indirect conversions
@ 2001-07-12 14:11 Chris Mason
  2001-07-16 19:30 ` [reiserfs-list] " Nikita Danilov
  0 siblings, 1 reply; 2+ messages in thread
From: Chris Mason @ 2001-07-12 14:11 UTC (permalink / raw)
  To: linux-kernel, reiserfs-list; +Cc: torvalds


Hi guys,

This patch has been around since the 2.4.0-test days, and was included in
2.4.5-ac14.  It's big, so I wanted to give it lots of testing time, but
I'm sure Alan is sick of updating around it (thanks, Alan ;-)

To keep old data from being corrupted by a crash, 
reiserfs flushes newly unpacked tails before the transaction
that unpacked them commits.  The old way involved locking pages
during transaction close, which really sucked.

This code replaces that with a private inode, puts the
conversion targets on its buffer list, and uses fsync_inode_buffers
to get them on disk.

It has the added bonus of dropping lock_kernel from reiserfs_commit_write
unless i_size has changed or O_SYNC is in use.

The patch is against 2.4.7-pre6.  Linus, please include:

diff -Nru a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
--- a/fs/reiserfs/inode.c	Thu Jul 12 10:43:24 2001
+++ b/fs/reiserfs/inode.c	Thu Jul 12 10:43:24 2001
@@ -44,7 +44,6 @@
 	windex = push_journal_writer("delete_inode") ;
 
 	reiserfs_delete_object (&th, inode);
-	reiserfs_remove_page_from_flush_list(&th, inode) ;
 	pop_journal_writer(windex) ;
 	reiserfs_release_objectid (&th, inode->i_ino);
 
@@ -103,6 +102,11 @@
     ih->u.ih_entry_count = cpu_to_le16 (entry_count);
 }
 
+static void add_to_flushlist(struct inode *inode, struct buffer_head *bh) {
+    struct inode *jinode = &(SB_JOURNAL(inode->i_sb)->j_dummy_inode) ;
+
+    buffer_insert_inode_queue(bh, jinode) ;
+}
 
 //
 // FIXME: we might cache recently accessed indirect item (or at least
@@ -129,60 +133,6 @@
 ** --chris
 */
 
-/* people who call journal_begin with a page locked must call this
-** BEFORE calling journal_begin
-*/
-static int prevent_flush_page_lock(struct page *page, 
-				   struct inode *inode) {
-  struct reiserfs_page_list *pl ;
-  struct super_block *s = inode->i_sb ;
-  /* we don't care if the inode has a stale pointer from an old
-  ** transaction
-  */
-  if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) {
-    return 0 ;
-  }
-  pl = inode->u.reiserfs_i.i_converted_page ;
-  if (pl && pl->page == page) {
-    pl->do_not_lock = 1 ;
-  }
-  /* this last part is really important.  The address space operations have
-  ** the page locked before they call the journal functions.  So it is possible
-  ** for one process to be waiting in flush_pages_before_commit for a 
-  ** page, then for the process with the page locked to call journal_begin.
-  **
-  ** We'll deadlock because the process flushing pages will never notice
-  ** the process with the page locked has called prevent_flush_page_lock.
-  ** So, we wake up the page waiters, even though the page is still locked.
-  ** The process waiting in flush_pages_before_commit must check the
-  ** pl->do_not_lock flag, and stop trying to lock the page.
-  */
-  wake_up(&page->wait) ;
-  return 0 ;
- 
-}
-/* people who call journal_end with a page locked must call this
-** AFTER calling journal_end
-*/
-static int allow_flush_page_lock(struct page *page, 
-				   struct inode *inode) {
-
-  struct reiserfs_page_list *pl ;
-  struct super_block *s = inode->i_sb ;
-  /* we don't care if the inode has a stale pointer from an old
-  ** transaction
-  */
-  if(!page || inode->u.reiserfs_i.i_conversion_trans_id != SB_JOURNAL(s)->j_trans_id) {
-    return 0 ;
-  }
-  pl = inode->u.reiserfs_i.i_converted_page ;
-  if (pl && pl->page == page) {
-    pl->do_not_lock = 0 ;
-  }
-  return 0 ;
- 
-}
-
 /* If this page has a file tail in it, and
 ** it was read in by get_block_create_0, the page data is valid,
 ** but tail is still sitting in a direct item, and we can't write to
@@ -607,7 +557,6 @@
 	return -EIO;
     }
 
-    prevent_flush_page_lock(bh_result->b_page, inode) ;
     inode->u.reiserfs_i.i_pack_on_close = 1 ;
 
     windex = push_journal_writer("reiserfs_get_block") ;
@@ -693,7 +642,6 @@
 	if (transaction_started)
 	    journal_end(&th, inode->i_sb, jbegin_count) ;
 
-	allow_flush_page_lock(bh_result->b_page, inode) ;
 	unlock_kernel() ;
 	 
 	/* the item was found, so new blocks were not added to the file
@@ -794,8 +742,12 @@
 	    /* we've converted the tail, so we must 
 	    ** flush unbh before the transaction commits
 	    */
-	    reiserfs_add_page_to_flush_list(&th, inode, unbh) ;
-	    mark_buffer_dirty(unbh) ;
+	    add_to_flushlist(inode, unbh) ;
+
+	    /* mark it dirty now to prevent commit_write from adding
+	    ** this buffer to the inode's dirty buffer list
+	    */
+	    __mark_buffer_dirty(unbh) ;
 		  
 	    //inode->i_blocks += inode->i_sb->s_blocksize / 512;
 	    //mark_tail_converted (inode);
@@ -887,7 +839,6 @@
       journal_end(&th, inode->i_sb, jbegin_count) ;
     }
     pop_journal_writer(windex) ;
-    allow_flush_page_lock(bh_result->b_page, inode) ;
     unlock_kernel() ;
     reiserfs_check_path(&path) ;
     return retval;
@@ -1671,13 +1622,11 @@
     ** because the truncate might pack the item anyway 
     ** (it will unmap bh if it packs).
     */
-    prevent_flush_page_lock(page, p_s_inode) ;
     journal_begin(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
     windex = push_journal_writer("reiserfs_vfs_truncate_file") ;
     reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
     pop_journal_writer(windex) ;
     journal_end(&th, p_s_inode->i_sb,  JOURNAL_PER_BALANCE_CNT * 2 ) ;
-    allow_flush_page_lock(page, p_s_inode) ;
 
     if (page) {
         length = offset & (blocksize - 1) ;
@@ -1719,7 +1668,6 @@
 
 start_over:
     lock_kernel() ;
-    prevent_flush_page_lock(bh_result->b_page, inode) ;
     journal_begin(&th, inode->i_sb, jbegin_count) ;
 
     make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
@@ -1785,7 +1733,6 @@
 out:
     pathrelse(&path) ;
     journal_end(&th, inode->i_sb, jbegin_count) ;
-    allow_flush_page_lock(bh_result->b_page, inode) ;
     unlock_kernel() ;
 
     /* this is where we fill in holes in the file. */
@@ -1950,29 +1897,27 @@
   return generic_block_bmap(as, block, reiserfs_bmap) ;
 }
 
+static int reiserfs_commit_write(struct file *f, struct page *page,
+			         unsigned from, unsigned to) {
+    struct inode *inode = page->mapping->host;
+    int ret ;
 
-static int reiserfs_commit_write(struct file *f, struct page *page, 
-                                 unsigned from, unsigned to) {
-    struct inode *inode = page->mapping->host ;
-    int ret ; 
-    struct reiserfs_transaction_handle th ;
-    
     reiserfs_wait_on_write_block(inode->i_sb) ;
-    lock_kernel();
-    prevent_flush_page_lock(page, inode) ;
     ret = generic_commit_write(f, page, from, to) ;
+
     /* we test for O_SYNC here so we can commit the transaction
     ** for any packed tails the file might have had
     */
     if (f->f_flags & O_SYNC) {
+	struct reiserfs_transaction_handle th ;
+	lock_kernel() ;
 	journal_begin(&th, inode->i_sb, 1) ;
 	reiserfs_prepare_for_journal(inode->i_sb, 
 	                             SB_BUFFER_WITH_SB(inode->i_sb), 1) ;
 	journal_mark_dirty(&th, inode->i_sb, SB_BUFFER_WITH_SB(inode->i_sb)) ;
 	journal_end_sync(&th, inode->i_sb, 1) ;
+	unlock_kernel() ;
     }
-    allow_flush_page_lock(page, inode) ;
-    unlock_kernel();
     return ret ;
 }
 
diff -Nru a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
--- a/fs/reiserfs/journal.c	Thu Jul 12 10:43:24 2001
+++ b/fs/reiserfs/journal.c	Thu Jul 12 10:43:24 2001
@@ -114,11 +114,7 @@
 static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) {
   if (bh) {
     clear_bit(BH_Dirty, &bh->b_state) ;
-#if 0
-    if (bh->b_list != BUF_CLEAN) {
-      reiserfs_file_buffer(bh, BUF_CLEAN) ;
-    }
-#endif
+    refile_buffer(bh) ;
   }
   return 0 ;
 }
@@ -1889,6 +1885,7 @@
   memset(journal_writers, 0, sizeof(char *) * 512) ; /* debug code */
 
   INIT_LIST_HEAD(&SB_JOURNAL(p_s_sb)->j_bitmap_nodes) ;
+  INIT_LIST_HEAD(&(SB_JOURNAL(p_s_sb)->j_dummy_inode.i_dirty_buffers)) ;
   reiserfs_allocate_list_bitmaps(p_s_sb, SB_JOURNAL(p_s_sb)->j_list_bitmap, 
                                  SB_BMAP_NR(p_s_sb)) ;
   allocate_bitmap_nodes(p_s_sb) ;
@@ -2582,9 +2579,6 @@
 	    ** in the current trans
 	    */
 	    mark_buffer_notjournal_dirty(cn->bh) ;
-	    if (!buffer_locked(cn->bh)) {
-	      reiserfs_clean_and_file_buffer(cn->bh) ;
-	    }
 	    cleaned = 1 ;
 	    atomic_dec(&(cn->bh->b_count)) ;
 	    if (atomic_read(&(cn->bh->b_count)) < 0) {
@@ -2602,6 +2596,7 @@
   }
 
   if (bh) {
+    reiserfs_clean_and_file_buffer(bh) ;
     atomic_dec(&(bh->b_count)) ; /* get_hash incs this */
     if (atomic_read(&(bh->b_count)) < 0) {
       printk("journal-2165: bh->b_count < 0\n") ;
@@ -2656,275 +2651,6 @@
   }
 }
 
-/* 
- * Wait for a page to get unlocked.
- *
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
- * go away during the wait..
- */
-static void ___reiserfs_wait_on_page(struct reiserfs_page_list *pl)
-{
-    struct task_struct *tsk = current;
-    struct page *page = pl->page ;
-    DECLARE_WAITQUEUE(wait, tsk);
-
-    add_wait_queue(&page->wait, &wait);
-    do {
-	block_sync_page(page);
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-	if (!PageLocked(page) || pl->do_not_lock)
-	    break;
-	schedule();
-    } while (PageLocked(page));
-    tsk->state = TASK_RUNNING;
-    remove_wait_queue(&page->wait, &wait);
-}
-
-/*
- * Get an exclusive lock on the page..
- * but, every time you get woken up, check the page to make sure
- * someone hasn't called a journal_begin with it locked.
- *
- * the page should always be locked when this returns
- *
- * returns 0 if you've got the page locked
- * returns 1 if it returns because someone else has called journal_begin
- *           with the page locked
- * this is only useful to the code that flushes pages before a 
- * commit.  Do not export this hack.  Ever.
- */
-static int reiserfs_try_lock_page(struct reiserfs_page_list *pl)
-{
-    struct page *page = pl->page ;
-    while (TryLockPage(page)) {
-	if (pl->do_not_lock) {
-	    /* the page is locked, but we cannot have it */
-	    return 1 ;
-	}
-	___reiserfs_wait_on_page(pl);
-    }
-    /* we have the page locked */
-    return 0 ;
-}
-
-
-/*
-** This can only be called from do_journal_end.
-** it runs through the list things that need flushing before the
-** transaction can commit, and writes each of them to disk
-**
-*/
-
-static void flush_pages_before_commit(struct reiserfs_transaction_handle *th,
-                                      struct super_block *p_s_sb) {
-  struct reiserfs_page_list *pl = SB_JOURNAL(p_s_sb)->j_flush_pages ;
-  struct reiserfs_page_list *pl_tmp ;
-  struct buffer_head *bh, *head ;
-  int count = 0 ;
-
-  /* first write each dirty unlocked buffer in the list */
-
-  while(pl) {
-    /* ugly.  journal_end can be called from get_block, which has a 
-    ** page locked.  So, we have to check to see if pl->page is the page
-    ** currently locked by the calling function, and if so, skip the
-    ** lock
-    */
-    if (reiserfs_try_lock_page(pl)) {
-      goto setup_next ;
-    }
-    if (!PageLocked(pl->page)) {
-      BUG() ;
-    }
-    if (pl->page->buffers) {
-      head = pl->page->buffers ;
-      bh = head ;
-      do {
-	if (bh->b_blocknr == pl->blocknr && buffer_dirty(bh) &&
-	    !buffer_locked(bh) && buffer_uptodate(bh) ) {
-	  ll_rw_block(WRITE, 1, &bh) ;
-	}
-	bh = bh->b_this_page ;
-      } while (bh != head) ;
-    }
-    if (!pl->do_not_lock) {
-      UnlockPage(pl->page) ;
-    }
-setup_next:
-    pl = pl->next ;
-  }
-
-  /* now wait on them */
-
-  pl = SB_JOURNAL(p_s_sb)->j_flush_pages ;
-  while(pl) {
-    if (reiserfs_try_lock_page(pl)) {
-      goto remove_page ;
-    }
-    if (!PageLocked(pl->page)) {
-      BUG() ;
-    }
-    if (pl->page->buffers) {
-      head = pl->page->buffers ;
-      bh = head ;
-      do {
-	if (bh->b_blocknr == pl->blocknr) {
-	  count++ ;
-	  wait_on_buffer(bh) ;
-	  if (!buffer_uptodate(bh)) {
-	    reiserfs_panic(p_s_sb, "journal-2443: flush_pages_before_commit, error writing block %lu\n", bh->b_blocknr) ;
-	  }
-	}
-	bh = bh->b_this_page ;
-      } while (bh != head) ;
-    }
-    if (!pl->do_not_lock) {
-      UnlockPage(pl->page) ;
-    }
-remove_page:
-    /* we've waited on the I/O, we can remove the page from the
-    ** list, and free our pointer struct to it.
-    */
-    if (pl->prev) {
-      pl->prev->next = pl->next ;
-    }
-    if (pl->next) {
-      pl->next->prev = pl->prev ;
-    }
-    put_page(pl->page) ;
-    pl_tmp = pl ;
-    pl = pl->next ;
-    reiserfs_kfree(pl_tmp, sizeof(struct reiserfs_page_list), p_s_sb) ;
-  }
-  SB_JOURNAL(p_s_sb)->j_flush_pages = NULL ;
-}
-
-/*
-** called when a indirect item is converted back into a tail.
-**
-** The reiserfs part of the inode stores enough information to find
-** our page_list struct in the flush list.  We remove it from the list
-** and free the struct.
-**
-** Note, it is possible for this to happen:
-**
-** reiserfs_add_page_to_flush_list(inode)
-** transaction ends, list is flushed
-** reiserfs_remove_page_from_flush_list(inode)
-**
-** This would be bad because the page_list pointer in the inode is not
-** updated when the list is flushed, so we can't know if the pointer is
-** valid.  So, in the inode, we also store the transaction id when the
-** page was added.  If we are trying to remove something from an old 
-** transaction, we just clear out the pointer in the inode and return.
-**
-** Normal case is to use the reiserfs_page_list pointer in the inode to 
-** find and remove the page from the flush list.
-*/
-int reiserfs_remove_page_from_flush_list(struct reiserfs_transaction_handle *th,
-                                         struct inode *inode) {
-  struct reiserfs_page_list *pl ;
-
-  /* was this conversion done in a previous transaction? If so, return */
-  if (inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) {
-    inode->u.reiserfs_i.i_converted_page = NULL ;
-    inode->u.reiserfs_i.i_conversion_trans_id = 0  ;
-    return 0 ;
-  }
-
-  /* remove the page_list struct from the list, release our hold on the
-  ** page, and free the page_list struct
-  */
-  pl = inode->u.reiserfs_i.i_converted_page ;
-  if (pl) {
-    if (pl->next) {
-      pl->next->prev = pl->prev ;
-    }
-    if (pl->prev) {
-      pl->prev->next = pl->next ;
-    }
-    if (SB_JOURNAL(inode->i_sb)->j_flush_pages == pl) {
-      SB_JOURNAL(inode->i_sb)->j_flush_pages = pl->next ;
-    }
-    put_page(pl->page) ;
-    reiserfs_kfree(pl, sizeof(struct reiserfs_page_list), inode->i_sb) ;
-    inode->u.reiserfs_i.i_converted_page = NULL ;
-    inode->u.reiserfs_i.i_conversion_trans_id = 0 ;
-  }
-  return 0 ;
-}
-
-/*
-** Called after a direct to indirect transaction.  The unformatted node
-** must be flushed to disk before the transaction commits, otherwise, we
-** risk losing the data from the direct item.  This adds the page
-** containing the unformatted node to a list of pages that need flushing.
-**
-** it calls get_page(page), so the page won't disappear until we've
-** flushed or removed it from our list.
-**
-** pointers to the reiserfs_page_list struct are stored in the inode, 
-** so this page can be quickly removed from the list after the tail is
-** converted back into a direct item.
-**
-** If we fail to find the memory for the reiserfs_page_list struct, we
-** just sync the page now.  Not good, but safe.
-**
-** since this must be called with the page locked, we always set
-** the do_not_lock field in the page_list struct we allocate
-**
-*/
-int reiserfs_add_page_to_flush_list(struct reiserfs_transaction_handle *th, 
-                                    struct inode *inode,
-				    struct buffer_head *bh) {
-  struct reiserfs_page_list *new_pl ;
-
-/* debugging use ONLY.  Do not define this on data you care about. */
-#ifdef REISERFS_NO_FLUSH_AFTER_CONVERT
-  return 0 ;
-#endif
-
-  get_page(bh->b_page) ;
-  new_pl = reiserfs_kmalloc(sizeof(struct reiserfs_page_list), GFP_NOFS,
-                            inode->i_sb) ;
-  if (!new_pl) {
-    put_page(bh->b_page) ;
-    reiserfs_warning("journal-2480: forced to flush page, out of memory\n") ;
-    ll_rw_block(WRITE, 1, &bh) ;
-    wait_on_buffer(bh) ;
-    if (!buffer_uptodate(bh)) {
-      reiserfs_panic(inode->i_sb, "journal-2484: error writing buffer %lu to disk\n", bh->b_blocknr) ;
-    }
-    inode->u.reiserfs_i.i_converted_page = NULL ;
-    return 0 ;
-  }
-
-  new_pl->page = bh->b_page ;
-  new_pl->do_not_lock = 1 ;
-  new_pl->blocknr = bh->b_blocknr ;
-  new_pl->next = SB_JOURNAL(inode->i_sb)->j_flush_pages; 
-  if (new_pl->next) {
-    new_pl->next->prev = new_pl ;
-  }
-  new_pl->prev = NULL ;
-  SB_JOURNAL(inode->i_sb)->j_flush_pages = new_pl ;
-  
-  /* if we have numbers from an old transaction, zero the converted
-  ** page, it has already been flushed and freed
-  */
-  if (inode->u.reiserfs_i.i_conversion_trans_id &&
-      inode->u.reiserfs_i.i_conversion_trans_id < th->t_trans_id) {
-    inode->u.reiserfs_i.i_converted_page = NULL ;
-  }
-  if (inode->u.reiserfs_i.i_converted_page) {
-    reiserfs_panic(inode->i_sb, "journal-2501: inode already had a converted page\n") ;
-  }
-  inode->u.reiserfs_i.i_converted_page = new_pl ;
-  inode->u.reiserfs_i.i_conversion_trans_id = th->t_trans_id ;
-  return 0 ;
-}
-
 /*
 ** long and ugly.  If flush, will not return until all commit
 ** blocks and all real buffers in the trans are on disk.
@@ -3137,11 +2863,8 @@
   jindex = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; 
   SB_JOURNAL_LIST_INDEX(p_s_sb) = jindex ;
 
-  /* make sure to flush any data converted from direct items to
-  ** indirect items before allowing the commit blocks to reach the
-  ** disk
-  */
-  flush_pages_before_commit(th, p_s_sb) ;
+  /* write any buffers that must hit disk before this commit is done */
+  fsync_inode_buffers(&(SB_JOURNAL(p_s_sb)->j_dummy_inode)) ;
 
   /* honor the flush and async wishes from the caller */
   if (flush) {
diff -Nru a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
--- a/fs/reiserfs/stree.c	Thu Jul 12 10:43:24 2001
+++ b/fs/reiserfs/stree.c	Thu Jul 12 10:43:24 2001
@@ -1192,13 +1192,21 @@
 		/* Search for the buffer in cache. */
 		p_s_un_bh = get_hash_table(p_s_sb->s_dev, *p_n_unfm_pointer, n_blk_size);
 
-		if (p_s_un_bh && buffer_locked(p_s_un_bh)) {
-		  __wait_on_buffer(p_s_un_bh) ;
-		  if ( item_moved (&s_ih, p_s_path) )  {
-		      need_research = 1;
-		      brelse(p_s_un_bh) ;
-		      break ;
-		  }
+		if (p_s_un_bh) {
+		    mark_buffer_clean(p_s_un_bh) ;
+		    if (buffer_locked(p_s_un_bh)) {
+		        __wait_on_buffer(p_s_un_bh) ;
+		    }
+		    /* even if the item moves, the block number of the
+		    ** unformatted node we want to cut won't.  So, it was
+		    ** safe to clean the buffer here, this block _will_
+		    ** get freed during this call to prepare_for_delete_or_cut
+		    */
+		    if ( item_moved (&s_ih, p_s_path) )  {
+		        need_research = 1;
+		        brelse(p_s_un_bh) ;
+		        break ;
+		    }
 		}
 		if ( p_s_un_bh && block_in_use (p_s_un_bh)) {
 		    /* Block is locked or held more than by one holder and by
@@ -1243,30 +1251,7 @@
 		if ( item_moved (&s_ih, p_s_path) )  {
 		    need_research = 1;
 		    break ;
-#if 0
-		    reiserfs_prepare_for_journal(p_s_sb, 
-		                                 PATH_PLAST_BUFFER(p_s_path),
-						 1) ;
-		    if ( comp_items(&s_ih, p_s_path) )  {
-		      reiserfs_restore_prepared_buffer(p_s_sb, 
-		                               PATH_PLAST_BUFFER(p_s_path)) ;
-		      brelse(p_s_un_bh);
-		      break;
-		    }
-		    *p_n_unfm_pointer = 0;
-		    journal_mark_dirty (th,p_s_sb,PATH_PLAST_BUFFER(p_s_path));
-
-		    reiserfs_free_block(th, p_s_sb, block_addr);
-		    if (p_s_un_bh) {
-			mark_buffer_clean (p_s_un_bh);
-			brelse (p_s_un_bh);
-		    }
-		    if ( comp_items(&s_ih, p_s_path) )  {
-		      break ;
-		    }
-#endif
 		}
-
 	    }
 
 	    /* a trick.  If the buffer has been logged, this
@@ -1793,11 +1778,11 @@
     
     do_balance(&s_cut_balance, NULL, NULL, c_mode);
     if ( n_is_inode_locked ) {
-        /* we've converted from indirect to direct, we must remove
-	** ourselves from the list of pages that need flushing before
-	** this transaction can commit
+	/* we've done an indirect->direct conversion.  when the data block 
+	** was freed, it was removed from the list of blocks that must 
+	** be flushed before the transaction commits, so we don't need to 
+	** deal with it here.
 	*/
-	reiserfs_remove_page_from_flush_list(th, p_s_inode) ;
 	p_s_inode->u.reiserfs_i.i_pack_on_close = 0 ;
     }
     return n_ret_value;
diff -Nru a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
--- a/include/linux/reiserfs_fs.h	Thu Jul 12 10:43:24 2001
+++ b/include/linux/reiserfs_fs.h	Thu Jul 12 10:43:24 2001
@@ -1541,29 +1541,6 @@
   __u32 j_mount_id ;
 } ;
 
-/* these are used to keep flush pages that contain converted direct items.
-** if the page is not flushed before the transaction that converted it
-** is committed, we risk losing data
-**
-** note, while a page is in this list, its counter is incremented.
-*/
-struct reiserfs_page_list {
-  struct reiserfs_page_list *next ;
-  struct reiserfs_page_list *prev ;
-  struct page *page ;
-  unsigned long blocknr ; /* block number holding converted data */
-
-  /* if a transaction writer has the page locked the flush_page_list
-  ** function doesn't need to (and can't) get the lock while flushing
-  ** the page.  do_not_lock needs to be set by anyone who calls journal_end
-  ** with a page lock held.  They have to look in the inode and see
-  ** if the inode has the page they have locked in the flush list.
-  **
-  ** this sucks.
-  */
-  int do_not_lock ; 
-} ;
-
 extern task_queue reiserfs_commit_thread_tq ;
 extern wait_queue_head_t reiserfs_commit_thread_wait ;
 
diff -Nru a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
--- a/include/linux/reiserfs_fs_i.h	Thu Jul 12 10:43:24 2001
+++ b/include/linux/reiserfs_fs_i.h	Thu Jul 12 10:43:24 2001
@@ -3,11 +3,6 @@
 
 #include <linux/list.h>
 
-/* these are used to keep track of the pages that need
-** flushing before the current transaction can commit
-*/
-struct reiserfs_page_list ;
-
 struct reiserfs_inode_info {
   __u32 i_key [4];/* key is still 4 32 bit integers */
   
@@ -21,21 +16,6 @@
   int i_pack_on_close ; // file might need tail packing on close 
 
   __u32 i_first_direct_byte; // offset of first byte stored in direct item.
-
-  /* pointer to the page that must be flushed before 
-  ** the current transaction can commit.
-  **
-  ** this pointer is only used when the tail is converted back into
-  ** a direct item, or the file is deleted
-  */
-  struct reiserfs_page_list *i_converted_page ;
-
-  /* we save the id of the transaction when we did the direct->indirect
-  ** conversion.  That allows us to flush the buffers to disk
-  ** without having to update this inode to zero out the converted
-  ** page variable
-  */
-  int i_conversion_trans_id ;
 
 				/* My guess is this contains the first
                                    unused block of a sequence of
diff -Nru a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
--- a/include/linux/reiserfs_fs_sb.h	Thu Jul 12 10:43:24 2001
+++ b/include/linux/reiserfs_fs_sb.h	Thu Jul 12 10:43:24 2001
@@ -249,6 +249,7 @@
   int j_free_bitmap_nodes ;
   int j_used_bitmap_nodes ;
   struct list_head j_bitmap_nodes ;
+  struct inode j_dummy_inode ;
   struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS] ;	/* array of bitmaps to record the deleted blocks */
   struct reiserfs_journal_list j_journal_list[JOURNAL_LIST_COUNT] ;	    /* array of all the journal lists */
   struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; 	    /* hash table for real buffer heads in current trans */ 




^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [reiserfs-list] [PATCH] cleanup reiserfs direct->indirect conversions
  2001-07-12 14:11 [PATCH] cleanup reiserfs direct->indirect conversions Chris Mason
@ 2001-07-16 19:30 ` Nikita Danilov
  0 siblings, 0 replies; 2+ messages in thread
From: Nikita Danilov @ 2001-07-16 19:30 UTC (permalink / raw)
  To: linux-kernel, reiserfs-list; +Cc: torvalds

Hello,

The following patch for 2.4.7-pre6 implements NFS inode generation support
for ReiserFS. It was ported from an earlier patch by Neil Brown and Chris
Mason. The inode generation is stored persistently in an on-disk field
that is unused for regular files. The generation is filled from a global
"generation counter" that is stored persistently in the super-block and
incremented on each inode deletion. Hopefully this will cure most of the
reiserfs+knfsd woes for 2.4.6.

Linus, please apply.

On behalf of ReiserFS team,
Nikita.
------------------------------------------------------------
diff -Nru a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
--- a/fs/reiserfs/inode.c	Thu Jul 12 17:37:27 2001
+++ b/fs/reiserfs/inode.c	Thu Jul 12 17:37:27 2001
@@ -914,7 +914,6 @@
 
 
     copy_key (INODE_PKEY (inode), &(ih->ih_key));
-    inode->i_generation = INODE_PKEY (inode)->k_dir_id;
     inode->i_blksize = PAGE_SIZE;
 
     INIT_LIST_HEAD(&inode->u.reiserfs_i.i_prealloc_list) ;
@@ -934,6 +933,7 @@
 	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
 
 	inode->i_blocks = le32_to_cpu (sd->u.sd_blocks);
+	inode->i_generation = INODE_PKEY (inode)->k_dir_id;
 	blocks = (inode->i_size + 511) >> 9;
 	blocks = _ROUND_UP (blocks, inode->i_blksize >> 9);
 	if (inode->i_blocks > blocks) {
@@ -968,6 +968,10 @@
 	inode->i_ctime = le32_to_cpu (sd->sd_ctime);
 	inode->i_blocks = le32_to_cpu (sd->sd_blocks);
 	rdev = le32_to_cpu (sd->u.sd_rdev);
+	if( S_ISCHR( inode -> i_mode ) || S_ISBLK( inode -> i_mode ) )
+	    inode->i_generation = INODE_PKEY (inode)->k_dir_id;
+	else
+	    inode->i_generation = le32_to_cpu( sd->u.sd_generation );
     }
 
     /* nopack = 0, by default */
@@ -1005,8 +1009,11 @@
     sd_v2->sd_atime = cpu_to_le32 (inode->i_atime);
     sd_v2->sd_ctime = cpu_to_le32 (inode->i_ctime);
     sd_v2->sd_blocks = cpu_to_le32 (inode->i_blocks);
-    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+    if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 	sd_v2->u.sd_rdev = cpu_to_le32 (inode->i_rdev);
+    } else {
+	sd_v2->u.sd_generation = cpu_to_le32( inode -> i_generation );
+    }
 }
 
 
@@ -1208,10 +1215,20 @@
 	    key.on_disk_key.k_objectid = data[0] ;
 	    key.on_disk_key.k_dir_id = data[1] ;
 	    inode = reiserfs_iget(sb, &key) ;
+	    if (inode && (fhtype == 3 || fhtype == 6) &&
+		data[2] != inode->i_generation) {
+		    iput(inode) ;
+		    inode = NULL ;
+	    }
     } else {
-	    key.on_disk_key.k_objectid = data[2] ;
-	    key.on_disk_key.k_dir_id = data[3] ;
+	    key.on_disk_key.k_objectid = data[fhtype==6?3:2] ;
+	    key.on_disk_key.k_dir_id = data[fhtype==6?4:3] ;
 	    inode = reiserfs_iget(sb, &key) ;
+	    if (inode && fhtype == 6 &&
+		data[5] != inode->i_generation) {
+		    iput(inode) ;
+		    inode = NULL ;
+	    }
     }
 out:
     if (!inode)
@@ -1246,21 +1263,23 @@
     struct inode *inode = dentry->d_inode ;
     int maxlen = *lenp;
     
-    if (maxlen < 2)
+    if (maxlen < 3)
         return 255 ;
 
     data[0] = inode->i_ino ;
     data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
-    *lenp = 2;
+    data[2] = inode->i_generation ;
+    *lenp = 3;
     /* no room for directory info? return what we've stored so far */
-    if (maxlen < 4 || ! need_parent)
-        return 2 ;
+    if (maxlen < 6 || ! need_parent)
+        return 3;
 
     inode = dentry->d_parent->d_inode ;
-    data[2] = inode->i_ino ;
-    data[3] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
-    *lenp = 4;
-    return 4; 
+    data[3] = inode->i_ino ;
+    data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
+    data[5] = inode->i_generation ;
+    *lenp = 6;
+    return 6; 
 }
 
 
@@ -1447,6 +1466,20 @@
 	return NULL;
     }
     if (old_format_only (sb))
+      /* not a perfect generation count, as object ids can be reused, but this
+      ** is as good as reiserfs can do right now.
+      ** note that the private part of inode isn't filled in yet, we have
+      ** to use the directory.
+      */
+      inode->i_generation = INODE_PKEY (dir)->k_objectid;
+    else
+#if defined( USE_INODE_GENERATION_COUNTER )
+      inode->i_generation = 
+	le32_to_cpu( sb -> u.reiserfs_sb.s_rs -> s_inode_generation );
+#else
+      inode->i_generation = ++event;
+#endif
+    if (old_format_only (sb))
 	make_le_item_head (&ih, 0, ITEM_VERSION_1, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
     else
 	make_le_item_head (&ih, 0, ITEM_VERSION_2, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
@@ -1536,10 +1569,6 @@
 	return NULL;
     }
 
-    /* not a perfect generation count, as object ids can be reused, but this
-    ** is as good as reiserfs can do right now
-    */
-    inode->i_generation = INODE_PKEY (inode)->k_dir_id;
     insert_inode_hash (inode);
     // we do not mark inode dirty: on disk content matches to the
     // in-core one
diff -Nru a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
--- a/fs/reiserfs/stree.c	Thu Jul 12 17:37:27 2001
+++ b/fs/reiserfs/stree.c	Thu Jul 12 17:37:27 2001
@@ -1560,6 +1560,17 @@
         reiserfs_warning("clm-4001: deleting inode with link count==%d\n", inode->i_nlink) ;
     }
 #endif
+#if defined( USE_INODE_GENERATION_COUNTER )
+    if( !old_format_only ( th -> t_super ) )
+      {
+       __u32 *inode_generation;
+       
+       inode_generation = 
+         &th -> t_super -> u.reiserfs_sb.s_rs -> s_inode_generation;
+       *inode_generation = cpu_to_le32( le32_to_cpu( *inode_generation ) + 1 );
+      }
+/* USE_INODE_GENERATION_COUNTER */
+#endif
     reiserfs_delete_solid_item (th, INODE_PKEY (inode));
 }
 
diff -Nru a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
--- a/include/linux/reiserfs_fs.h	Thu Jul 12 17:37:27 2001
+++ b/include/linux/reiserfs_fs.h	Thu Jul 12 17:37:27 2001
@@ -65,6 +65,9 @@
 /* enable journalling */
 #define ENABLE_JOURNAL
 
+#define USE_INODE_GENERATION_COUNTER
+
+
 #ifdef __KERNEL__
 
 /* #define REISERFS_CHECK */
@@ -708,6 +711,7 @@
     __u32 sd_blocks;
     union {
 	__u32 sd_rdev;
+	__u32 sd_generation;
       //__u32 sd_first_direct_byte; 
       /* first byte of file which is stored in a
 				       direct item: except that if it equals 1
diff -Nru a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
--- a/include/linux/reiserfs_fs_sb.h	Thu Jul 12 17:37:27 2001
+++ b/include/linux/reiserfs_fs_sb.h	Thu Jul 12 17:37:27 2001
@@ -60,7 +60,8 @@
                                    don't need to save bytes in the
                                    superblock. -Hans */
   __u16 s_reserved;
-  char s_unused[128] ;			/* zero filled by mkreiserfs */
+  __u32 s_inode_generation;
+  char s_unused[124] ;			/* zero filled by mkreiserfs */
 } __attribute__ ((__packed__));
 
 #define SB_SIZE (sizeof(struct reiserfs_super_block))
------------------------------------------------------------

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2001-07-16 19:31 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-07-12 14:11 [PATCH] cleanup reiserfs direct->indirect conversions Chris Mason
2001-07-16 19:30 ` [reiserfs-list] " Nikita Danilov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).