All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] ext4: fix data integrity sync in ordered mode
@ 2014-05-07  5:15 Namjae Jeon
  2014-05-07 13:55 ` Jan Kara
  0 siblings, 1 reply; 5+ messages in thread
From: Namjae Jeon @ 2014-05-07  5:15 UTC (permalink / raw)
  To: Theodore Ts'o, Jan Kara; +Cc: linux-ext4

When we perform a data integrity sync we tag all the dirty pages with
PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
Later we check for this tag in write_cache_pages_da and creates a
struct mpage_da_data containing contiguously indexed pages tagged with this
tag and sync these pages with a call to mpage_da_map_and_submit.
This process is done in while loop until all the PAGECACHE_TAG_TOWRITE pages
are synced. We also do journal start and stop in each iteration.
journal_stop could initiate journal commit which would call ext4_writepage
which in turn will call ext4_bio_write_page even for delayed OR unwritten
buffers. When ext4_bio_write_page is called for such buffers, even though it
does not sync them but it clears the PAGECACHE_TAG_TOWRITE of the corresponding
page and hence these pages are also not synced by the currently running data
integrity sync. We will end up with dirty pages although sync is completed.

This could cause a potential data loss when the sync call is followed by a
truncate_pagecache call, which is exactly the case in collapse_range.
(It will cause generic/127 failure in xfstests)

To avoid this issue, we can use set_page_writeback_keepwrite instead of
set_page_writeback, which doesn't clear TOWRITE tag.

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
---

Changelog
v3:
 - define test_set_page_writeback() and test_set_page_writeback_keepwrite()
   as calls to __test_set_page_writeback(page, keep_write)

v2:
 - As Jan Kara suggested, Create set_page_writeback_keepwrite which doesn't
   clear TOWRITE tag.

 fs/ext4/ext4.h             |  3 ++-
 fs/ext4/inode.c            |  6 ++++--
 fs/ext4/page-io.c          |  8 ++++++--
 include/linux/page-flags.h | 12 +++++++++++-
 mm/page-writeback.c        | 11 ++++++-----
 5 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6b45afa..78fed7b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
 			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b1dc334..8d9cb26 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ext4_io_submit io_submit;
+	bool keep_towrite = false;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
 			unlock_page(page);
 			return 0;
 		}
+		keep_towrite = true;
 	}
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
 		unlock_page(page);
 		return -ENOMEM;
 	}
-	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
 	ext4_io_submit(&io_submit);
 	/* Drop io_end reference we got from init */
 	ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 	else
 		len = PAGE_CACHE_SIZE;
 	clear_page_dirty_for_io(page);
-	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
 	mpd->first_page++;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a64e7a..4a2dd07 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ submit_and_retry:
 int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct page *page,
 			int len,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc,
+			bool keep_towrite)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, blocksize;
@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 
-	set_page_writeback(page);
+	if (keep_towrite)
+		set_page_writeback_keepwrite(page);
+	else
+		set_page_writeback(page);
 	ClearPageError(page);
 
 	/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index bc2007e..9b68718 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 
 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)			\
+	__test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)	\
+	__test_set_page_writeback(page, true)
 
 static inline void set_page_writeback(struct page *page)
 {
 	test_set_page_writeback(page);
 }
 
+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+	test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
  * System with lots of page flags available. This allows separate
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 310d329..ba037e3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		radix_tree_tag_clear(&mapping->page_tree,
-				     page_index(page),
-				     PAGECACHE_TAG_TOWRITE);
+		if (!keep_write)
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the
-- 
1.7.11-rc0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] ext4: fix data integrity sync in ordered mode
  2014-05-07  5:15 [PATCH v3] ext4: fix data integrity sync in ordered mode Namjae Jeon
@ 2014-05-07 13:55 ` Jan Kara
  2014-05-07 23:08   ` Namjae Jeon
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Kara @ 2014-05-07 13:55 UTC (permalink / raw)
  To: Namjae Jeon; +Cc: Theodore Ts'o, Jan Kara, linux-ext4

On Wed 07-05-14 14:15:24, Namjae Jeon wrote:
> When we perform a data integrity sync we tag all the dirty pages with
> PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
> Later we check for this tag in write_cache_pages_da and creates a
> struct mpage_da_data containing contiguously indexed pages tagged with this
> tag and sync these pages with a call to mpage_da_map_and_submit.
> This process is done in while loop until all the PAGECACHE_TAG_TOWRITE pages
> are synced. We also do journal start and stop in each iteration.
> journal_stop could initiate journal commit which would call ext4_writepage
> which in turn will call ext4_bio_write_page even for delayed OR unwritten
> buffers. When ext4_bio_write_page is called for such buffers, even though it
> does not sync them but it clears the PAGECACHE_TAG_TOWRITE of the corresponding
> page and hence these pages are also not synced by the currently running data
> integrity sync. We will end up with dirty pages although sync is completed.
> 
> This could cause a potential data loss when the sync call is followed by a
> truncate_pagecache call, which is exactly the case in collapse_range.
> (It will cause generic/127 failure in xfstests)
> 
> To avoid this issue, we can use set_page_writeback_keepwrite instead of
> set_page_writeback, which doesn't clear TOWRITE tag.
> 
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
> Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
  The patch looks good. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>

BTW you should also cc linux-mm@kvack.org with this patch since it touches
the mm code.

									Honza
> ---
> 
> Changelog
> v3:
>  - define test_set_page_writeback() and test_set_page_writeback_keepwrite()
>    as calls to __test_set_page_writeback(page, keep_write)
> 
> v2:
>  - As Jan Kara suggested, Create set_page_writeback_keepwrite which doesn't
>    clear TOWRITE tag.
> 
>  fs/ext4/ext4.h             |  3 ++-
>  fs/ext4/inode.c            |  6 ++++--
>  fs/ext4/page-io.c          |  8 ++++++--
>  include/linux/page-flags.h | 12 +++++++++++-
>  mm/page-writeback.c        | 11 ++++++-----
>  5 files changed, 29 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 6b45afa..78fed7b 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
>  extern int ext4_bio_write_page(struct ext4_io_submit *io,
>  			       struct page *page,
>  			       int len,
> -			       struct writeback_control *wbc);
> +			       struct writeback_control *wbc,
> +			       bool keep_towrite);
>  
>  /* mmp.c */
>  extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index b1dc334..8d9cb26 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
>  	struct buffer_head *page_bufs = NULL;
>  	struct inode *inode = page->mapping->host;
>  	struct ext4_io_submit io_submit;
> +	bool keep_towrite = false;
>  
>  	trace_ext4_writepage(page);
>  	size = i_size_read(inode);
> @@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
>  			unlock_page(page);
>  			return 0;
>  		}
> +		keep_towrite = true;
>  	}
>  
>  	if (PageChecked(page) && ext4_should_journal_data(inode))
> @@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
>  		unlock_page(page);
>  		return -ENOMEM;
>  	}
> -	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
> +	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
>  	ext4_io_submit(&io_submit);
>  	/* Drop io_end reference we got from init */
>  	ext4_put_io_end_defer(io_submit.io_end);
> @@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
>  	else
>  		len = PAGE_CACHE_SIZE;
>  	clear_page_dirty_for_io(page);
> -	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
> +	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
>  	if (!err)
>  		mpd->wbc->nr_to_write--;
>  	mpd->first_page++;
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index 1a64e7a..4a2dd07 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -401,7 +401,8 @@ submit_and_retry:
>  int ext4_bio_write_page(struct ext4_io_submit *io,
>  			struct page *page,
>  			int len,
> -			struct writeback_control *wbc)
> +			struct writeback_control *wbc,
> +			bool keep_towrite)
>  {
>  	struct inode *inode = page->mapping->host;
>  	unsigned block_start, blocksize;
> @@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
>  	BUG_ON(!PageLocked(page));
>  	BUG_ON(PageWriteback(page));
>  
> -	set_page_writeback(page);
> +	if (keep_towrite)
> +		set_page_writeback_keepwrite(page);
> +	else
> +		set_page_writeback(page);
>  	ClearPageError(page);
>  
>  	/*
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index bc2007e..9b68718 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
>  extern void cancel_dirty_page(struct page *page, unsigned int account_size);
>  
>  int test_clear_page_writeback(struct page *page);
> -int test_set_page_writeback(struct page *page);
> +int __test_set_page_writeback(struct page *page, bool keep_write);
> +
> +#define test_set_page_writeback(page)			\
> +	__test_set_page_writeback(page, false)
> +#define test_set_page_writeback_keepwrite(page)	\
> +	__test_set_page_writeback(page, true)
>  
>  static inline void set_page_writeback(struct page *page)
>  {
>  	test_set_page_writeback(page);
>  }
>  
> +static inline void set_page_writeback_keepwrite(struct page *page)
> +{
> +	test_set_page_writeback_keepwrite(page);
> +}
> +
>  #ifdef CONFIG_PAGEFLAGS_EXTENDED
>  /*
>   * System with lots of page flags available. This allows separate
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 310d329..ba037e3 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
>  	return ret;
>  }
>  
> -int test_set_page_writeback(struct page *page)
> +int __test_set_page_writeback(struct page *page, bool keep_write)
>  {
>  	struct address_space *mapping = page_mapping(page);
>  	int ret;
> @@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
>  			radix_tree_tag_clear(&mapping->page_tree,
>  						page_index(page),
>  						PAGECACHE_TAG_DIRTY);
> -		radix_tree_tag_clear(&mapping->page_tree,
> -				     page_index(page),
> -				     PAGECACHE_TAG_TOWRITE);
> +		if (!keep_write)
> +			radix_tree_tag_clear(&mapping->page_tree,
> +						page_index(page),
> +						PAGECACHE_TAG_TOWRITE);
>  		spin_unlock_irqrestore(&mapping->tree_lock, flags);
>  	} else {
>  		ret = TestSetPageWriteback(page);
> @@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
>  	return ret;
>  
>  }
> -EXPORT_SYMBOL(test_set_page_writeback);
> +EXPORT_SYMBOL(__test_set_page_writeback);
>  
>  /*
>   * Return true if any of the pages in the mapping are marked with the
> -- 
> 1.7.11-rc0
> 
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH v3] ext4: fix data integrity sync in ordered mode
  2014-05-07 13:55 ` Jan Kara
@ 2014-05-07 23:08   ` Namjae Jeon
  0 siblings, 0 replies; 5+ messages in thread
From: Namjae Jeon @ 2014-05-07 23:08 UTC (permalink / raw)
  To: 'Jan Kara'; +Cc: 'Theodore Ts'o', 'linux-ext4'

> 
> On Wed 07-05-14 14:15:24, Namjae Jeon wrote:
> > When we perform a data integrity sync we tag all the dirty pages with
> > PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
> > Later we check for this tag in write_cache_pages_da and creates a
> > struct mpage_da_data containing contiguously indexed pages tagged with this
> > tag and sync these pages with a call to mpage_da_map_and_submit.
> > This process is done in while loop until all the PAGECACHE_TAG_TOWRITE pages
> > are synced. We also do journal start and stop in each iteration.
> > journal_stop could initiate journal commit which would call ext4_writepage
> > which in turn will call ext4_bio_write_page even for delayed OR unwritten
> > buffers. When ext4_bio_write_page is called for such buffers, even though it
> > does not sync them but it clears the PAGECACHE_TAG_TOWRITE of the corresponding
> > page and hence these pages are also not synced by the currently running data
> > integrity sync. We will end up with dirty pages although sync is completed.
> >
> > This could cause a potential data loss when the sync call is followed by a
> > truncate_pagecache call, which is exactly the case in collapse_range.
> > (It will cause generic/127 failure in xfstests)
> >
> > To avoid this issue, we can use set_page_writeback_keepwrite instead of
> > set_page_writeback, which doesn't clear TOWRITE tag.
> >
> > Cc: Jan Kara <jack@suse.cz>
> > Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
> > Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
>   The patch looks good. You can add:
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
> BTW you should also cc linux-mm@kvack.org with this patch since it touches
> the mm code.
Okay. Thanks for your review!
> 
> 									Honza


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v3] ext4: fix data integrity sync in ordered mode
@ 2014-05-07 23:16 ` Namjae Jeon
  0 siblings, 0 replies; 5+ messages in thread
From: Namjae Jeon @ 2014-05-07 23:16 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-ext4, linux-mm, Jan Kara

When we perform a data integrity sync we tag all the dirty pages with
PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
Later we check for this tag in write_cache_pages_da and creates a
struct mpage_da_data containing contiguously indexed pages tagged with this
tag and sync these pages with a call to mpage_da_map_and_submit.
This process is done in while loop until all the PAGECACHE_TAG_TOWRITE pages
are synced. We also do journal start and stop in each iteration.
journal_stop could initiate journal commit which would call ext4_writepage
which in turn will call ext4_bio_write_page even for delayed OR unwritten
buffers. When ext4_bio_write_page is called for such buffers, even though it
does not sync them but it clears the PAGECACHE_TAG_TOWRITE of the corresponding
page and hence these pages are also not synced by the currently running data
integrity sync. We will end up with dirty pages although sync is completed.

This could cause a potential data loss when the sync call is followed by a
truncate_pagecache call, which is exactly the case in collapse_range.
(It will cause generic/127 failure in xfstests)

To avoid this issue, we can use set_page_writeback_keepwrite instead of
set_page_writeback, which doesn't clear TOWRITE tag.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---

Changelog
v3:
 - define test_set_page_writeback() and test_set_page_writeback_keepwrite()
   as calls to __test_set_page_writeback(page, keep_write)

v2:
 - As Jan Kara suggested, Create set_page_writeback_keepwrite which doesn't
   clear TOWRITE tag.

 fs/ext4/ext4.h             |  3 ++-
 fs/ext4/inode.c            |  6 ++++--
 fs/ext4/page-io.c          |  8 ++++++--
 include/linux/page-flags.h | 12 +++++++++++-
 mm/page-writeback.c        | 11 ++++++-----
 5 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6b45afa..78fed7b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
 			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b1dc334..8d9cb26 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ext4_io_submit io_submit;
+	bool keep_towrite = false;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
 			unlock_page(page);
 			return 0;
 		}
+		keep_towrite = true;
 	}
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
 		unlock_page(page);
 		return -ENOMEM;
 	}
-	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
 	ext4_io_submit(&io_submit);
 	/* Drop io_end reference we got from init */
 	ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 	else
 		len = PAGE_CACHE_SIZE;
 	clear_page_dirty_for_io(page);
-	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
 	mpd->first_page++;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a64e7a..4a2dd07 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ submit_and_retry:
 int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct page *page,
 			int len,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc,
+			bool keep_towrite)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, blocksize;
@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 
-	set_page_writeback(page);
+	if (keep_towrite)
+		set_page_writeback_keepwrite(page);
+	else
+		set_page_writeback(page);
 	ClearPageError(page);
 
 	/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index bc2007e..9b68718 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 
 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)			\
+	__test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)	\
+	__test_set_page_writeback(page, true)
 
 static inline void set_page_writeback(struct page *page)
 {
 	test_set_page_writeback(page);
 }
 
+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+	test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
  * System with lots of page flags available. This allows separate
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 310d329..ba037e3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		radix_tree_tag_clear(&mapping->page_tree,
-				     page_index(page),
-				     PAGECACHE_TAG_TOWRITE);
+		if (!keep_write)
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the
-- 
1.7.11-rc0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v3] ext4: fix data integrity sync in ordered mode
@ 2014-05-07 23:16 ` Namjae Jeon
  0 siblings, 0 replies; 5+ messages in thread
From: Namjae Jeon @ 2014-05-07 23:16 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-ext4, linux-mm, Jan Kara

When we perform a data integrity sync we tag all the dirty pages with
PAGECACHE_TAG_TOWRITE at start of ext4_da_writepages.
Later we check for this tag in write_cache_pages_da and creates a
struct mpage_da_data containing contiguously indexed pages tagged with this
tag and sync these pages with a call to mpage_da_map_and_submit.
This process is done in while loop until all the PAGECACHE_TAG_TOWRITE pages
are synced. We also do journal start and stop in each iteration.
journal_stop could initiate journal commit which would call ext4_writepage
which in turn will call ext4_bio_write_page even for delayed OR unwritten
buffers. When ext4_bio_write_page is called for such buffers, even though it
does not sync them but it clears the PAGECACHE_TAG_TOWRITE of the corresponding
page and hence these pages are also not synced by the currently running data
integrity sync. We will end up with dirty pages although sync is completed.

This could cause a potential data loss when the sync call is followed by a
truncate_pagecache call, which is exactly the case in collapse_range.
(It will cause generic/127 failure in xfstests)

To avoid this issue, we can use set_page_writeback_keepwrite instead of
set_page_writeback, which doesn't clear TOWRITE tag.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---

Changelog
v3:
 - define test_set_page_writeback() and test_set_page_writeback_keepwrite()
   as calls to __test_set_page_writeback(page, keep_write)

v2:
 - As Jan Kara suggested, Create set_page_writeback_keepwrite which doesn't
   clear TOWRITE tag.

 fs/ext4/ext4.h             |  3 ++-
 fs/ext4/inode.c            |  6 ++++--
 fs/ext4/page-io.c          |  8 ++++++--
 include/linux/page-flags.h | 12 +++++++++++-
 mm/page-writeback.c        | 11 ++++++-----
 5 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6b45afa..78fed7b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2771,7 +2771,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
 			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b1dc334..8d9cb26 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ext4_io_submit io_submit;
+	bool keep_towrite = false;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
 			unlock_page(page);
 			return 0;
 		}
+		keep_towrite = true;
 	}
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
 		unlock_page(page);
 		return -ENOMEM;
 	}
-	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
 	ext4_io_submit(&io_submit);
 	/* Drop io_end reference we got from init */
 	ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 	else
 		len = PAGE_CACHE_SIZE;
 	clear_page_dirty_for_io(page);
-	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
 	mpd->first_page++;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a64e7a..4a2dd07 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ submit_and_retry:
 int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct page *page,
 			int len,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc,
+			bool keep_towrite)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, blocksize;
@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 
-	set_page_writeback(page);
+	if (keep_towrite)
+		set_page_writeback_keepwrite(page);
+	else
+		set_page_writeback(page);
 	ClearPageError(page);
 
 	/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index bc2007e..9b68718 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 
 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)			\
+	__test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)	\
+	__test_set_page_writeback(page, true)
 
 static inline void set_page_writeback(struct page *page)
 {
 	test_set_page_writeback(page);
 }
 
+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+	test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
  * System with lots of page flags available. This allows separate
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 310d329..ba037e3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		radix_tree_tag_clear(&mapping->page_tree,
-				     page_index(page),
-				     PAGECACHE_TAG_TOWRITE);
+		if (!keep_write)
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the
-- 
1.7.11-rc0

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-05-07 23:16 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-07  5:15 [PATCH v3] ext4: fix data integrity sync in ordered mode Namjae Jeon
2014-05-07 13:55 ` Jan Kara
2014-05-07 23:08   ` Namjae Jeon
2014-05-07 23:16 Namjae Jeon
2014-05-07 23:16 ` Namjae Jeon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.