All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/12] Convert write_cache_pages() to an iterator
@ 2023-06-26 17:35 Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 01/12] writeback: Factor out writeback_finish() Matthew Wilcox (Oracle)
                   ` (14 more replies)
  0 siblings, 15 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Dave Howells doesn't like the indirect function call imposed by
write_cache_pages(), so this series refactors it into an iterator.  I took
the opportunity to add the ability to iterate a folio_batch without
needing an external variable.

This is against next-20230623.  If you try to apply it on top of a tree
which doesn't include the pagevec removal series, IT WILL CRASH because
it won't reinitialise folio_batch->i and the iteration will index out
of bounds.

I have a feeling the 'done' parameter could have a better name, but I
can't think what it might be.

Matthew Wilcox (Oracle) (12):
  writeback: Factor out writeback_finish()
  writeback: Factor writeback_get_batch() out of write_cache_pages()
  writeback: Factor should_writeback_folio() out of write_cache_pages()
  writeback: Simplify the loops in write_cache_pages()
  pagevec: Add ability to iterate a queue
  writeback: Use the folio_batch queue iterator
  writeback: Factor writeback_iter_init() out of write_cache_pages()
  writeback: Factor writeback_get_folio() out of write_cache_pages()
  writeback: Factor writeback_iter_next() out of write_cache_pages()
  writeback: Add for_each_writeback_folio()
  iomap: Convert iomap_writepages() to use for_each_writeback_folio()
  writeback: Remove a use of write_cache_pages() from do_writepages()

 fs/iomap/buffered-io.c    |  14 +-
 include/linux/pagevec.h   |  18 +++
 include/linux/writeback.h |  22 ++-
 mm/page-writeback.c       | 310 +++++++++++++++++++++-----------------
 4 files changed, 216 insertions(+), 148 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH 01/12] writeback: Factor out writeback_finish()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:05   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 02/12] writeback: Factor writeback_get_batch() out of write_cache_pages() Matthew Wilcox (Oracle)
                   ` (13 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Instead of having a 'done' variable that controls the nested loops,
add a writeback_finish() helper whose result can be returned directly.  This involves
keeping more things in writeback_control, but it's just moving stuff
allocated on the stack to being allocated slightly earlier on the stack.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/writeback.h |  6 ++++
 mm/page-writeback.c       | 74 +++++++++++++++++++++------------------
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fba937999fbf..5b7d11f54013 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,6 +11,7 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
+#include <linux/pagevec.h>
 
 struct bio;
 
@@ -52,6 +53,10 @@ struct writeback_control {
 	loff_t range_start;
 	loff_t range_end;
 
+	struct folio_batch fbatch;
+	pgoff_t done_index;
+	int err;
+
 	enum writeback_sync_modes sync_mode;
 
 	unsigned for_kupdate:1;		/* A kupdate writeback */
@@ -59,6 +64,7 @@ struct writeback_control {
 	unsigned tagged_writepages:1;	/* tag-and-write to avoid livelock */
 	unsigned for_reclaim:1;		/* Invoked from the page allocator */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
+	unsigned range_whole:1;		/* entire file */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
 	unsigned unpinned_fscache_wb:1;	/* Cleared I_PINNING_FSCACHE_WB */
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1d17fb1ec863..abd7c0eebc72 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2360,6 +2360,24 @@ void tag_pages_for_writeback(struct address_space *mapping,
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
+static int writeback_finish(struct address_space *mapping,
+		struct writeback_control *wbc, bool done)
+{
+	folio_batch_release(&wbc->fbatch);
+
+	/*
+	 * If we hit the last page and there is more work to be done:
+	 * wrap the index back to the start of the file for the next
+	 * time we are called.
+	 */
+	if (wbc->range_cyclic && !done)
+		wbc->done_index = 0;
+	if (wbc->range_cyclic || (wbc->range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = wbc->done_index;
+
+	return wbc->err;
+}
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -2395,18 +2413,12 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data)
 {
-	int ret = 0;
-	int done = 0;
 	int error;
-	struct folio_batch fbatch;
 	int nr_folios;
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
-	pgoff_t done_index;
-	int range_whole = 0;
 	xa_mark_t tag;
 
-	folio_batch_init(&fbatch);
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* prev offset */
 		end = -1;
@@ -2414,7 +2426,7 @@ int write_cache_pages(struct address_space *mapping,
 		index = wbc->range_start >> PAGE_SHIFT;
 		end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
+			wbc->range_whole = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
 		tag_pages_for_writeback(mapping, index, end);
@@ -2422,20 +2434,24 @@ int write_cache_pages(struct address_space *mapping,
 	} else {
 		tag = PAGECACHE_TAG_DIRTY;
 	}
-	done_index = index;
-	while (!done && (index <= end)) {
+
+	wbc->done_index = index;
+	folio_batch_init(&wbc->fbatch);
+	wbc->err = 0;
+
+	while (index <= end) {
 		int i;
 
 		nr_folios = filemap_get_folios_tag(mapping, &index, end,
-				tag, &fbatch);
+				tag, &wbc->fbatch);
 
 		if (nr_folios == 0)
 			break;
 
 		for (i = 0; i < nr_folios; i++) {
-			struct folio *folio = fbatch.folios[i];
+			struct folio *folio = wbc->fbatch.folios[i];
 
-			done_index = folio->index;
+			wbc->done_index = folio->index;
 
 			folio_lock(folio);
 
@@ -2488,14 +2504,14 @@ int write_cache_pages(struct address_space *mapping,
 					folio_unlock(folio);
 					error = 0;
 				} else if (wbc->sync_mode != WB_SYNC_ALL) {
-					ret = error;
-					done_index = folio->index +
-						folio_nr_pages(folio);
-					done = 1;
-					break;
+					wbc->err = error;
+					wbc->done_index = folio->index +
+							folio_nr_pages(folio);
+					return writeback_finish(mapping,
+							wbc, true);
 				}
-				if (!ret)
-					ret = error;
+				if (!wbc->err)
+					wbc->err = error;
 			}
 
 			/*
@@ -2505,26 +2521,14 @@ int write_cache_pages(struct address_space *mapping,
 			 * we tagged for writeback prior to entering this loop.
 			 */
 			if (--wbc->nr_to_write <= 0 &&
-			    wbc->sync_mode == WB_SYNC_NONE) {
-				done = 1;
-				break;
-			}
+			    wbc->sync_mode == WB_SYNC_NONE)
+				return writeback_finish(mapping, wbc, true);
 		}
-		folio_batch_release(&fbatch);
+		folio_batch_release(&wbc->fbatch);
 		cond_resched();
 	}
 
-	/*
-	 * If we hit the last page and there is more work to be done: wrap
-	 * back the index back to the start of the file for the next
-	 * time we are called.
-	 */
-	if (wbc->range_cyclic && !done)
-		done_index = 0;
-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-		mapping->writeback_index = done_index;
-
-	return ret;
+	return writeback_finish(mapping, wbc, false);
 }
 EXPORT_SYMBOL(write_cache_pages);
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 02/12] writeback: Factor writeback_get_batch() out of write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 01/12] writeback: Factor out writeback_finish() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 03/12] writeback: Factor should_writeback_folio() " Matthew Wilcox (Oracle)
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

This simple helper will be the basis of the writeback iterator.
To make this work, we need to remember the current index
and end positions in writeback_control.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/writeback.h |  2 ++
 mm/page-writeback.c       | 49 +++++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 5b7d11f54013..7dd050b40e4b 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -54,6 +54,8 @@ struct writeback_control {
 	loff_t range_end;
 
 	struct folio_batch fbatch;
+	pgoff_t index;
+	pgoff_t end;		/* Inclusive */
 	pgoff_t done_index;
 	int err;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index abd7c0eebc72..67c7f1564727 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2378,6 +2378,22 @@ static int writeback_finish(struct address_space *mapping,
 	return wbc->err;
 }
 
+static void writeback_get_batch(struct address_space *mapping,
+		struct writeback_control *wbc)
+{
+	xa_mark_t tag;
+
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+
+	folio_batch_release(&wbc->fbatch);
+	cond_resched();
+	filemap_get_folios_tag(mapping, &wbc->index, wbc->end, tag,
+			&wbc->fbatch);
+}
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -2414,41 +2430,32 @@ int write_cache_pages(struct address_space *mapping,
 		      void *data)
 {
 	int error;
-	int nr_folios;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
-	xa_mark_t tag;
 
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index; /* prev offset */
-		end = -1;
+		wbc->index = mapping->writeback_index; /* prev offset */
+		wbc->end = -1;
 	} else {
-		index = wbc->range_start >> PAGE_SHIFT;
-		end = wbc->range_end >> PAGE_SHIFT;
+		wbc->index = wbc->range_start >> PAGE_SHIFT;
+		wbc->end = wbc->range_end >> PAGE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			wbc->range_whole = 1;
 	}
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
-		tag_pages_for_writeback(mapping, index, end);
-		tag = PAGECACHE_TAG_TOWRITE;
-	} else {
-		tag = PAGECACHE_TAG_DIRTY;
-	}
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag_pages_for_writeback(mapping, wbc->index, wbc->end);
 
-	wbc->done_index = index;
+	wbc->done_index = wbc->index;
 	folio_batch_init(&wbc->fbatch);
 	wbc->err = 0;
 
-	while (index <= end) {
+	while (wbc->index <= wbc->end) {
 		int i;
 
-		nr_folios = filemap_get_folios_tag(mapping, &index, end,
-				tag, &wbc->fbatch);
+		writeback_get_batch(mapping, wbc);
 
-		if (nr_folios == 0)
+		if (wbc->fbatch.nr == 0)
 			break;
 
-		for (i = 0; i < nr_folios; i++) {
+		for (i = 0; i < wbc->fbatch.nr; i++) {
 			struct folio *folio = wbc->fbatch.folios[i];
 
 			wbc->done_index = folio->index;
@@ -2524,8 +2531,6 @@ int write_cache_pages(struct address_space *mapping,
 			    wbc->sync_mode == WB_SYNC_NONE)
 				return writeback_finish(mapping, wbc, true);
 		}
-		folio_batch_release(&wbc->fbatch);
-		cond_resched();
 	}
 
 	return writeback_finish(mapping, wbc, false);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 03/12] writeback: Factor should_writeback_folio() out of write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 01/12] writeback: Factor out writeback_finish() Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 02/12] writeback: Factor writeback_get_batch() out of write_cache_pages() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:12   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 04/12] writeback: Simplify the loops in write_cache_pages() Matthew Wilcox (Oracle)
                   ` (11 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Reduce write_cache_pages() by about 30 lines; much of it is commentary,
but it all bundles nicely into an obvious function.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 60 +++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 67c7f1564727..54f2972dab45 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2394,6 +2394,37 @@ static void writeback_get_batch(struct address_space *mapping,
 			&wbc->fbatch);
 }
 
+static bool should_writeback_folio(struct address_space *mapping,
+		struct writeback_control *wbc, struct folio *folio)
+{
+	/*
+	 * Folio truncated or invalidated. We can freely skip it then,
+	 * even for data integrity operations: the folio has disappeared
+	 * concurrently, so there could be no real expectation of this
+	 * data integrity operation even if there is now a new, dirty
+	 * folio at the same pagecache index.
+	 */
+	if (unlikely(folio->mapping != mapping))
+		return false;
+
+	/* Did somebody write it for us? */
+	if (!folio_test_dirty(folio))
+		return false;
+
+	if (folio_test_writeback(folio)) {
+		if (wbc->sync_mode != WB_SYNC_NONE)
+			folio_wait_writeback(folio);
+		else
+			return false;
+	}
+
+	BUG_ON(folio_test_writeback(folio));
+	if (!folio_clear_dirty_for_io(folio))
+		return false;
+
+	return true;
+}
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -2461,38 +2492,13 @@ int write_cache_pages(struct address_space *mapping,
 			wbc->done_index = folio->index;
 
 			folio_lock(folio);
-
-			/*
-			 * Page truncated or invalidated. We can freely skip it
-			 * then, even for data integrity operations: the page
-			 * has disappeared concurrently, so there could be no
-			 * real expectation of this data integrity operation
-			 * even if there is now a new, dirty page at the same
-			 * pagecache address.
-			 */
-			if (unlikely(folio->mapping != mapping)) {
-continue_unlock:
+			if (!should_writeback_folio(mapping, wbc, folio)) {
 				folio_unlock(folio);
 				continue;
 			}
 
-			if (!folio_test_dirty(folio)) {
-				/* someone wrote it for us */
-				goto continue_unlock;
-			}
-
-			if (folio_test_writeback(folio)) {
-				if (wbc->sync_mode != WB_SYNC_NONE)
-					folio_wait_writeback(folio);
-				else
-					goto continue_unlock;
-			}
-
-			BUG_ON(folio_test_writeback(folio));
-			if (!folio_clear_dirty_for_io(folio))
-				goto continue_unlock;
-
 			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+
 			error = writepage(folio, wbc, data);
 			if (unlikely(error)) {
 				/*
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 04/12] writeback: Simplify the loops in write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (2 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 03/12] writeback: Factor should_writeback_folio() " Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:16   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 05/12] pagevec: Add ability to iterate a queue Matthew Wilcox (Oracle)
                   ` (10 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Collapse the two nested loops into one.  This is needed as a step
towards turning this into an iterator.
---
 mm/page-writeback.c | 94 ++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 54f2972dab45..68f28eeb15ed 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2461,6 +2461,7 @@ int write_cache_pages(struct address_space *mapping,
 		      void *data)
 {
 	int error;
+	int i = 0;
 
 	if (wbc->range_cyclic) {
 		wbc->index = mapping->writeback_index; /* prev offset */
@@ -2478,65 +2479,64 @@ int write_cache_pages(struct address_space *mapping,
 	folio_batch_init(&wbc->fbatch);
 	wbc->err = 0;
 
-	while (wbc->index <= wbc->end) {
-		int i;
-
-		writeback_get_batch(mapping, wbc);
+	for (;;) {
+		struct folio *folio;
 
+		if (i == wbc->fbatch.nr) {
+			writeback_get_batch(mapping, wbc);
+			i = 0;
+		}
 		if (wbc->fbatch.nr == 0)
 			break;
 
-		for (i = 0; i < wbc->fbatch.nr; i++) {
-			struct folio *folio = wbc->fbatch.folios[i];
+		folio = wbc->fbatch.folios[i++];
 
-			wbc->done_index = folio->index;
+		wbc->done_index = folio->index;
 
-			folio_lock(folio);
-			if (!should_writeback_folio(mapping, wbc, folio)) {
-				folio_unlock(folio);
-				continue;
-			}
+		folio_lock(folio);
+		if (!should_writeback_folio(mapping, wbc, folio)) {
+			folio_unlock(folio);
+			continue;
+		}
 
-			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-
-			error = writepage(folio, wbc, data);
-			if (unlikely(error)) {
-				/*
-				 * Handle errors according to the type of
-				 * writeback. There's no need to continue for
-				 * background writeback. Just push done_index
-				 * past this page so media errors won't choke
-				 * writeout for the entire file. For integrity
-				 * writeback, we must process the entire dirty
-				 * set regardless of errors because the fs may
-				 * still have state to clear for each page. In
-				 * that case we continue processing and return
-				 * the first error.
-				 */
-				if (error == AOP_WRITEPAGE_ACTIVATE) {
-					folio_unlock(folio);
-					error = 0;
-				} else if (wbc->sync_mode != WB_SYNC_ALL) {
-					wbc->err = error;
-					wbc->done_index = folio->index +
-							folio_nr_pages(folio);
-					return writeback_finish(mapping,
-							wbc, true);
-				}
-				if (!wbc->err)
-					wbc->err = error;
-			}
+		trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
 
+		error = writepage(folio, wbc, data);
+		if (unlikely(error)) {
 			/*
-			 * We stop writing back only if we are not doing
-			 * integrity sync. In case of integrity sync we have to
-			 * keep going until we have written all the pages
-			 * we tagged for writeback prior to entering this loop.
+			 * Handle errors according to the type of
+			 * writeback. There's no need to continue for
+			 * background writeback. Just push done_index
+			 * past this page so media errors won't choke
+			 * writeout for the entire file. For integrity
+			 * writeback, we must process the entire dirty
+			 * set regardless of errors because the fs may
+			 * still have state to clear for each page. In
+			 * that case we continue processing and return
+			 * the first error.
 			 */
-			if (--wbc->nr_to_write <= 0 &&
-			    wbc->sync_mode == WB_SYNC_NONE)
+			if (error == AOP_WRITEPAGE_ACTIVATE) {
+				folio_unlock(folio);
+				error = 0;
+			} else if (wbc->sync_mode != WB_SYNC_ALL) {
+				wbc->err = error;
+				wbc->done_index = folio->index +
+						folio_nr_pages(folio);
 				return writeback_finish(mapping, wbc, true);
+			}
+			if (!wbc->err)
+				wbc->err = error;
 		}
+
+		/*
+		 * We stop writing back only if we are not doing
+		 * integrity sync. In case of integrity sync we have to
+		 * keep going until we have written all the pages
+		 * we tagged for writeback prior to entering this loop.
+		 */
+		if (--wbc->nr_to_write <= 0 &&
+		    wbc->sync_mode == WB_SYNC_NONE)
+			return writeback_finish(mapping, wbc, true);
 	}
 
 	return writeback_finish(mapping, wbc, false);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 05/12] pagevec: Add ability to iterate a queue
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (3 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 04/12] writeback: Simplify the loops in write_cache_pages() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 06/12] writeback: Use the folio_batch queue iterator Matthew Wilcox (Oracle)
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Add a loop counter inside the folio_batch to let us iterate from 0-nr
instead of decrementing nr and treating the batch as a stack.  It would
generate some very weird and suboptimal I/O patterns for page writeback
to iterate over the batch as a stack.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagevec.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 87cc678adc85..fcc06c300a72 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -27,6 +27,7 @@ struct folio;
  */
 struct folio_batch {
 	unsigned char nr;
+	unsigned char i;
 	bool percpu_pvec_drained;
 	struct folio *folios[PAGEVEC_SIZE];
 };
@@ -40,12 +41,14 @@ struct folio_batch {
 static inline void folio_batch_init(struct folio_batch *fbatch)
 {
 	fbatch->nr = 0;
+	fbatch->i = 0;
 	fbatch->percpu_pvec_drained = false;
 }
 
 static inline void folio_batch_reinit(struct folio_batch *fbatch)
 {
 	fbatch->nr = 0;
+	fbatch->i = 0;
 }
 
 static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -75,6 +78,21 @@ static inline unsigned folio_batch_add(struct folio_batch *fbatch,
 	return folio_batch_space(fbatch);
 }
 
+/**
+ * folio_batch_next - Return the next folio to process.
+ * @fbatch: The folio batch being processed.
+ *
+ * Use this function to implement a queue of folios.
+ *
+ * Return: The next folio in the queue, or NULL if the queue is empty.
+ */
+static inline struct folio *folio_batch_next(struct folio_batch *fbatch)
+{
+	if (fbatch->i == fbatch->nr)
+		return NULL;
+	return fbatch->folios[fbatch->i++];
+}
+
 void __folio_batch_release(struct folio_batch *pvec);
 
 static inline void folio_batch_release(struct folio_batch *fbatch)
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 06/12] writeback: Use the folio_batch queue iterator
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (4 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 05/12] pagevec: Add ability to iterate a queue Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:25   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages() Matthew Wilcox (Oracle)
                   ` (8 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Instead of keeping our own local iterator variable, use the one just
added to folio_batch.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 68f28eeb15ed..f782b48c5b0c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2378,11 +2378,15 @@ static int writeback_finish(struct address_space *mapping,
 	return wbc->err;
 }
 
-static void writeback_get_batch(struct address_space *mapping,
+static struct folio *writeback_get_next(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
+	struct folio *folio = folio_batch_next(&wbc->fbatch);
 	xa_mark_t tag;
 
+	if (folio)
+		return folio;
+
 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
 		tag = PAGECACHE_TAG_TOWRITE;
 	else
@@ -2392,6 +2396,7 @@ static void writeback_get_batch(struct address_space *mapping,
 	cond_resched();
 	filemap_get_folios_tag(mapping, &wbc->index, wbc->end, tag,
 			&wbc->fbatch);
+	return folio_batch_next(&wbc->fbatch);
 }
 
 static bool should_writeback_folio(struct address_space *mapping,
@@ -2461,7 +2466,6 @@ int write_cache_pages(struct address_space *mapping,
 		      void *data)
 {
 	int error;
-	int i = 0;
 
 	if (wbc->range_cyclic) {
 		wbc->index = mapping->writeback_index; /* prev offset */
@@ -2480,17 +2484,11 @@ int write_cache_pages(struct address_space *mapping,
 	wbc->err = 0;
 
 	for (;;) {
-		struct folio *folio;
+		struct folio *folio = writeback_get_next(mapping, wbc);
 
-		if (i == wbc->fbatch.nr) {
-			writeback_get_batch(mapping, wbc);
-			i = 0;
-		}
-		if (wbc->fbatch.nr == 0)
+		if (!folio)
 			break;
 
-		folio = wbc->fbatch.folios[i++];
-
 		wbc->done_index = folio->index;
 
 		folio_lock(folio);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (5 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 06/12] writeback: Use the folio_batch queue iterator Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:30   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 08/12] writeback: Factor writeback_get_folio() " Matthew Wilcox (Oracle)
                   ` (7 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Make writeback_iter_init() return the first folio in the batch so that
we can use it in a typical for() pattern.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 48 ++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f782b48c5b0c..18f332611a52 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2430,6 +2430,28 @@ static bool should_writeback_folio(struct address_space *mapping,
 	return true;
 }
 
+static struct folio *writeback_iter_init(struct address_space *mapping,
+		struct writeback_control *wbc)
+{
+	if (wbc->range_cyclic) {
+		wbc->index = mapping->writeback_index; /* prev offset */
+		wbc->end = -1;
+	} else {
+		wbc->index = wbc->range_start >> PAGE_SHIFT;
+		wbc->end = wbc->range_end >> PAGE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			wbc->range_whole = 1;
+	}
+	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
+		tag_pages_for_writeback(mapping, wbc->index, wbc->end);
+
+	wbc->done_index = wbc->index;
+	folio_batch_init(&wbc->fbatch);
+	wbc->err = 0;
+
+	return writeback_get_next(mapping, wbc);
+}
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -2465,30 +2487,12 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data)
 {
+	struct folio *folio;
 	int error;
 
-	if (wbc->range_cyclic) {
-		wbc->index = mapping->writeback_index; /* prev offset */
-		wbc->end = -1;
-	} else {
-		wbc->index = wbc->range_start >> PAGE_SHIFT;
-		wbc->end = wbc->range_end >> PAGE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			wbc->range_whole = 1;
-	}
-	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
-		tag_pages_for_writeback(mapping, wbc->index, wbc->end);
-
-	wbc->done_index = wbc->index;
-	folio_batch_init(&wbc->fbatch);
-	wbc->err = 0;
-
-	for (;;) {
-		struct folio *folio = writeback_get_next(mapping, wbc);
-
-		if (!folio)
-			break;
-
+	for (folio = writeback_iter_init(mapping, wbc);
+	     folio;
+	     folio = writeback_get_next(mapping, wbc)) {
 		wbc->done_index = folio->index;
 
 		folio_lock(folio);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 08/12] writeback: Factor writeback_get_folio() out of write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (6 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:34   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 09/12] writeback: Factor writeback_iter_next() " Matthew Wilcox (Oracle)
                   ` (6 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Move the loop for should-we-write-this-folio to its own function.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 18f332611a52..659df2b5c7c0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2430,6 +2430,27 @@ static bool should_writeback_folio(struct address_space *mapping,
 	return true;
 }
 
+static struct folio *writeback_get_folio(struct address_space *mapping,
+		struct writeback_control *wbc)
+{
+	struct folio *folio;
+
+	for (;;) {
+		folio = writeback_get_next(mapping, wbc);
+		if (!folio)
+			return NULL;
+		wbc->done_index = folio->index;
+
+		folio_lock(folio);
+		if (likely(should_writeback_folio(mapping, wbc, folio)))
+			break;
+		folio_unlock(folio);
+	}
+
+	trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+	return folio;
+}
+
 static struct folio *writeback_iter_init(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
@@ -2449,7 +2470,7 @@ static struct folio *writeback_iter_init(struct address_space *mapping,
 	folio_batch_init(&wbc->fbatch);
 	wbc->err = 0;
 
-	return writeback_get_next(mapping, wbc);
+	return writeback_get_folio(mapping, wbc);
 }
 
 /**
@@ -2492,17 +2513,7 @@ int write_cache_pages(struct address_space *mapping,
 
 	for (folio = writeback_iter_init(mapping, wbc);
 	     folio;
-	     folio = writeback_get_next(mapping, wbc)) {
-		wbc->done_index = folio->index;
-
-		folio_lock(folio);
-		if (!should_writeback_folio(mapping, wbc, folio)) {
-			folio_unlock(folio);
-			continue;
-		}
-
-		trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-
+	     folio = writeback_get_folio(mapping, wbc)) {
 		error = writepage(folio, wbc, data);
 		if (unlikely(error)) {
 			/*
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 09/12] writeback: Factor writeback_iter_next() out of write_cache_pages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (7 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 08/12] writeback: Factor writeback_get_folio() " Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:39   ` Christoph Hellwig
  2023-06-26 17:35 ` [PATCH 10/12] writeback: Add for_each_writeback_folio() Matthew Wilcox (Oracle)
                   ` (5 subsequent siblings)
  14 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Pull the post-processing of the writepage_t callback into a
separate function.  That means changing writeback_finish() to
return NULL, and writeback_get_next() to call writeback_finish()
when we naturally run out of folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 84 ++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 40 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 659df2b5c7c0..ef61d7006c5e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2360,7 +2360,7 @@ void tag_pages_for_writeback(struct address_space *mapping,
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
-static int writeback_finish(struct address_space *mapping,
+static struct folio *writeback_finish(struct address_space *mapping,
 		struct writeback_control *wbc, bool done)
 {
 	folio_batch_release(&wbc->fbatch);
@@ -2375,7 +2375,7 @@ static int writeback_finish(struct address_space *mapping,
 	if (wbc->range_cyclic || (wbc->range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = wbc->done_index;
 
-	return wbc->err;
+	return NULL;
 }
 
 static struct folio *writeback_get_next(struct address_space *mapping,
@@ -2438,7 +2438,7 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
 	for (;;) {
 		folio = writeback_get_next(mapping, wbc);
 		if (!folio)
-			return NULL;
+			return writeback_finish(mapping, wbc, false);
 		wbc->done_index = folio->index;
 
 		folio_lock(folio);
@@ -2473,6 +2473,45 @@ static struct folio *writeback_iter_init(struct address_space *mapping,
 	return writeback_get_folio(mapping, wbc);
 }
 
+static struct folio *writeback_iter_next(struct address_space *mapping,
+		struct writeback_control *wbc, struct folio *folio, int error)
+{
+	if (unlikely(error)) {
+		/*
+		 * Handle errors according to the type of writeback.
+		 * There's no need to continue for background writeback.
+		 * Just push done_index past this folio so media
+		 * errors won't choke writeout for the entire file.
+		 * For integrity writeback, we must process the entire
+		 * dirty set regardless of errors because the fs may
+		 * still have state to clear for each folio.  In that
+		 * case we continue processing and return the first error.
+		 */
+		if (error == AOP_WRITEPAGE_ACTIVATE) {
+			folio_unlock(folio);
+			error = 0;
+		} else if (wbc->sync_mode != WB_SYNC_ALL) {
+			wbc->err = error;
+			wbc->done_index = folio->index +
+					folio_nr_pages(folio);
+			return writeback_finish(mapping, wbc, true);
+		}
+		if (!wbc->err)
+			wbc->err = error;
+	}
+
+	/*
+	 * We stop writing back only if we are not doing integrity
+	 * sync. In case of integrity sync we have to keep going until
+	 * we have written all the folios we tagged for writeback prior
+	 * to entering this loop.
+	 */
+	if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
+		return writeback_finish(mapping, wbc, true);
+
+	return writeback_get_folio(mapping, wbc);
+}
+
 /**
  * write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
  * @mapping: address space structure to write
@@ -2513,46 +2552,11 @@ int write_cache_pages(struct address_space *mapping,
 
 	for (folio = writeback_iter_init(mapping, wbc);
 	     folio;
-	     folio = writeback_get_folio(mapping, wbc)) {
+	     folio = writeback_iter_next(mapping, wbc, folio, error)) {
 		error = writepage(folio, wbc, data);
-		if (unlikely(error)) {
-			/*
-			 * Handle errors according to the type of
-			 * writeback. There's no need to continue for
-			 * background writeback. Just push done_index
-			 * past this page so media errors won't choke
-			 * writeout for the entire file. For integrity
-			 * writeback, we must process the entire dirty
-			 * set regardless of errors because the fs may
-			 * still have state to clear for each page. In
-			 * that case we continue processing and return
-			 * the first error.
-			 */
-			if (error == AOP_WRITEPAGE_ACTIVATE) {
-				folio_unlock(folio);
-				error = 0;
-			} else if (wbc->sync_mode != WB_SYNC_ALL) {
-				wbc->err = error;
-				wbc->done_index = folio->index +
-						folio_nr_pages(folio);
-				return writeback_finish(mapping, wbc, true);
-			}
-			if (!wbc->err)
-				wbc->err = error;
-		}
-
-		/*
-		 * We stop writing back only if we are not doing
-		 * integrity sync. In case of integrity sync we have to
-		 * keep going until we have written all the pages
-		 * we tagged for writeback prior to entering this loop.
-		 */
-		if (--wbc->nr_to_write <= 0 &&
-		    wbc->sync_mode == WB_SYNC_NONE)
-			return writeback_finish(mapping, wbc, true);
 	}
 
-	return writeback_finish(mapping, wbc, false);
+	return wbc->err;
 }
 EXPORT_SYMBOL(write_cache_pages);
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 10/12] writeback: Add for_each_writeback_folio()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (8 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 09/12] writeback: Factor writeback_iter_next() " Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 11/12] iomap: Convert iomap_writepages() to use for_each_writeback_folio() Matthew Wilcox (Oracle)
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Wrap up the iterator with a nice bit of syntactic sugar.  Now the
caller doesn't need to know about wbc->err and can just return error,
not knowing that the iterator took care of storing errors correctly.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/writeback.h | 14 +++++++++++---
 mm/page-writeback.c       | 11 ++++-------
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 7dd050b40e4b..84d5306ef045 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -369,14 +369,22 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
 
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
+struct folio *writeback_iter_init(struct address_space *mapping,
+		struct writeback_control *wbc);
+struct folio *writeback_iter_next(struct address_space *mapping,
+		struct writeback_control *wbc, struct folio *folio, int error);
+
+#define for_each_writeback_folio(mapping, wbc, folio, error)		\
+	for (folio = writeback_iter_init(mapping, wbc);			\
+	     folio || ((error = wbc->err), false);			\
+	     folio = writeback_iter_next(mapping, wbc, folio, error))
+
 typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc,
 				void *data);
-
-void tag_pages_for_writeback(struct address_space *mapping,
-			     pgoff_t start, pgoff_t end);
 int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data);
+
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef61d7006c5e..245d6318dfb2 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2451,7 +2451,7 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
 	return folio;
 }
 
-static struct folio *writeback_iter_init(struct address_space *mapping,
+struct folio *writeback_iter_init(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
 	if (wbc->range_cyclic) {
@@ -2473,7 +2473,7 @@ static struct folio *writeback_iter_init(struct address_space *mapping,
 	return writeback_get_folio(mapping, wbc);
 }
 
-static struct folio *writeback_iter_next(struct address_space *mapping,
+struct folio *writeback_iter_next(struct address_space *mapping,
 		struct writeback_control *wbc, struct folio *folio, int error)
 {
 	if (unlikely(error)) {
@@ -2550,13 +2550,10 @@ int write_cache_pages(struct address_space *mapping,
 	struct folio *folio;
 	int error;
 
-	for (folio = writeback_iter_init(mapping, wbc);
-	     folio;
-	     folio = writeback_iter_next(mapping, wbc, folio, error)) {
+	for_each_writeback_folio(mapping, wbc, folio, error)
 		error = writepage(folio, wbc, data);
-	}
 
-	return wbc->err;
+	return error;
 }
 EXPORT_SYMBOL(write_cache_pages);
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 11/12] iomap: Convert iomap_writepages() to use for_each_writeback_folio()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (9 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 10/12] writeback: Add for_each_writeback_folio() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-26 17:35 ` [PATCH 12/12] writeback: Remove a use of write_cache_pages() from do_writepages() Matthew Wilcox (Oracle)
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

This removes one indirect function call per folio, and adds typesafety
by not casting through a void pointer.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index a4fa81af60d9..a4dd17abe244 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1711,9 +1711,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
  * regular allocated space.
  */
 static int iomap_do_writepage(struct folio *folio,
-		struct writeback_control *wbc, void *data)
+		struct writeback_control *wbc, struct iomap_writepage_ctx *wpc)
 {
-	struct iomap_writepage_ctx *wpc = data;
 	struct inode *inode = folio->mapping->host;
 	u64 end_pos, isize;
 
@@ -1810,13 +1809,16 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 		struct iomap_writepage_ctx *wpc,
 		const struct iomap_writeback_ops *ops)
 {
-	int			ret;
+	struct folio *folio;
+	int err;
 
 	wpc->ops = ops;
-	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
+	for_each_writeback_folio(mapping, wbc, folio, err)
+		err = iomap_do_writepage(folio, wbc, wpc);
+
 	if (!wpc->ioend)
-		return ret;
-	return iomap_submit_ioend(wpc, wpc->ioend, ret);
+		return err;
+	return iomap_submit_ioend(wpc, wpc->ioend, err);
 }
 EXPORT_SYMBOL_GPL(iomap_writepages);
 
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [PATCH 12/12] writeback: Remove a use of write_cache_pages() from do_writepages()
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (10 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 11/12] iomap: Convert iomap_writepages() to use for_each_writeback_folio() Matthew Wilcox (Oracle)
@ 2023-06-26 17:35 ` Matthew Wilcox (Oracle)
  2023-06-27  4:03 ` [PATCH 00/12] Convert write_cache_pages() to an iterator Christoph Hellwig
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox (Oracle) @ 2023-06-26 17:35 UTC (permalink / raw)
  To: linux-mm
  Cc: Matthew Wilcox (Oracle),
	linux-fsdevel, linux-kernel, Jan Kara, David Howells

Use the new for_each_writeback_folio() directly instead of indirecting
through a callback.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/page-writeback.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 245d6318dfb2..55832679af21 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2557,13 +2557,21 @@ int write_cache_pages(struct address_space *mapping,
 }
 EXPORT_SYMBOL(write_cache_pages);
 
-static int writepage_cb(struct folio *folio, struct writeback_control *wbc,
-		void *data)
+static int writeback_use_writepage(struct address_space *mapping,
+		struct writeback_control *wbc)
 {
-	struct address_space *mapping = data;
-	int ret = mapping->a_ops->writepage(&folio->page, wbc);
-	mapping_set_error(mapping, ret);
-	return ret;
+	struct blk_plug plug;
+	struct folio *folio;
+	int err;
+
+	blk_start_plug(&plug);
+	for_each_writeback_folio(mapping, wbc, folio, err) {
+		err = mapping->a_ops->writepage(&folio->page, wbc);
+		mapping_set_error(mapping, err);
+	}
+	blk_finish_plug(&plug);
+
+	return err;
 }
 
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
@@ -2579,12 +2587,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		if (mapping->a_ops->writepages) {
 			ret = mapping->a_ops->writepages(mapping, wbc);
 		} else if (mapping->a_ops->writepage) {
-			struct blk_plug plug;
-
-			blk_start_plug(&plug);
-			ret = write_cache_pages(mapping, wbc, writepage_cb,
-						mapping);
-			blk_finish_plug(&plug);
+			ret = writeback_use_writepage(mapping, wbc);
 		} else {
 			/* deal with chardevs and other special files */
 			ret = 0;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (11 preceding siblings ...)
  2023-06-26 17:35 ` [PATCH 12/12] writeback: Remove a use of write_cache_pages() from do_writepages() Matthew Wilcox (Oracle)
@ 2023-06-27  4:03 ` Christoph Hellwig
  2023-06-27 10:53 ` David Howells
  2023-11-21  5:18 ` Christoph Hellwig
  14 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:03 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:09PM +0100, Matthew Wilcox (Oracle) wrote:
> Dave Howells doesn't like the indirect function call imposed by
> write_cache_pages(), so refactor it into an iterator.  I took the
> opportunity to add the ability to iterate a folio_batch without having
> an external variable.
> 
> This is against next-20230623.  If you try to apply it on top of a tree
> which doesn't include the pagevec removal series, IT WILL CRASH because
> it won't reinitialise folio_batch->i and the iteration will index out
> of bounds.
> 
> I have a feeling the 'done' parameter could have a better name, but I
> can't think what it might be.

Heh, I actually have a local series with a minor subset of some of the
cleanups, but without the iterator conversion.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 01/12] writeback: Factor out writeback_finish()
  2023-06-26 17:35 ` [PATCH 01/12] writeback: Factor out writeback_finish() Matthew Wilcox (Oracle)
@ 2023-06-27  4:05   ` Christoph Hellwig
  0 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:05 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:10PM +0100, Matthew Wilcox (Oracle) wrote:
> +	struct folio_batch fbatch;
> +	pgoff_t done_index;
> +	int err;
> +

I think this really needs a comment that it should only be used for
the writeback iterator.  In fact this whole structure could use a lot
more comments on what should / can be set by the caller, and what is
internal.

> +	unsigned range_whole:1;		/* entire file */

Same here.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/12] writeback: Factor should_writeback_folio() out of write_cache_pages()
  2023-06-26 17:35 ` [PATCH 03/12] writeback: Factor should_writeback_folio() " Matthew Wilcox (Oracle)
@ 2023-06-27  4:12   ` Christoph Hellwig
  2023-06-27 11:16     ` Matthew Wilcox
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:12 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

> +	if (folio_test_writeback(folio)) {
> +		if (wbc->sync_mode != WB_SYNC_NONE)
> +			folio_wait_writeback(folio);
> +		else
> +			return false;
> +	}

Please reorder this to avoid the else and return earlier while you're
at it:

	if (folio_test_writeback(folio)) {
		if (wbc->sync_mode == WB_SYNC_NONE)
			return false;
		folio_wait_writeback(folio);
	}

(that's what actually got me started on my little cleanup spree while
checking some details of the writeback waiting..)

> +	BUG_ON(folio_test_writeback(folio));
> +	if (!folio_clear_dirty_for_io(folio))
> +		return false;
> +
> +	return true;

..

	return folio_clear_dirty_for_io(folio);

?


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 04/12] writeback: Simplify the loops in write_cache_pages()
  2023-06-26 17:35 ` [PATCH 04/12] writeback: Simplify the loops in write_cache_pages() Matthew Wilcox (Oracle)
@ 2023-06-27  4:16   ` Christoph Hellwig
  0 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:16 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:13PM +0100, Matthew Wilcox (Oracle) wrote:
> Collapse the two nested loops into one.  This is needed as a step
> towards turning this into an iterator.
> ---
>  mm/page-writeback.c | 94 ++++++++++++++++++++++-----------------------
>  1 file changed, 47 insertions(+), 47 deletions(-)
> 
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 54f2972dab45..68f28eeb15ed 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2461,6 +2461,7 @@ int write_cache_pages(struct address_space *mapping,
>  		      void *data)
>  {
>  	int error;
> +	int i = 0;
>  
>  	if (wbc->range_cyclic) {
>  		wbc->index = mapping->writeback_index; /* prev offset */
> @@ -2478,65 +2479,64 @@ int write_cache_pages(struct address_space *mapping,
>  	folio_batch_init(&wbc->fbatch);
>  	wbc->err = 0;
>  
> +	for (;;) {
> +		struct folio *folio;
>  
> +		if (i == wbc->fbatch.nr) {
> +			writeback_get_batch(mapping, wbc);
> +			i = 0;
> +		}
>  		if (wbc->fbatch.nr == 0)
>  			break;
> +		folio = wbc->fbatch.folios[i++];

Did you consider moving what is currently the "i" local variable
into struct writeback_control as well?  Then writeback_get_batch
could return the current folio, and we could have a much nicer loop
here by moving all of the above into writeback_get_batch:

	while ((folio = writeback_get_batch(mapping, wbc))) {

(and yes, writeback_get_batch probably needs a better name with that)

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 06/12] writeback: Use the folio_batch queue iterator
  2023-06-26 17:35 ` [PATCH 06/12] writeback: Use the folio_batch queue iterator Matthew Wilcox (Oracle)
@ 2023-06-27  4:25   ` Christoph Hellwig
  0 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:25 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:15PM +0100, Matthew Wilcox (Oracle) wrote:
> Instead of keeping our own local iterator variable, use the one just
> added to folio_batch.

Ok, that's a slightly different twist to what I just suggested.

>  	for (;;) {
> -		struct folio *folio;
> +		struct folio *folio = writeback_get_next(mapping, wbc);
>  
> +		if (!folio)
>  			break;

But as a purely cosmetic nit I still think this would read nicer as:

	while ((folio = writeback_get_next(mapping, wbc))) {


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages()
  2023-06-26 17:35 ` [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages() Matthew Wilcox (Oracle)
@ 2023-06-27  4:30   ` Christoph Hellwig
  2023-06-27  4:31     ` Christoph Hellwig
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:30 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:16PM +0100, Matthew Wilcox (Oracle) wrote:
> +	for (folio = writeback_iter_init(mapping, wbc);
> +	     folio;
> +	     folio = writeback_get_next(mapping, wbc)) {

Ok that's another way to structure it.  Guess I should look over the
whole series first..


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages()
  2023-06-27  4:30   ` Christoph Hellwig
@ 2023-06-27  4:31     ` Christoph Hellwig
  2023-06-27 11:08       ` Matthew Wilcox
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:31 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 09:30:07PM -0700, Christoph Hellwig wrote:
> On Mon, Jun 26, 2023 at 06:35:16PM +0100, Matthew Wilcox (Oracle) wrote:
> > +	for (folio = writeback_iter_init(mapping, wbc);
> > +	     folio;
> > +	     folio = writeback_get_next(mapping, wbc)) {
> 
> Ok that's another way to structure it.  Guess I should look over the
> whole series first..

That being said.  Given that writeback_iter_init calls 
writeback_get_next anyway,

	writeback_iter_init(mapping, wbc);
	while ((folio = writeback_get_next(mapping, wbc)))

still feels a little easier to follow to me.  No hard feelings either
way, just an observation.


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 08/12] writeback: Factor writeback_get_folio() out of write_cache_pages()
  2023-06-26 17:35 ` [PATCH 08/12] writeback: Factor writeback_get_folio() " Matthew Wilcox (Oracle)
@ 2023-06-27  4:34   ` Christoph Hellwig
  2023-06-27 15:25     ` Matthew Wilcox
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:34 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

> +	for (;;) {
> +		folio = writeback_get_next(mapping, wbc);
> +		if (!folio)
> +			return NULL;
> +		wbc->done_index = folio->index;
> +
> +		folio_lock(folio);
> +		if (likely(should_writeback_folio(mapping, wbc, folio)))
> +			break;
> +		folio_unlock(folio);
> +	}
> +
> +	trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
> +	return folio;

Same minor nitpick, why not:


	while ((folio = writeback_get_next(mapping, wbc))) {
		wbc->done_index = folio->index;

		folio_lock(folio);
		if (likely(should_writeback_folio(mapping, wbc, folio))) {
			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
			break;
		}
		folio_unlock(folio);
	}

	return folio;

?

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 09/12] writeback: Factor writeback_iter_next() out of write_cache_pages()
  2023-06-26 17:35 ` [PATCH 09/12] writeback: Factor writeback_iter_next() " Matthew Wilcox (Oracle)
@ 2023-06-27  4:39   ` Christoph Hellwig
  2023-06-27 15:31     ` Matthew Wilcox
  0 siblings, 1 reply; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27  4:39 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 06:35:18PM +0100, Matthew Wilcox (Oracle) wrote:
> Pull the post-processing of the writepage_t callback into a
> separate function.  That means changing writeback_finish() to
> return NULL, and writeback_get_next() to call writeback_finish()
> when we naturally run out of folios.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/page-writeback.c | 84 ++++++++++++++++++++++++---------------------
>  1 file changed, 44 insertions(+), 40 deletions(-)
> 
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 659df2b5c7c0..ef61d7006c5e 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2360,7 +2360,7 @@ void tag_pages_for_writeback(struct address_space *mapping,
>  }
>  EXPORT_SYMBOL(tag_pages_for_writeback);
>  
> -static int writeback_finish(struct address_space *mapping,
> +static struct folio *writeback_finish(struct address_space *mapping,
>  		struct writeback_control *wbc, bool done)
>  {
>  	folio_batch_release(&wbc->fbatch);
> @@ -2375,7 +2375,7 @@ static int writeback_finish(struct address_space *mapping,
>  	if (wbc->range_cyclic || (wbc->range_whole && wbc->nr_to_write > 0))
>  		mapping->writeback_index = wbc->done_index;
>  
> -	return wbc->err;
> +	return NULL;

Having a return value that is always NULL feels a bit weird vs just
doing that return in the caller.

> +static struct folio *writeback_iter_next(struct address_space *mapping,
> +		struct writeback_control *wbc, struct folio *folio, int error)
> +{
> +	if (unlikely(error)) {
> +		/*
> +		 * Handle errors according to the type of writeback.
> +		 * There's no need to continue for background writeback.
> +		 * Just push done_index past this folio so media
> +		 * errors won't choke writeout for the entire file.
> +		 * For integrity writeback, we must process the entire
> +		 * dirty set regardless of errors because the fs may
> +		 * still have state to clear for each folio.  In that
> +		 * case we continue processing and return the first error.
> +		 */
> +		if (error == AOP_WRITEPAGE_ACTIVATE) {
> +			folio_unlock(folio);
> +			error = 0;

Note there really shouldn't be any need for this outside of the
legacy ->writepage case.  But it might make sense to delay the removal
until after ->writepage is gone to avoid bugs in conversions.

> +		} else if (wbc->sync_mode != WB_SYNC_ALL) {
> +			wbc->err = error;
> +			wbc->done_index = folio->index +
> +					folio_nr_pages(folio);

Btw, I wonder if a folio_next_index helper for this might be useful
as it's a pattern we have in a few places.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (12 preceding siblings ...)
  2023-06-27  4:03 ` [PATCH 00/12] Convert write_cache_pages() to an iterator Christoph Hellwig
@ 2023-06-27 10:53 ` David Howells
  2023-06-28 19:31   ` Matthew Wilcox
  2023-06-28 20:03   ` David Howells
  2023-11-21  5:18 ` Christoph Hellwig
  14 siblings, 2 replies; 35+ messages in thread
From: David Howells @ 2023-06-27 10:53 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: dhowells, linux-mm, linux-fsdevel, linux-kernel, Jan Kara

Do you have this on a branch somewhere?

David


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages()
  2023-06-27  4:31     ` Christoph Hellwig
@ 2023-06-27 11:08       ` Matthew Wilcox
  0 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-27 11:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 09:31:56PM -0700, Christoph Hellwig wrote:
> On Mon, Jun 26, 2023 at 09:30:07PM -0700, Christoph Hellwig wrote:
> > On Mon, Jun 26, 2023 at 06:35:16PM +0100, Matthew Wilcox (Oracle) wrote:
> > > +	for (folio = writeback_iter_init(mapping, wbc);
> > > +	     folio;
> > > +	     folio = writeback_get_next(mapping, wbc)) {
> > 
> > Ok that's another way to structure it.  Guess I should look over the
> > whole series first..

Perhaps ... it's a little hard to decide which of your comments
are worth replying to, and which are obviated by later realisations.

> That beeing said.  Given that writeback_iter_init calls 
> writeback_get_next anyway,
> 
> 	writeback_iter_init(mapping, wbc);
> 	while ((folio = writeback_get_next(mapping, wbc)))
> 
> still feels a little easier to follow to be.  No hard feelings either
> way, just an observation.

I had it structured that way originally, but we need to pass in 'error'
to the get_next, and it's better if we also pass in 'folio', which means
that the user then needs to initialise error to 0 and folio to NULL
before using the macro, and that all felt a bit "You're holding it wrong".

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/12] writeback: Factor should_writeback_folio() out of write_cache_pages()
  2023-06-27  4:12   ` Christoph Hellwig
@ 2023-06-27 11:16     ` Matthew Wilcox
  2023-06-27 14:48       ` Matthew Wilcox
  0 siblings, 1 reply; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-27 11:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 09:12:07PM -0700, Christoph Hellwig wrote:
> > +	if (folio_test_writeback(folio)) {
> > +		if (wbc->sync_mode != WB_SYNC_NONE)
> > +			folio_wait_writeback(folio);
> > +		else
> > +			return false;
> > +	}
> 
> Please reorder this to avoid the else and return earlier while you're
> at it:
> 
> 	if (folio_test_writeback(folio)) {
> 		if (wbc->sync_mode == WB_SYNC_NONE)
> 			return false;
> 		folio_wait_writeback(folio);
> 	}

Sure, that makes sense.

> (that's what actually got me started on my little cleanup spree while
> checking some details of the writeback waiting..)

This might be a good point to share that I'm considering (eventually)
not taking the folio lock here.

My plan looks something like this (not fully baked):

truncation (and similar) paths currently lock the folio.  They would both
lock the folio _and_ claim that they were doing writeback on the folio.

Filesystems would receive the folio from the writeback iterator with
the writeback flag already set.


This allows, eg, folio mapping/unmapping to take place completely
independent of writeback.  That seems like a good thing; I can't see
why the two should be connected.

> > +	BUG_ON(folio_test_writeback(folio));
> > +	if (!folio_clear_dirty_for_io(folio))
> > +		return false;
> > +
> > +	return true;
> 
> ..
> 
> 	return folio_clear_dirty_for_io(folio);
> 
> ?

I did consider that, but there's a nice symmetry to the code the way it's
currently written, and that took precedence in my mind over "fewer lines
of code".  There's nothing intrinsic about folio_clear_dirty_for_io()
being the last condition to be checked (is there?  We have to
redirty_for_io if we decide to not start writeback), so it seemed to
make sense to leave space to add more conditions.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 03/12] writeback: Factor should_writeback_folio() out of write_cache_pages()
  2023-06-27 11:16     ` Matthew Wilcox
@ 2023-06-27 14:48       ` Matthew Wilcox
  0 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-27 14:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Tue, Jun 27, 2023 at 12:16:34PM +0100, Matthew Wilcox wrote:
> This might be a good point to share that I'm considering (eventually)
> not taking the folio lock here.
> 
> My plan looks something like this (not fully baked):
> 
> truncation (and similar) paths currently lock the folio,  They would both
> lock the folio _and_ claim that they were doing writeback on the folio.
> 
> Filesystems would receive the folio from the writeback iterator with
> the writeback flag already set.
> 
> 
> This allows, eg, folio mapping/unmapping to take place completely
> independent of writeback.  That seems like a good thing; I can't see
> why the two should be connected.

Ah, i_size is a problem.  With an extending write, i_size is updated
while holding the folio lock.  If we're writing back a partial folio,
we zero the tail.  That must not race with an extending write.  So
either we'd need to take both the folio lock & wb_lock when updating
i_size, or we'd need to take both the lock and wb_lock when writing
back the last page of a file.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 08/12] writeback: Factor writeback_get_folio() out of write_cache_pages()
  2023-06-27  4:34   ` Christoph Hellwig
@ 2023-06-27 15:25     ` Matthew Wilcox
  0 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-27 15:25 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 09:34:17PM -0700, Christoph Hellwig wrote:
> > +	for (;;) {
> > +		folio = writeback_get_next(mapping, wbc);
> > +		if (!folio)
> > +			return NULL;
> > +		wbc->done_index = folio->index;
> > +
> > +		folio_lock(folio);
> > +		if (likely(should_writeback_folio(mapping, wbc, folio)))
> > +			break;
> > +		folio_unlock(folio);
> > +	}
> > +
> > +	trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
> > +	return folio;
> 
> Same minor nitpick, why not:
> 
> 
> 	while ((folio = writeback_get_next(mapping, wbc))) {
> 		wbc->done_index = folio->index;
> 
> 		folio_lock(folio);
> 		if (likely(should_writeback_folio(mapping, wbc, folio))) {
> 			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
> 			break;
> 		}
> 		folio_unlock(folio);
> 	}
> 
> 	return folio;
> 
> ?

Because we end up having to call writeback_finish() somewhere, and it's
neater to do it here than in either writeback_get_next() or both callers
of writeback_get_folio().

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 09/12] writeback: Factor writeback_iter_next() out of write_cache_pages()
  2023-06-27  4:39   ` Christoph Hellwig
@ 2023-06-27 15:31     ` Matthew Wilcox
  2023-06-27 16:28       ` Christoph Hellwig
  2023-06-28  9:10       ` Jan Kara
  0 siblings, 2 replies; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-27 15:31 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

On Mon, Jun 26, 2023 at 09:39:39PM -0700, Christoph Hellwig wrote:
> On Mon, Jun 26, 2023 at 06:35:18PM +0100, Matthew Wilcox (Oracle) wrote:
> > Pull the post-processing of the writepage_t callback into a
> > separate function.  That means changing writeback_finish() to
> > return NULL, and writeback_get_next() to call writeback_finish()
> > when we naturally run out of folios.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  mm/page-writeback.c | 84 ++++++++++++++++++++++++---------------------
> >  1 file changed, 44 insertions(+), 40 deletions(-)
> > 
> > diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> > index 659df2b5c7c0..ef61d7006c5e 100644
> > --- a/mm/page-writeback.c
> > +++ b/mm/page-writeback.c
> > @@ -2360,7 +2360,7 @@ void tag_pages_for_writeback(struct address_space *mapping,
> >  }
> >  EXPORT_SYMBOL(tag_pages_for_writeback);
> >  
> > -static int writeback_finish(struct address_space *mapping,
> > +static struct folio *writeback_finish(struct address_space *mapping,
> >  		struct writeback_control *wbc, bool done)
> >  {
> >  	folio_batch_release(&wbc->fbatch);
> > @@ -2375,7 +2375,7 @@ static int writeback_finish(struct address_space *mapping,
> >  	if (wbc->range_cyclic || (wbc->range_whole && wbc->nr_to_write > 0))
> >  		mapping->writeback_index = wbc->done_index;
> >  
> > -	return wbc->err;
> > +	return NULL;
> 
> Having a return value that is always NULL feels a bit weird vs just
> doing that return in the caller.

It makes the callers neater.  Compare:

               if (!folio)
                        return writeback_finish(mapping, wbc, false);
vs
		if (!folio) {
			writeback_finish(mapping, wbc, false);
			return NULL;
		}

Similarly for the other two callers.

> > +		if (error == AOP_WRITEPAGE_ACTIVATE) {
> > +			folio_unlock(folio);
> > +			error = 0;
> 
> Note there really shouldn't be any need for this in outside of the
> legacy >writepage case.  But it might make sense to delay the removal
> until after ->writepage is gone to avoid bugs in conversions.

ext4_journalled_writepage_callback() still returns
AOP_WRITEPAGE_ACTIVATE, and that's used by a direct call to
write_cache_pages().

> > +		} else if (wbc->sync_mode != WB_SYNC_ALL) {
> > +			wbc->err = error;
> > +			wbc->done_index = folio->index +
> > +					folio_nr_pages(folio);
> 
> Btw, I wonder if a folio_next_index helper for this might be useful
> as it's a pattern we have in a few places.

I think that's a reasonable addition to the API.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 09/12] writeback: Factor writeback_iter_next() out of write_cache_pages()
  2023-06-27 15:31     ` Matthew Wilcox
@ 2023-06-27 16:28       ` Christoph Hellwig
  2023-06-28  9:10       ` Jan Kara
  1 sibling, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-06-27 16:28 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-mm, linux-fsdevel, linux-kernel,
	Jan Kara, David Howells

On Tue, Jun 27, 2023 at 04:31:17PM +0100, Matthew Wilcox wrote:
> It makes the callers neater.  Compare:
> 
>                if (!folio)
>                         return writeback_finish(mapping, wbc, false);
> vs
> 		if (!folio) {
> 			writeback_finish(mapping, wbc, false);
> 			return NULL;
> 		}
> 
> Similarly for the other two callers.

Not sure I agree.  See my quickly cooked up patch below.  But in the
end this completely superficial and I won't complain, do it the way
your prefer.

> 
> > > +		if (error == AOP_WRITEPAGE_ACTIVATE) {
> > > +			folio_unlock(folio);
> > > +			error = 0;
> > 
> > Note there really shouldn't be any need for this in outside of the
> > legacy >writepage case.  But it might make sense to delay the removal
> > until after ->writepage is gone to avoid bugs in conversions.
> 
> ext4_journalled_writepage_callback() still returns
> AOP_WRITEPAGE_ACTIVATE, and that's used by a direct call to
> write_cache_pages().

Yeah.  But that could trivially do the open coded unlock_page.
But probably not worth mixing into this series.

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 55832679af2194..07bbbc0dec4d00 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2360,7 +2360,7 @@ void tag_pages_for_writeback(struct address_space *mapping,
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
-static struct folio *writeback_finish(struct address_space *mapping,
+static void writeback_finish(struct address_space *mapping,
 		struct writeback_control *wbc, bool done)
 {
 	folio_batch_release(&wbc->fbatch);
@@ -2374,8 +2374,6 @@ static struct folio *writeback_finish(struct address_space *mapping,
 		wbc->done_index = 0;
 	if (wbc->range_cyclic || (wbc->range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = wbc->done_index;
-
-	return NULL;
 }
 
 static struct folio *writeback_get_next(struct address_space *mapping,
@@ -2435,20 +2433,19 @@ static struct folio *writeback_get_folio(struct address_space *mapping,
 {
 	struct folio *folio;
 
-	for (;;) {
-		folio = writeback_get_next(mapping, wbc);
-		if (!folio)
-			return writeback_finish(mapping, wbc, false);
+	while ((folio = writeback_get_next(mapping, wbc))) {
 		wbc->done_index = folio->index;
 
 		folio_lock(folio);
-		if (likely(should_writeback_folio(mapping, wbc, folio)))
-			break;
+		if (likely(should_writeback_folio(mapping, wbc, folio))) {
+			trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
+			return folio;
+		}
 		folio_unlock(folio);
 	}
 
-	trace_wbc_writepage(wbc, inode_to_bdi(mapping->host));
-	return folio;
+	writeback_finish(mapping, wbc, false);
+	return NULL;
 }
 
 struct folio *writeback_iter_init(struct address_space *mapping,
@@ -2494,7 +2491,7 @@ struct folio *writeback_iter_next(struct address_space *mapping,
 			wbc->err = error;
 			wbc->done_index = folio->index +
 					folio_nr_pages(folio);
-			return writeback_finish(mapping, wbc, true);
+			goto done;
 		}
 		if (!wbc->err)
 			wbc->err = error;
@@ -2507,9 +2504,12 @@ struct folio *writeback_iter_next(struct address_space *mapping,
 	 * to entering this loop.
 	 */
 	if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
-		return writeback_finish(mapping, wbc, true);
+		goto done;
 
 	return writeback_get_folio(mapping, wbc);
+done:
+	writeback_finish(mapping, wbc, true);
+	return NULL;
 }
 
 /**

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [PATCH 09/12] writeback: Factor writeback_iter_next() out of write_cache_pages()
  2023-06-27 15:31     ` Matthew Wilcox
  2023-06-27 16:28       ` Christoph Hellwig
@ 2023-06-28  9:10       ` Jan Kara
  1 sibling, 0 replies; 35+ messages in thread
From: Jan Kara @ 2023-06-28  9:10 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, linux-mm, linux-fsdevel, linux-kernel,
	Jan Kara, David Howells

On Tue 27-06-23 16:31:17, Matthew Wilcox wrote:
> > > +		if (error == AOP_WRITEPAGE_ACTIVATE) {
> > > +			folio_unlock(folio);
> > > +			error = 0;
> > 
> > Note there really shouldn't be any need for this in outside of the
> > legacy >writepage case.  But it might make sense to delay the removal
> > until after ->writepage is gone to avoid bugs in conversions.
> 
> ext4_journalled_writepage_callback() still returns
> AOP_WRITEPAGE_ACTIVATE, and that's used by a direct call to
> write_cache_pages().

Yeah. For the record, ext4_journalled_writepage_callback() is a bit of an abuse
of writeback code by ext4 data=journal path but it seemed like a lesser
evil than duplicating write_cache_pages() code. Essentially we need to
iterate all dirty folios and call page_mkclean() on them (to stop folio
modifications through mmap while transaction commit code is writing these
pages to the journal). If we exported from page writeback code enough
helpers for dirty page iteration, we could get rid of
ext4_journalled_writepage_callback() and its AOP_WRITEPAGE_ACTIVATE usage
as well.

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-27 10:53 ` David Howells
@ 2023-06-28 19:31   ` Matthew Wilcox
  2023-12-12  7:46     ` Christoph Hellwig
  2023-06-28 20:03   ` David Howells
  1 sibling, 1 reply; 35+ messages in thread
From: Matthew Wilcox @ 2023-06-28 19:31 UTC (permalink / raw)
  To: David Howells; +Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara

On Tue, Jun 27, 2023 at 11:53:02AM +0100, David Howells wrote:
> Do you have this on a branch somewhere?

I just pushed it out to https://git.infradead.org/users/willy/pagecache.git/shortlog/refs/heads/writeback-iter

Running it through xfstests now.  This includes one of Christoph's
suggestions, a build fix for Linus's tree, and a bugfix I noticed last
night, so it's not quite the same as the emails that were sent out in
this thread.  I doubt it'll be what I send out for v2 either.

I'm looking at afs writeback now.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-27 10:53 ` David Howells
  2023-06-28 19:31   ` Matthew Wilcox
@ 2023-06-28 20:03   ` David Howells
  2023-07-04 18:08     ` Matthew Wilcox
  1 sibling, 1 reply; 35+ messages in thread
From: David Howells @ 2023-06-28 20:03 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: dhowells, linux-mm, linux-fsdevel, linux-kernel, Jan Kara

Matthew Wilcox <willy@infradead.org> wrote:

> I'm looking at afs writeback now.

:-)

>  fs/iomap/buffered-io.c    |  14 +-
>  include/linux/pagevec.h   |  18 +++
>  include/linux/writeback.h |  22 ++-
>  mm/page-writeback.c       | 310 +++++++++++++++++++++-----------------
>  4 files changed, 216 insertions(+), 148 deletions(-)

Documentation/mm/writeback.rst too please.

David


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-28 20:03   ` David Howells
@ 2023-07-04 18:08     ` Matthew Wilcox
  0 siblings, 0 replies; 35+ messages in thread
From: Matthew Wilcox @ 2023-07-04 18:08 UTC (permalink / raw)
  To: David Howells; +Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara

On Wed, Jun 28, 2023 at 09:03:10PM +0100, David Howells wrote:
> Matthew Wilcox <willy@infradead.org> wrote:
> 
> > I'm looking at afs writeback now.
> 
> :-)
> 
> >  fs/iomap/buffered-io.c    |  14 +-
> >  include/linux/pagevec.h   |  18 +++
> >  include/linux/writeback.h |  22 ++-
> >  mm/page-writeback.c       | 310 +++++++++++++++++++++-----------------
> >  4 files changed, 216 insertions(+), 148 deletions(-)
> 
> Documentation/mm/writeback.rst too please.

$ ls Documentation/mm/w*
ls: cannot access 'Documentation/mm/w*': No such file or directory

$ git grep writeback Documentation/mm
Documentation/mm/multigen_lru.rst:do not require TLB flushes; clean pages do not require writeback.
Documentation/mm/page_migration.rst:2. Ensure that writeback is complete.
Documentation/mm/page_migration.rst:15. Queued up writeback on the new page is triggered.
Documentation/mm/physical_memory.rst:``nr_writeback_throttled``
Documentation/mm/physical_memory.rst:  Number of pages written while reclaim is throttled waiting for writeback.

Or are you suggesting I write a new piece of kernel documentation?
If so, I respectfully decline.  I've updated the kernel-doc included
in Documentation/core-api/mm-api.rst and I think that's all I can
reasonably be asked to do.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
                   ` (13 preceding siblings ...)
  2023-06-27 10:53 ` David Howells
@ 2023-11-21  5:18 ` Christoph Hellwig
  14 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-11-21  5:18 UTC (permalink / raw)
  To: Matthew Wilcox (Oracle)
  Cc: linux-mm, linux-fsdevel, linux-kernel, Jan Kara, David Howells

Just curious where this did end up.  I have some changes that could use
or conflict with this depending on your view.  They aren't time critical
yet, but if we're going down the road in this series I'd appreciate it if we'd
get it done rather sooner than later :)


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH 00/12] Convert write_cache_pages() to an iterator
  2023-06-28 19:31   ` Matthew Wilcox
@ 2023-12-12  7:46     ` Christoph Hellwig
  0 siblings, 0 replies; 35+ messages in thread
From: Christoph Hellwig @ 2023-12-12  7:46 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: David Howells, linux-mm, linux-fsdevel, linux-kernel, Jan Kara

On Wed, Jun 28, 2023 at 08:31:05PM +0100, Matthew Wilcox wrote:
> On Tue, Jun 27, 2023 at 11:53:02AM +0100, David Howells wrote:
> > Do you have this on a branch somewhere?
> 
> I just pushed it out to https://git.infradead.org/users/willy/pagecache.git/shortlog/refs/heads/writeback-iter
> 
> Running it through xfstests now.  This includes one of Christoph's
> suggestions, a build fix for Linus's tree, and a bugfix I noticed last
> night, so it's not quite the same as the emails that were sent out in
> this thread.  I doubt it'll be what I send out for v2 either.

So it turns out this version still applies fine and tests fine with
latest mainline.

I've put up a slight tweak here:

http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/writeback-iter

this moves and documents the new fields in struct writeback_control
and drops the iomap patch for now as it has conflicts in the VFS tree
in this merge window.

Do you want me to send this version out, or do you want to take over or
is there a good reason not to progress with it?

^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2023-12-12  7:46 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-26 17:35 [PATCH 00/12] Convert write_cache_pages() to an iterator Matthew Wilcox (Oracle)
2023-06-26 17:35 ` [PATCH 01/12] writeback: Factor out writeback_finish() Matthew Wilcox (Oracle)
2023-06-27  4:05   ` Christoph Hellwig
2023-06-26 17:35 ` [PATCH 02/12] writeback: Factor writeback_get_batch() out of write_cache_pages() Matthew Wilcox (Oracle)
2023-06-26 17:35 ` [PATCH 03/12] writeback: Factor should_writeback_folio() " Matthew Wilcox (Oracle)
2023-06-27  4:12   ` Christoph Hellwig
2023-06-27 11:16     ` Matthew Wilcox
2023-06-27 14:48       ` Matthew Wilcox
2023-06-26 17:35 ` [PATCH 04/12] writeback: Simplify the loops in write_cache_pages() Matthew Wilcox (Oracle)
2023-06-27  4:16   ` Christoph Hellwig
2023-06-26 17:35 ` [PATCH 05/12] pagevec: Add ability to iterate a queue Matthew Wilcox (Oracle)
2023-06-26 17:35 ` [PATCH 06/12] writeback: Use the folio_batch queue iterator Matthew Wilcox (Oracle)
2023-06-27  4:25   ` Christoph Hellwig
2023-06-26 17:35 ` [PATCH 07/12] writeback: Factor writeback_iter_init() out of write_cache_pages() Matthew Wilcox (Oracle)
2023-06-27  4:30   ` Christoph Hellwig
2023-06-27  4:31     ` Christoph Hellwig
2023-06-27 11:08       ` Matthew Wilcox
2023-06-26 17:35 ` [PATCH 08/12] writeback: Factor writeback_get_folio() " Matthew Wilcox (Oracle)
2023-06-27  4:34   ` Christoph Hellwig
2023-06-27 15:25     ` Matthew Wilcox
2023-06-26 17:35 ` [PATCH 09/12] writeback: Factor writeback_iter_next() " Matthew Wilcox (Oracle)
2023-06-27  4:39   ` Christoph Hellwig
2023-06-27 15:31     ` Matthew Wilcox
2023-06-27 16:28       ` Christoph Hellwig
2023-06-28  9:10       ` Jan Kara
2023-06-26 17:35 ` [PATCH 10/12] writeback: Add for_each_writeback_folio() Matthew Wilcox (Oracle)
2023-06-26 17:35 ` [PATCH 11/12] iomap: Convert iomap_writepages() to use for_each_writeback_folio() Matthew Wilcox (Oracle)
2023-06-26 17:35 ` [PATCH 12/12] writeback: Remove a use of write_cache_pages() from do_writepages() Matthew Wilcox (Oracle)
2023-06-27  4:03 ` [PATCH 00/12] Convert write_cache_pages() to an iterator Christoph Hellwig
2023-06-27 10:53 ` David Howells
2023-06-28 19:31   ` Matthew Wilcox
2023-12-12  7:46     ` Christoph Hellwig
2023-06-28 20:03   ` David Howells
2023-07-04 18:08     ` Matthew Wilcox
2023-11-21  5:18 ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.