linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: lkml <linux-kernel@vger.kernel.org>
Subject: [patch 9/21] batched addition of pages to the LRU
Date: Sun, 11 Aug 2002 00:39:07 -0700	[thread overview]
Message-ID: <3D56149B.C6E9414@zip.com.au> (raw)



The patch goes through the various places which were calling
lru_cache_add() against bulk pages and batches them up.

Also, this whole patch series improves the behaviour of the system
under heavy writeback load.  There is a reduction in page allocation
failures, some reduction in loss of interactivity due to page
allocators getting stuck on writeback from the VM.  (This is still bad
though).

I think it's due to the change here in mpage_writepages().  That
function was originally unconditionally refiling written-back pages to
the head of the inactive list.  The theory being that they should be
moved out of the way of page allocators, who would end up waiting on
them.

It appears that this simply had the effect of pushing dirty, unwritten
data closer to the tail of the inactive list, making things worse.

So instead, if the caller is (typically) balance_dirty_pages() then
leave the pages where they are on the LRU.

If the caller is PF_MEMALLOC then the pages *have* to be refiled.  This
is because VM writeback is clustered along mapping->dirty_pages, and
it's almost certain that the pages which are being written are near the
tail of the LRU.  If they were left there, page allocators would block
on them too soon.  It would effectively become a synchronous write.



 fs/mpage.c              |   14 ++++++++++---
 include/linux/pagemap.h |    2 +
 mm/filemap.c            |   50 +++++++++++++++++++++++++++++++++++-------------
 mm/readahead.c          |   13 ++++++++++--
 mm/shmem.c              |    2 -
 mm/swap_state.c         |    6 +++--
 6 files changed, 66 insertions(+), 21 deletions(-)

--- 2.5.31/fs/mpage.c~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/fs/mpage.c	Sun Aug 11 00:20:57 2002
@@ -263,18 +263,25 @@ mpage_readpages(struct address_space *ma
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
+	struct pagevec lru_pvec;
 
+	pagevec_init(&lru_pvec);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, list);
 
 		prefetchw(&page->flags);
 		list_del(&page->list);
-		if (!add_to_page_cache(page, mapping, page->index))
+		if (!add_to_page_cache(page, mapping, page->index)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
 					&last_block_in_bio, get_block);
-		page_cache_release(page);
+			if (!pagevec_add(&lru_pvec, page))
+				__pagevec_lru_add(&lru_pvec);
+		} else {
+			page_cache_release(page);
+		}
 	}
+	pagevec_lru_add(&lru_pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
@@ -566,7 +573,8 @@ mpage_writepages(struct address_space *m
 				bio = mpage_writepage(bio, page, get_block,
 						&last_block_in_bio, &ret);
 			}
-			if (!PageActive(page) && PageLRU(page)) {
+			if ((current->flags & PF_MEMALLOC) &&
+					!PageActive(page) && PageLRU(page)) {
 				if (!pagevec_add(&pvec, page))
 					pagevec_deactivate_inactive(&pvec);
 				page = NULL;
--- 2.5.31/mm/filemap.c~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/mm/filemap.c	Sun Aug 11 00:21:02 2002
@@ -21,6 +21,7 @@
 #include <linux/iobuf.h>
 #include <linux/hash.h>
 #include <linux/writeback.h>
+#include <linux/pagevec.h>
 #include <linux/security.h>
 /*
  * This is needed for the following functions:
@@ -530,27 +531,37 @@ int filemap_fdatawait(struct address_spa
  * In the case of swapcache, try_to_swap_out() has already locked the page, so
  * SetPageLocked() is ugly-but-OK there too.  The required page state has been
  * set up by swap_out_add_to_swap_cache().
+ *
+ * This function does not add the page to the LRU.  The caller must do that.
  */
 int add_to_page_cache(struct page *page,
-		struct address_space *mapping, unsigned long offset)
+		struct address_space *mapping, pgoff_t offset)
 {
 	int error;
 
+	page_cache_get(page);
 	write_lock(&mapping->page_lock);
 	error = radix_tree_insert(&mapping->page_tree, offset, page);
 	if (!error) {
 		SetPageLocked(page);
 		ClearPageDirty(page);
 		___add_to_page_cache(page, mapping, offset);
-		page_cache_get(page);
+	} else {
+		page_cache_release(page);
 	}
 	write_unlock(&mapping->page_lock);
-	/* Anon pages are already on the LRU */
-	if (!error && !PageSwapCache(page))
-		lru_cache_add(page);
 	return error;
 }
 
+int add_to_page_cache_lru(struct page *page,
+		struct address_space *mapping, pgoff_t offset)
+{
+	int ret = add_to_page_cache(page, mapping, offset);
+	if (ret == 0)
+		lru_cache_add(page);
+	return ret;
+}
+
 /*
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
@@ -566,7 +577,7 @@ static int page_cache_read(struct file *
 	if (!page)
 		return -ENOMEM;
 
-	error = add_to_page_cache(page, mapping, offset);
+	error = add_to_page_cache_lru(page, mapping, offset);
 	if (!error) {
 		error = mapping->a_ops->readpage(file, page);
 		page_cache_release(page);
@@ -797,7 +808,7 @@ repeat:
 			if (!cached_page)
 				return NULL;
 		}
-		err = add_to_page_cache(cached_page, mapping, index);
+		err = add_to_page_cache_lru(cached_page, mapping, index);
 		if (!err) {
 			page = cached_page;
 			cached_page = NULL;
@@ -830,7 +841,7 @@ grab_cache_page_nowait(struct address_sp
 		return NULL;
 	}
 	page = alloc_pages(mapping->gfp_mask & ~__GFP_FS, 0);
-	if (page && add_to_page_cache(page, mapping, index)) {
+	if (page && add_to_page_cache_lru(page, mapping, index)) {
 		page_cache_release(page);
 		page = NULL;
 	}
@@ -994,7 +1005,7 @@ no_cached_page:
 				break;
 			}
 		}
-		error = add_to_page_cache(cached_page, mapping, index);
+		error = add_to_page_cache_lru(cached_page, mapping, index);
 		if (error) {
 			if (error == -EEXIST)
 				goto find_page;
@@ -1704,7 +1715,7 @@ repeat:
 			if (!cached_page)
 				return ERR_PTR(-ENOMEM);
 		}
-		err = add_to_page_cache(cached_page, mapping, index);
+		err = add_to_page_cache_lru(cached_page, mapping, index);
 		if (err == -EEXIST)
 			goto repeat;
 		if (err < 0) {
@@ -1764,8 +1775,14 @@ retry:
 	return page;
 }
 
-static inline struct page * __grab_cache_page(struct address_space *mapping,
-				unsigned long index, struct page **cached_page)
+/*
+ * If the page was newly created, increment its refcount and add it to the
+ * caller's lru-buffering pagevec.  This function is specifically for
+ * generic_file_write().
+ */
+static inline struct page *
+__grab_cache_page(struct address_space *mapping, unsigned long index,
+			struct page **cached_page, struct pagevec *lru_pvec)
 {
 	int err;
 	struct page *page;
@@ -1782,6 +1799,9 @@ repeat:
 			goto repeat;
 		if (err == 0) {
 			page = *cached_page;
+			page_cache_get(page);
+			if (!pagevec_add(lru_pvec, page))
+				__pagevec_lru_add(lru_pvec);
 			*cached_page = NULL;
 		}
 	}
@@ -1829,6 +1849,7 @@ generic_file_write(struct file *file, co
 	int		err;
 	unsigned	bytes;
 	time_t		time_now;
+	struct pagevec	lru_pvec;
 
 	if (unlikely((ssize_t) count < 0))
 		return -EINVAL;
@@ -1836,6 +1857,8 @@ generic_file_write(struct file *file, co
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
 
+	pagevec_init(&lru_pvec);
+
 	down(&inode->i_sem);
 	pos = *ppos;
 	if (unlikely(pos < 0)) {
@@ -1976,7 +1999,7 @@ generic_file_write(struct file *file, co
 			__get_user(dummy, buf+bytes-1);
 		}
 
-		page = __grab_cache_page(mapping, index, &cached_page);
+		page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec);
 		if (!page) {
 			status = -ENOMEM;
 			break;
@@ -2038,6 +2061,7 @@ generic_file_write(struct file *file, co
 out_status:	
 	err = written ? written : status;
 out:
+	pagevec_lru_add(&lru_pvec);
 	up(&inode->i_sem);
 	return err;
 }
--- 2.5.31/mm/readahead.c~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/mm/readahead.c	Sun Aug 11 00:20:33 2002
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/pagevec.h>
 
 struct backing_dev_info default_backing_dev_info = {
 	.ra_pages	= (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
@@ -36,6 +37,9 @@ read_pages(struct file *file, struct add
 		struct list_head *pages, unsigned nr_pages)
 {
 	unsigned page_idx;
+	struct pagevec lru_pvec;
+
+	pagevec_init(&lru_pvec);
 
 	if (mapping->a_ops->readpages)
 		return mapping->a_ops->readpages(mapping, pages, nr_pages);
@@ -43,10 +47,15 @@ read_pages(struct file *file, struct add
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, list);
 		list_del(&page->list);
-		if (!add_to_page_cache(page, mapping, page->index))
+		if (!add_to_page_cache(page, mapping, page->index)) {
+			if (!pagevec_add(&lru_pvec, page))
+				__pagevec_lru_add(&lru_pvec);
 			mapping->a_ops->readpage(file, page);
-		page_cache_release(page);
+		} else {
+			page_cache_release(page);
+		}
 	}
+	pagevec_lru_add(&lru_pvec);
 	return 0;
 }
 
--- 2.5.31/mm/swap_state.c~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/mm/swap_state.c	Sun Aug 11 00:21:01 2002
@@ -72,6 +72,9 @@ int add_to_swap_cache(struct page *page,
 		return -ENOENT;
 	}
 	error = add_to_page_cache(page, &swapper_space, entry.val);
+	/*
+	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
+	 */
 	if (error != 0) {
 		swap_free(entry);
 		if (error == -EEXIST)
@@ -276,8 +279,7 @@ int move_from_swap_cache(struct page *pa
 		SetPageDirty(page);
 		___add_to_page_cache(page, mapping, index);
 		/* fix that up */
-		list_del(&page->list);
-		list_add(&page->list, &mapping->dirty_pages);
+		list_move(&page->list, &mapping->dirty_pages);
 		write_unlock(&mapping->page_lock);
 		write_unlock(&swapper_space.page_lock);
 
--- 2.5.31/mm/shmem.c~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/mm/shmem.c	Sun Aug 11 00:20:33 2002
@@ -668,7 +668,7 @@ repeat:
 		page = page_cache_alloc(mapping);
 		if (!page)
 			goto no_mem;
-		error = add_to_page_cache(page, mapping, idx);
+		error = add_to_page_cache_lru(page, mapping, idx);
 		if (error < 0) {
 			page_cache_release(page);
 			goto no_mem;
--- 2.5.31/include/linux/pagemap.h~batched-lru-add	Sun Aug 11 00:20:33 2002
+++ 2.5.31-akpm/include/linux/pagemap.h	Sun Aug 11 00:21:02 2002
@@ -58,6 +58,8 @@ extern struct page * read_cache_page(str
 
 extern int add_to_page_cache(struct page *page,
 		struct address_space *mapping, unsigned long index);
+extern int add_to_page_cache_lru(struct page *page,
+		struct address_space *mapping, unsigned long index);
 extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
 

.

             reply	other threads:[~2002-08-11  7:26 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-08-11  7:39 Andrew Morton [this message]
2002-08-11 15:23 ` [patch 9/21] batched addition of pages to the LRU Rik van Riel
2002-08-12  5:16   ` Andrew Morton
2002-08-12  5:24     ` Rik van Riel
2002-08-12  5:51       ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D56149B.C6E9414@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).