All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Swap over NFS
@ 2006-08-25 15:37 ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust

Hi,

These patches implement swap files on NFS, but lay the foundation to
allow swap files on any non block device backed file.

As is, these patches allow for swapfiles to be used on NFS mounts. However
some extra work is needed to make this safe. It is not very hard to deadlock
a kernel with only these patches.

In the next VM deadlock avoidance series I will include a patch to remedy
this.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 0/6] Swap over NFS
@ 2006-08-25 15:37 ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust

Hi,

These patches implement swap files on NFS, but lay the foundation to
allow swap files on any non block device backed file.

As is, these patches allow for swapfiles to be used on NFS mounts. However
some extra work is needed to make this safe. It is not very hard to deadlock
a kernel with only these patches.

In the next VM deadlock avoidance series I will include a patch to remedy
this.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 1/6] mm: Generic swap file support
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:37   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust


Add support for non block device backed swap files.

A new address_space_operations method is added:
  int swapfile(struct address_space *, int)

When this method is found during sys_swapon() and returns no error, the
swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops.

The swapfile method will be used to communicate to the address_space that the
VM relies on it, and the address_space should take adequate measures (like 
reserving memory for mempools or the like).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/fs.h   |    1 
 include/linux/swap.h |    4 +++
 init/Kconfig         |    5 ++++
 mm/page_io.c         |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/swap_state.c      |    6 +++++
 mm/swapfile.c        |   27 ++++++++++++++++++++++
 6 files changed, 102 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -115,6 +115,7 @@ enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
 	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
+	SWP_FILE	= (1 << 2),	/* file swap area */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
@@ -212,6 +213,9 @@ extern void swap_unplug_io_fn(struct bac
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
+extern void swap_sync_page(struct page *page);
+extern int swap_set_page_dirty(struct page *page);
+extern int swap_releasepage(struct page *page, gfp_t gfp_mask);
 extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
 
 /* linux/mm/swap_state.c */
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -100,6 +100,11 @@ config SWAP
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer.  If unsure say Y.
 
+config SWAP_FILE
+	bool "Support for paging to/from non block device files"
+	depends on SWAP
+	default n
+
 config SYSVIPC
 	bool "System V IPC"
 	---help---
Index: linux-2.6/mm/page_io.c
===================================================================
--- linux-2.6.orig/mm/page_io.c
+++ linux-2.6/mm/page_io.c
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <linux/buffer_head.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -91,6 +92,14 @@ int swap_writepage(struct page *page, st
 		unlock_page(page);
 		goto out;
 	}
+#ifdef CONFIG_SWAP_FILE
+	{
+		struct swap_info_struct *sis = page_swap_info(page);
+		if (sis->flags & SWP_FILE)
+			return sis->swap_file->f_mapping->
+				a_ops->writepage(page, wbc);
+	}
+#endif
 	bio = get_swap_bio(GFP_NOIO, page_private(page), page,
 				end_swap_bio_write);
 	if (bio == NULL) {
@@ -116,6 +125,14 @@ int swap_readpage(struct file *file, str
 
 	BUG_ON(!PageLocked(page));
 	ClearPageUptodate(page);
+#ifdef CONFIG_SWAP_FILE
+	{
+		struct swap_info_struct *sis = page_swap_info(page);
+		if (sis->flags & SWP_FILE)
+			return sis->swap_file->f_mapping->
+				a_ops->readpage(sis->swap_file, page);
+	}
+#endif
 	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
 				end_swap_bio_read);
 	if (bio == NULL) {
@@ -129,6 +146,49 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_SWAP_FILE
+void swap_sync_page(struct page *page)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+
+	if (sis->flags & SWP_FILE) {
+		const struct address_space_operations * a_ops =
+			sis->swap_file->f_mapping->a_ops;
+		if (a_ops->sync_page)
+			a_ops->sync_page(page);
+	} else
+		block_sync_page(page);
+}
+
+int swap_set_page_dirty(struct page *page)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+
+	if (sis->flags & SWP_FILE) {
+		const struct address_space_operations * a_ops =
+			sis->swap_file->f_mapping->a_ops;
+		if (a_ops->set_page_dirty)
+			return a_ops->set_page_dirty(page);
+		return __set_page_dirty_buffers(page);
+	}
+
+	return __set_page_dirty_nobuffers(page);
+}
+
+int swap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+	const struct address_space_operations * a_ops =
+		sis->swap_file->f_mapping->a_ops;
+
+	if ((sis->flags & SWP_FILE) && a_ops->releasepage)
+		return a_ops->releasepage(page, gfp_mask);
+
+	BUG();
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SOFTWARE_SUSPEND
 /*
  * A scruffy utility function to read or write an arbitrary swap page
Index: linux-2.6/mm/swap_state.c
===================================================================
--- linux-2.6.orig/mm/swap_state.c
+++ linux-2.6/mm/swap_state.c
@@ -26,8 +26,14 @@
  */
 static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
+#ifdef CONFIG_SWAP_FILE
+	.sync_page	= swap_sync_page,
+	.set_page_dirty	= swap_set_page_dirty,
+	.releasepage	= swap_releasepage,
+#else
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
+#endif
 	.migratepage	= migrate_page,
 };
 
Index: linux-2.6/mm/swapfile.c
===================================================================
--- linux-2.6.orig/mm/swapfile.c
+++ linux-2.6/mm/swapfile.c
@@ -411,7 +411,12 @@ void free_swap_and_cache(swp_entry_t ent
 	if (page) {
 		int one_user;
 
+#ifdef CONFIG_SWAP_FILE
+		if (PagePrivate(page))
+			page_mapping(page)->a_ops->releasepage(page, 0);
+#else
 		BUG_ON(PagePrivate(page));
+#endif
 		one_user = (page_count(page) == 2);
 		/* Only cache user (+us), or swap space full? Free it! */
 		/* Also recheck PageSwapCache after page is locked (above) */
@@ -943,6 +948,13 @@ static void destroy_swap_extents(struct 
 		list_del(&se->list);
 		kfree(se);
 	}
+#ifdef CONFIG_SWAP_FILE
+	if (sis->flags & SWP_FILE) {
+		sis->flags &= ~SWP_FILE;
+		sis->swap_file->f_mapping->a_ops->
+			swapfile(sis->swap_file->f_mapping, 0);
+	}
+#endif
 }
 
 /*
@@ -1035,6 +1047,19 @@ static int setup_swap_extents(struct swa
 		goto done;
 	}
 
+#ifdef CONFIG_SWAP_FILE
+	if (sis->swap_file->f_mapping->a_ops->swapfile) {
+		ret = sis->swap_file->f_mapping->a_ops->
+			swapfile(sis->swap_file->f_mapping, 1);
+		if (!ret) {
+			sis->flags |= SWP_FILE;
+			ret = add_swap_extent(sis, 0, sis->max, 0);
+			*span = sis->pages;
+		}
+		goto done;
+	}
+#endif
+
 	blkbits = inode->i_blkbits;
 	blocks_per_page = PAGE_SIZE >> blkbits;
 
@@ -1591,7 +1616,7 @@ asmlinkage long sys_swapon(const char __
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
-	p->flags = SWP_ACTIVE;
+	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -382,6 +382,7 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct address_space *,
 			struct page *, struct page *);
+	int (*swapfile)(struct address_space *, int);
 };
 
 struct backing_dev_info;

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 1/6] mm: Generic swap file support
@ 2006-08-25 15:37   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust

Add support for non block device backed swap files.

A new address_space_operations method is added:
  int swapfile(struct address_space *, int)

When this method is found during sys_swapon() and returns no error, the
swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops.

The swapfile method will be used to communicate to the address_space that the
VM relies on it, and the address_space should take adequate measures (like 
reserving memory for mempools or the like).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/fs.h   |    1 
 include/linux/swap.h |    4 +++
 init/Kconfig         |    5 ++++
 mm/page_io.c         |   60 +++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/swap_state.c      |    6 +++++
 mm/swapfile.c        |   27 ++++++++++++++++++++++
 6 files changed, 102 insertions(+), 1 deletion(-)

Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -115,6 +115,7 @@ enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
 	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
 	SWP_ACTIVE	= (SWP_USED | SWP_WRITEOK),
+	SWP_FILE	= (1 << 2),	/* file swap area */
 					/* add others here before... */
 	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */
 };
@@ -212,6 +213,9 @@ extern void swap_unplug_io_fn(struct bac
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
+extern void swap_sync_page(struct page *page);
+extern int swap_set_page_dirty(struct page *page);
+extern int swap_releasepage(struct page *page, gfp_t gfp_mask);
 extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
 
 /* linux/mm/swap_state.c */
Index: linux-2.6/init/Kconfig
===================================================================
--- linux-2.6.orig/init/Kconfig
+++ linux-2.6/init/Kconfig
@@ -100,6 +100,11 @@ config SWAP
 	  used to provide more virtual memory than the actual RAM present
 	  in your computer.  If unsure say Y.
 
+config SWAP_FILE
+	bool "Support for paging to/from non block device files"
+	depends on SWAP
+	default n
+
 config SYSVIPC
 	bool "System V IPC"
 	---help---
Index: linux-2.6/mm/page_io.c
===================================================================
--- linux-2.6.orig/mm/page_io.c
+++ linux-2.6/mm/page_io.c
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <linux/buffer_head.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags, pgoff_t index,
@@ -91,6 +92,14 @@ int swap_writepage(struct page *page, st
 		unlock_page(page);
 		goto out;
 	}
+#ifdef CONFIG_SWAP_FILE
+	{
+		struct swap_info_struct *sis = page_swap_info(page);
+		if (sis->flags & SWP_FILE)
+			return sis->swap_file->f_mapping->
+				a_ops->writepage(page, wbc);
+	}
+#endif
 	bio = get_swap_bio(GFP_NOIO, page_private(page), page,
 				end_swap_bio_write);
 	if (bio == NULL) {
@@ -116,6 +125,14 @@ int swap_readpage(struct file *file, str
 
 	BUG_ON(!PageLocked(page));
 	ClearPageUptodate(page);
+#ifdef CONFIG_SWAP_FILE
+	{
+		struct swap_info_struct *sis = page_swap_info(page);
+		if (sis->flags & SWP_FILE)
+			return sis->swap_file->f_mapping->
+				a_ops->readpage(sis->swap_file, page);
+	}
+#endif
 	bio = get_swap_bio(GFP_KERNEL, page_private(page), page,
 				end_swap_bio_read);
 	if (bio == NULL) {
@@ -129,6 +146,49 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_SWAP_FILE
+void swap_sync_page(struct page *page)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+
+	if (sis->flags & SWP_FILE) {
+		const struct address_space_operations * a_ops =
+			sis->swap_file->f_mapping->a_ops;
+		if (a_ops->sync_page)
+			a_ops->sync_page(page);
+	} else
+		block_sync_page(page);
+}
+
+int swap_set_page_dirty(struct page *page)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+
+	if (sis->flags & SWP_FILE) {
+		const struct address_space_operations * a_ops =
+			sis->swap_file->f_mapping->a_ops;
+		if (a_ops->set_page_dirty)
+			return a_ops->set_page_dirty(page);
+		return __set_page_dirty_buffers(page);
+	}
+
+	return __set_page_dirty_nobuffers(page);
+}
+
+int swap_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	struct swap_info_struct *sis = page_swap_info(page);
+	const struct address_space_operations * a_ops =
+		sis->swap_file->f_mapping->a_ops;
+
+	if ((sis->flags & SWP_FILE) && a_ops->releasepage)
+		return a_ops->releasepage(page, gfp_mask);
+
+	BUG();
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_SOFTWARE_SUSPEND
 /*
  * A scruffy utility function to read or write an arbitrary swap page
Index: linux-2.6/mm/swap_state.c
===================================================================
--- linux-2.6.orig/mm/swap_state.c
+++ linux-2.6/mm/swap_state.c
@@ -26,8 +26,14 @@
  */
 static const struct address_space_operations swap_aops = {
 	.writepage	= swap_writepage,
+#ifdef CONFIG_SWAP_FILE
+	.sync_page	= swap_sync_page,
+	.set_page_dirty	= swap_set_page_dirty,
+	.releasepage	= swap_releasepage,
+#else
 	.sync_page	= block_sync_page,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
+#endif
 	.migratepage	= migrate_page,
 };
 
Index: linux-2.6/mm/swapfile.c
===================================================================
--- linux-2.6.orig/mm/swapfile.c
+++ linux-2.6/mm/swapfile.c
@@ -411,7 +411,12 @@ void free_swap_and_cache(swp_entry_t ent
 	if (page) {
 		int one_user;
 
+#ifdef CONFIG_SWAP_FILE
+		if (PagePrivate(page))
+			page_mapping(page)->a_ops->releasepage(page, 0);
+#else
 		BUG_ON(PagePrivate(page));
+#endif
 		one_user = (page_count(page) == 2);
 		/* Only cache user (+us), or swap space full? Free it! */
 		/* Also recheck PageSwapCache after page is locked (above) */
@@ -943,6 +948,13 @@ static void destroy_swap_extents(struct 
 		list_del(&se->list);
 		kfree(se);
 	}
+#ifdef CONFIG_SWAP_FILE
+	if (sis->flags & SWP_FILE) {
+		sis->flags &= ~SWP_FILE;
+		sis->swap_file->f_mapping->a_ops->
+			swapfile(sis->swap_file->f_mapping, 0);
+	}
+#endif
 }
 
 /*
@@ -1035,6 +1047,19 @@ static int setup_swap_extents(struct swa
 		goto done;
 	}
 
+#ifdef CONFIG_SWAP_FILE
+	if (sis->swap_file->f_mapping->a_ops->swapfile) {
+		ret = sis->swap_file->f_mapping->a_ops->
+			swapfile(sis->swap_file->f_mapping, 1);
+		if (!ret) {
+			sis->flags |= SWP_FILE;
+			ret = add_swap_extent(sis, 0, sis->max, 0);
+			*span = sis->pages;
+		}
+		goto done;
+	}
+#endif
+
 	blkbits = inode->i_blkbits;
 	blocks_per_page = PAGE_SIZE >> blkbits;
 
@@ -1591,7 +1616,7 @@ asmlinkage long sys_swapon(const char __
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
-	p->flags = SWP_ACTIVE;
+	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += nr_good_pages;
 	total_swap_pages += nr_good_pages;
 
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -382,6 +382,7 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct address_space *,
 			struct page *, struct page *);
+	int (*swapfile)(struct address_space *, int);
 };
 
 struct backing_dev_info;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 2/6] mm: New page_file_* methods
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:37   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust


In order to teach filesystems to handle swap cache pages, two new page
functions are introduced:

  pgoff_t page_file_index(struct page *);
  struct address_space *page_file_mapping(struct page *);

page_file_index - gives the offset of this page in the file in PAGE_CACHE_SIZE
blocks. Like page->index is for mapped pages, this function also gives the
correct index for PG_swapcache pages.

page_file_mapping - gives the mapping backing the actual page; that is for
swap cache pages it will give swap_file->f_mapping.

page_offset() is modified to use page_file_index(), so that it will give the
expected result, even for PG_swapcache pages.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/mm.h      |   30 ++++++++++++++++++++++++++++++
 include/linux/pagemap.h |    2 +-
 include/linux/swap.h    |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/swapops.h |   44 --------------------------------------------
 4 files changed, 79 insertions(+), 45 deletions(-)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/swap.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -579,6 +580,22 @@ static inline struct address_space *page
 	return mapping;
 }
 
+static inline
+struct swap_info_struct * page_swap_info(struct page *page)
+{
+	swp_entry_t swap = { .val = page_private(page) };
+	BUG_ON(!PageSwapCache(page));
+	return get_swap_info_struct(swp_type(swap));
+}
+
+static inline
+struct address_space *page_file_mapping(struct page *page)
+{
+	if (unlikely(PageSwapCache(page)))
+		return page_swap_info(page)->swap_file->f_mapping;
+	return page->mapping;
+}
+
 static inline int PageAnon(struct page *page)
 {
 	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
@@ -596,6 +613,19 @@ static inline pgoff_t page_index(struct 
 }
 
 /*
+ * Return the file index of the page. Regular pagecache pages use ->index
+ * whereas swapcache pages use swp_offset(->private)
+ */
+static inline pgoff_t page_file_index(struct page *page)
+{
+	if (unlikely(PageSwapCache(page))) {
+		swp_entry_t swap = { .val = page_private(page) };
+		return swp_offset(swap);
+	}
+	return page->index;
+}
+
+/*
  * The atomic page->_mapcount, like _count, starts from -1:
  * so that transitions both from it and to it can be tracked,
  * using atomic_inc_and_test and atomic_add_negative(-1).
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h
+++ linux-2.6/include/linux/pagemap.h
@@ -118,7 +118,7 @@ extern void __remove_from_page_cache(str
  */
 static inline loff_t page_offset(struct page *page)
 {
-	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
 }
 
 static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -75,6 +75,50 @@ typedef struct {
 } swp_entry_t;
 
 /*
+ * swapcache pages are stored in the swapper_space radix tree.  We want to
+ * get good packing density in that tree, so the index should be dense in
+ * the low-order bits.
+ *
+ * We arrange the `type' and `offset' fields so that `type' is at the five
+ * high-order bits of the swp_entry_t and `offset' is right-aligned in the
+ * remaining bits.
+ *
+ * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
+ */
+#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
+#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
+
+/*
+ * Store a type+offset into a swp_entry_t in an arch-independent format
+ */
+static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
+{
+	swp_entry_t ret;
+
+	ret.val = (type << SWP_TYPE_SHIFT(ret)) |
+			(offset & SWP_OFFSET_MASK(ret));
+	return ret;
+}
+
+/*
+ * Extract the `type' field from a swp_entry_t.  The swp_entry_t is in
+ * arch-independent format
+ */
+static inline unsigned swp_type(swp_entry_t entry)
+{
+	return (entry.val >> SWP_TYPE_SHIFT(entry));
+}
+
+/*
+ * Extract the `offset' field from a swp_entry_t.  The swp_entry_t is in
+ * arch-independent format
+ */
+static inline pgoff_t swp_offset(swp_entry_t entry)
+{
+	return entry.val & SWP_OFFSET_MASK(entry);
+}
+
+/*
  * current->reclaim_state points to one of these when a task is running
  * memory reclaim
  */
@@ -322,6 +366,10 @@ static inline int valid_swaphandles(swp_
 	return 0;
 }
 
+static inline struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+	return NULL;
+}
 #define can_share_swap_page(p)			(page_mapcount(p) == 1)
 
 static inline int move_to_swap_cache(struct page *page, swp_entry_t entry)
Index: linux-2.6/include/linux/swapops.h
===================================================================
--- linux-2.6.orig/include/linux/swapops.h
+++ linux-2.6/include/linux/swapops.h
@@ -1,48 +1,4 @@
 /*
- * swapcache pages are stored in the swapper_space radix tree.  We want to
- * get good packing density in that tree, so the index should be dense in
- * the low-order bits.
- *
- * We arrange the `type' and `offset' fields so that `type' is at the five
- * high-order bits of the swp_entry_t and `offset' is right-aligned in the
- * remaining bits.
- *
- * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
- */
-#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
-#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
-
-/*
- * Store a type+offset into a swp_entry_t in an arch-independent format
- */
-static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
-{
-	swp_entry_t ret;
-
-	ret.val = (type << SWP_TYPE_SHIFT(ret)) |
-			(offset & SWP_OFFSET_MASK(ret));
-	return ret;
-}
-
-/*
- * Extract the `type' field from a swp_entry_t.  The swp_entry_t is in
- * arch-independent format
- */
-static inline unsigned swp_type(swp_entry_t entry)
-{
-	return (entry.val >> SWP_TYPE_SHIFT(entry));
-}
-
-/*
- * Extract the `offset' field from a swp_entry_t.  The swp_entry_t is in
- * arch-independent format
- */
-static inline pgoff_t swp_offset(swp_entry_t entry)
-{
-	return entry.val & SWP_OFFSET_MASK(entry);
-}
-
-/*
  * Convert the arch-dependent pte representation of a swp_entry_t into an
  * arch-independent swp_entry_t.
  */

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 2/6] mm: New page_file_* methods
@ 2006-08-25 15:37   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust

In order to teach filesystems to handle swap cache pages, two new page
functions are introduced:

  pgoff_t page_file_index(struct page *);
  struct address_space *page_file_mapping(struct page *);

page_file_index - gives the offset of this page in the file in PAGE_CACHE_SIZE
blocks. Like page->index is for mapped pages, this function also gives the
correct index for PG_swapcache pages.

page_file_mapping - gives the mapping backing the actual page; that is for
swap cache pages it will give swap_file->f_mapping.

page_offset() is modified to use page_file_index(), so that it will give the
expected result, even for PG_swapcache pages.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 include/linux/mm.h      |   30 ++++++++++++++++++++++++++++++
 include/linux/pagemap.h |    2 +-
 include/linux/swap.h    |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/swapops.h |   44 --------------------------------------------
 4 files changed, 79 insertions(+), 45 deletions(-)

Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h
+++ linux-2.6/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/swap.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -579,6 +580,22 @@ static inline struct address_space *page
 	return mapping;
 }
 
+static inline
+struct swap_info_struct * page_swap_info(struct page *page)
+{
+	swp_entry_t swap = { .val = page_private(page) };
+	BUG_ON(!PageSwapCache(page));
+	return get_swap_info_struct(swp_type(swap));
+}
+
+static inline
+struct address_space *page_file_mapping(struct page *page)
+{
+	if (unlikely(PageSwapCache(page)))
+		return page_swap_info(page)->swap_file->f_mapping;
+	return page->mapping;
+}
+
 static inline int PageAnon(struct page *page)
 {
 	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
@@ -596,6 +613,19 @@ static inline pgoff_t page_index(struct 
 }
 
 /*
+ * Return the file index of the page. Regular pagecache pages use ->index
+ * whereas swapcache pages use swp_offset(->private)
+ */
+static inline pgoff_t page_file_index(struct page *page)
+{
+	if (unlikely(PageSwapCache(page))) {
+		swp_entry_t swap = { .val = page_private(page) };
+		return swp_offset(swap);
+	}
+	return page->index;
+}
+
+/*
  * The atomic page->_mapcount, like _count, starts from -1:
  * so that transitions both from it and to it can be tracked,
  * using atomic_inc_and_test and atomic_add_negative(-1).
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h
+++ linux-2.6/include/linux/pagemap.h
@@ -118,7 +118,7 @@ extern void __remove_from_page_cache(str
  */
 static inline loff_t page_offset(struct page *page)
 {
-	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
+	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT;
 }
 
 static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -75,6 +75,50 @@ typedef struct {
 } swp_entry_t;
 
 /*
+ * swapcache pages are stored in the swapper_space radix tree.  We want to
+ * get good packing density in that tree, so the index should be dense in
+ * the low-order bits.
+ *
+ * We arrange the `type' and `offset' fields so that `type' is at the five
+ * high-order bits of the swp_entry_t and `offset' is right-aligned in the
+ * remaining bits.
+ *
+ * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
+ */
+#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
+#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
+
+/*
+ * Store a type+offset into a swp_entry_t in an arch-independent format
+ */
+static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
+{
+	swp_entry_t ret;
+
+	ret.val = (type << SWP_TYPE_SHIFT(ret)) |
+			(offset & SWP_OFFSET_MASK(ret));
+	return ret;
+}
+
+/*
+ * Extract the `type' field from a swp_entry_t.  The swp_entry_t is in
+ * arch-independent format
+ */
+static inline unsigned swp_type(swp_entry_t entry)
+{
+	return (entry.val >> SWP_TYPE_SHIFT(entry));
+}
+
+/*
+ * Extract the `offset' field from a swp_entry_t.  The swp_entry_t is in
+ * arch-independent format
+ */
+static inline pgoff_t swp_offset(swp_entry_t entry)
+{
+	return entry.val & SWP_OFFSET_MASK(entry);
+}
+
+/*
  * current->reclaim_state points to one of these when a task is running
  * memory reclaim
  */
@@ -322,6 +366,10 @@ static inline int valid_swaphandles(swp_
 	return 0;
 }
 
+static inline struct swap_info_struct *get_swap_info_struct(unsigned type)
+{
+	return NULL;
+}
 #define can_share_swap_page(p)			(page_mapcount(p) == 1)
 
 static inline int move_to_swap_cache(struct page *page, swp_entry_t entry)
Index: linux-2.6/include/linux/swapops.h
===================================================================
--- linux-2.6.orig/include/linux/swapops.h
+++ linux-2.6/include/linux/swapops.h
@@ -1,48 +1,4 @@
 /*
- * swapcache pages are stored in the swapper_space radix tree.  We want to
- * get good packing density in that tree, so the index should be dense in
- * the low-order bits.
- *
- * We arrange the `type' and `offset' fields so that `type' is at the five
- * high-order bits of the swp_entry_t and `offset' is right-aligned in the
- * remaining bits.
- *
- * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
- */
-#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
-#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
-
-/*
- * Store a type+offset into a swp_entry_t in an arch-independent format
- */
-static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
-{
-	swp_entry_t ret;
-
-	ret.val = (type << SWP_TYPE_SHIFT(ret)) |
-			(offset & SWP_OFFSET_MASK(ret));
-	return ret;
-}
-
-/*
- * Extract the `type' field from a swp_entry_t.  The swp_entry_t is in
- * arch-independent format
- */
-static inline unsigned swp_type(swp_entry_t entry)
-{
-	return (entry.val >> SWP_TYPE_SHIFT(entry));
-}
-
-/*
- * Extract the `offset' field from a swp_entry_t.  The swp_entry_t is in
- * arch-independent format
- */
-static inline pgoff_t swp_offset(swp_entry_t entry)
-{
-	return entry.val & SWP_OFFSET_MASK(entry);
-}
-
-/*
  * Convert the arch-dependent pte representation of a swp_entry_t into an
  * arch-independent swp_entry_t.
  */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 3/6] uml: arch/um remove_mapping() clash
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:37   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust


Now that 'include/linux/mm.h' includes 'include/linux/swap.h', the global
remove_mapping() definition clashes with the arch/um one.

Rename the arch/um one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/um/kernel/physmem.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/um/kernel/physmem.c
===================================================================
--- linux-2.6.orig/arch/um/kernel/physmem.c
+++ linux-2.6/arch/um/kernel/physmem.c
@@ -160,7 +160,7 @@ int physmem_subst_mapping(void *virt, in
 
 static int physmem_fd = -1;
 
-static void remove_mapping(struct phys_desc *desc)
+static void um_remove_mapping(struct phys_desc *desc)
 {
 	void *virt = desc->virt;
 	int err;
@@ -184,7 +184,7 @@ int physmem_remove_mapping(void *virt)
 	if(desc == NULL)
 		return(0);
 
-	remove_mapping(desc);
+	um_remove_mapping(desc);
 	return(1);
 }
 
@@ -205,7 +205,7 @@ void physmem_forget_descriptor(int fd)
 		page = list_entry(ele, struct phys_desc, list);
 		offset = page->offset;
 		addr = page->virt;
-		remove_mapping(page);
+		um_remove_mapping(page);
 		err = os_seek_file(fd, offset);
 		if(err)
 			panic("physmem_forget_descriptor - failed to seek "

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 3/6] uml: arch/um remove_mapping() clash
@ 2006-08-25 15:37   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust

Now that 'include/linux/mm.h' includes 'include/linux/swap.h', the global
remove_mapping() definition clashes with the arch/um one.

Rename the arch/um one.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 arch/um/kernel/physmem.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

Index: linux-2.6/arch/um/kernel/physmem.c
===================================================================
--- linux-2.6.orig/arch/um/kernel/physmem.c
+++ linux-2.6/arch/um/kernel/physmem.c
@@ -160,7 +160,7 @@ int physmem_subst_mapping(void *virt, in
 
 static int physmem_fd = -1;
 
-static void remove_mapping(struct phys_desc *desc)
+static void um_remove_mapping(struct phys_desc *desc)
 {
 	void *virt = desc->virt;
 	int err;
@@ -184,7 +184,7 @@ int physmem_remove_mapping(void *virt)
 	if(desc == NULL)
 		return(0);
 
-	remove_mapping(desc);
+	um_remove_mapping(desc);
 	return(1);
 }
 
@@ -205,7 +205,7 @@ void physmem_forget_descriptor(int fd)
 		page = list_entry(ele, struct phys_desc, list);
 		offset = page->offset;
 		addr = page->virt;
-		remove_mapping(page);
+		um_remove_mapping(page);
 		err = os_seek_file(fd, offset);
 		if(err)
 			panic("physmem_forget_descriptor - failed to seek "

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 4/6] nfs: Teach NFS about swap cache pages
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:37   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust


Teach the NFS client how to treat PG_swapcache pages.

Replace all occurrences of page->index and page->mapping in the NFS client
with the new page_file_index() and page_file_mapping() functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/dir.c      |    4 ++--
 fs/nfs/file.c     |    6 +++---
 fs/nfs/pagelist.c |    8 ++++----
 fs/nfs/read.c     |   10 +++++-----
 fs/nfs/write.c    |   34 +++++++++++++++++-----------------
 5 files changed, 31 insertions(+), 31 deletions(-)

Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -303,16 +303,16 @@ static int nfs_commit_write(struct file 
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 
 	/* Cancel any unstarted writes on this page */
 	if (offset == 0)
-		nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
+		nfs_sync_inode_wait(inode, page_file_index(page), 1, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	return !nfs_wb_page(page_file_mapping(page)->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/fs/nfs/pagelist.c
===================================================================
--- linux-2.6.orig/fs/nfs/pagelist.c
+++ linux-2.6/fs/nfs/pagelist.c
@@ -82,11 +82,11 @@ nfs_create_request(struct nfs_open_conte
 	 * update_nfs_request below if the region is not locked. */
 	req->wb_page    = page;
 	atomic_set(&req->wb_complete, 0);
-	req->wb_index	= page->index;
+	req->wb_index	= page_file_index(page);
 	page_cache_get(page);
 	BUG_ON(PagePrivate(page));
 	BUG_ON(!PageLocked(page));
-	BUG_ON(page->mapping->host != inode);
+	BUG_ON(page_file_mapping(page)->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -271,7 +271,7 @@ nfs_coalesce_requests(struct list_head *
  * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
  * @nfsi: NFS inode
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
+ * @idx_start: lower bound of page_file_index(page) to scan
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
@@ -328,7 +328,7 @@ out:
  * @nfsi: NFS inode
  * @head: One of the NFS inode request lists
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
+ * @idx_start: lower bound of page_file_index(page) to scan
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
Index: linux-2.6/fs/nfs/read.c
===================================================================
--- linux-2.6.orig/fs/nfs/read.c
+++ linux-2.6/fs/nfs/read.c
@@ -84,9 +84,9 @@ unsigned int nfs_page_length(struct inod
 	if (i_size <= 0)
 		return 0;
 	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index > idx)
+	if (page_file_index(page) > idx)
 		return 0;
-	if (page->index != idx)
+	if (page_file_index(page) != idx)
 		return PAGE_CACHE_SIZE;
 	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
 }
@@ -586,11 +586,11 @@ int nfs_readpage_result(struct rpc_task 
 int nfs_readpage(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	int		error;
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
-		page, PAGE_CACHE_SIZE, page->index);
+		page, PAGE_CACHE_SIZE, page_file_index(page));
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
 	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
@@ -638,7 +638,7 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *new;
 	unsigned int len;
 
Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -152,13 +152,13 @@ void nfs_writedata_release(void *wdata)
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	loff_t end, i_size = i_size_read(inode);
 	unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 
-	if (i_size > 0 && page->index < end_index)
+	if (i_size > 0 && page_file_index(page) < end_index)
 		return;
-	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
+	end = page_offset(page) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
 	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
@@ -181,11 +181,11 @@ static void nfs_mark_uptodate(struct pag
 		return;
 	}
 
-	end_offs = i_size_read(page->mapping->host) - 1;
+	end_offs = i_size_read(page_file_mapping(page)->host) - 1;
 	if (end_offs < 0)
 		return;
 	/* Is this the last page? */
-	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
+	if (page_file_index(page) != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
 		return;
 	/* This is the last page: set PG_uptodate if we cover the entire
 	 * extent of the data, then zero the rest of the page.
@@ -300,7 +300,7 @@ static int wb_priority(struct writeback_
 int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	unsigned long end_index;
 	unsigned offset = PAGE_CACHE_SIZE;
 	loff_t i_size = i_size_read(inode);
@@ -327,14 +327,14 @@ int nfs_writepage(struct page *page, str
 	nfs_wb_page_priority(inode, page, priority);
 
 	/* easy case */
-	if (page->index < end_index)
+	if (page_file_index(page) < end_index)
 		goto do_it;
 	/* things got complicated... */
 	offset = i_size & (PAGE_CACHE_SIZE-1);
 
 	/* OK, are we completely out? */
 	err = 0; /* potential race with truncate - ignore */
-	if (page->index >= end_index+1 || !offset)
+	if (page_file_index(page) >= end_index+1 || !offset)
 		goto out;
 do_it:
 	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
@@ -606,7 +606,7 @@ static void nfs_cancel_commit_list(struc
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
  * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
+ * @idx_start: lower bound of page_file_index(page) to scan.
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves requests from the inode's dirty page list.
@@ -632,7 +632,7 @@ nfs_scan_dirty(struct inode *inode, stru
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
  * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
+ * @idx_start: lower bound of page_file_index(page) to scan.
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves requests from the inode's 'commit' request list.
@@ -713,14 +713,14 @@ static struct nfs_page * nfs_update_requ
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+	if (nfs_wait_on_write_congestion(page_file_mapping(page), server->flags & NFS_MOUNT_INTR))
 		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
 		spin_lock(&nfsi->req_lock);
-		req = _nfs_find_request(inode, page->index);
+		req = _nfs_find_request(inode, page_file_index(page));
 		if (req) {
 			if (!nfs_lock_request_dontget(req)) {
 				int error;
@@ -791,7 +791,7 @@ static struct nfs_page * nfs_update_requ
 int nfs_flush_incompatible(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = page->mapping->host;
+	struct inode	*inode = page_file_mapping(page)->host;
 	struct nfs_page	*req;
 	int		status = 0;
 	/*
@@ -802,7 +802,7 @@ int nfs_flush_incompatible(struct file *
 	 * Also do the same if we find a request from an existing
 	 * dropped page.
 	 */
-	req = nfs_find_request(inode, page->index);
+	req = nfs_find_request(inode, page_file_index(page));
 	if (req) {
 		if (req->wb_page != page || ctx != req->wb_context)
 			status = nfs_wb_page(inode, page);
@@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = page->mapping->host;
+	struct inode	*inode = page_file_mapping(page)->host;
 	struct nfs_page	*req;
 	int		status = 0;
 
@@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
 		offset = 0;
 		if (unlikely(end_offs < 0)) {
 			/* Do nothing */
-		} else if (page->index == end_index) {
+		} else if (page_file_index(page) == end_index) {
 			unsigned int pglen;
 			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
 			if (count < pglen)
 				count = pglen;
-		} else if (page->index < end_index)
+		} else if (page_file_index(page) < end_index)
 			count = PAGE_CACHE_SIZE;
 	}
 
Index: linux-2.6/fs/nfs/dir.c
===================================================================
--- linux-2.6.orig/fs/nfs/dir.c
+++ linux-2.6/fs/nfs/dir.c
@@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
 
 	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
 			__FUNCTION__, (long long)desc->entry->cookie,
-			page->index);
+			page_file_index(page));
 
  again:
 	timestamp = jiffies;
@@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_mutex or some other mechanism.
 	 */
-	if (page->index == 0)
+	if (page_file_index(page) == 0)
 		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
 	unlock_page(page);
 	return 0;

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 4/6] nfs: Teach NFS about swap cache pages
@ 2006-08-25 15:37   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:37 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust

Teach the NFS client how to treat PG_swapcache pages.

Replace all occurrences of page->index and page->mapping in the NFS client
with the new page_file_index() and page_file_mapping() functions.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/dir.c      |    4 ++--
 fs/nfs/file.c     |    6 +++---
 fs/nfs/pagelist.c |    8 ++++----
 fs/nfs/read.c     |   10 +++++-----
 fs/nfs/write.c    |   34 +++++++++++++++++-----------------
 5 files changed, 31 insertions(+), 31 deletions(-)

Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -303,16 +303,16 @@ static int nfs_commit_write(struct file 
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 
 	/* Cancel any unstarted writes on this page */
 	if (offset == 0)
-		nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
+		nfs_sync_inode_wait(inode, page_file_index(page), 1, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-	return !nfs_wb_page(page->mapping->host, page);
+	return !nfs_wb_page(page_file_mapping(page)->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/fs/nfs/pagelist.c
===================================================================
--- linux-2.6.orig/fs/nfs/pagelist.c
+++ linux-2.6/fs/nfs/pagelist.c
@@ -82,11 +82,11 @@ nfs_create_request(struct nfs_open_conte
 	 * update_nfs_request below if the region is not locked. */
 	req->wb_page    = page;
 	atomic_set(&req->wb_complete, 0);
-	req->wb_index	= page->index;
+	req->wb_index	= page_file_index(page);
 	page_cache_get(page);
 	BUG_ON(PagePrivate(page));
 	BUG_ON(!PageLocked(page));
-	BUG_ON(page->mapping->host != inode);
+	BUG_ON(page_file_mapping(page)->host != inode);
 	req->wb_offset  = offset;
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
@@ -271,7 +271,7 @@ nfs_coalesce_requests(struct list_head *
  * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
  * @nfsi: NFS inode
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
+ * @idx_start: lower bound of page_file_index(page) to scan
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
@@ -328,7 +328,7 @@ out:
  * @nfsi: NFS inode
  * @head: One of the NFS inode request lists
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
+ * @idx_start: lower bound of page_file_index(page) to scan
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
Index: linux-2.6/fs/nfs/read.c
===================================================================
--- linux-2.6.orig/fs/nfs/read.c
+++ linux-2.6/fs/nfs/read.c
@@ -84,9 +84,9 @@ unsigned int nfs_page_length(struct inod
 	if (i_size <= 0)
 		return 0;
 	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-	if (page->index > idx)
+	if (page_file_index(page) > idx)
 		return 0;
-	if (page->index != idx)
+	if (page_file_index(page) != idx)
 		return PAGE_CACHE_SIZE;
 	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
 }
@@ -586,11 +586,11 @@ int nfs_readpage_result(struct rpc_task 
 int nfs_readpage(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	int		error;
 
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
-		page, PAGE_CACHE_SIZE, page->index);
+		page, PAGE_CACHE_SIZE, page_file_index(page));
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
 	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
@@ -638,7 +638,7 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *new;
 	unsigned int len;
 
Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -152,13 +152,13 @@ void nfs_writedata_release(void *wdata)
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	loff_t end, i_size = i_size_read(inode);
 	unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 
-	if (i_size > 0 && page->index < end_index)
+	if (i_size > 0 && page_file_index(page) < end_index)
 		return;
-	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
+	end = page_offset(page) + ((loff_t)offset+count);
 	if (i_size >= end)
 		return;
 	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
@@ -181,11 +181,11 @@ static void nfs_mark_uptodate(struct pag
 		return;
 	}
 
-	end_offs = i_size_read(page->mapping->host) - 1;
+	end_offs = i_size_read(page_file_mapping(page)->host) - 1;
 	if (end_offs < 0)
 		return;
 	/* Is this the last page? */
-	if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
+	if (page_file_index(page) != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
 		return;
 	/* This is the last page: set PG_uptodate if we cover the entire
 	 * extent of the data, then zero the rest of the page.
@@ -300,7 +300,7 @@ static int wb_priority(struct writeback_
 int nfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct nfs_open_context *ctx;
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = page_file_mapping(page)->host;
 	unsigned long end_index;
 	unsigned offset = PAGE_CACHE_SIZE;
 	loff_t i_size = i_size_read(inode);
@@ -327,14 +327,14 @@ int nfs_writepage(struct page *page, str
 	nfs_wb_page_priority(inode, page, priority);
 
 	/* easy case */
-	if (page->index < end_index)
+	if (page_file_index(page) < end_index)
 		goto do_it;
 	/* things got complicated... */
 	offset = i_size & (PAGE_CACHE_SIZE-1);
 
 	/* OK, are we completely out? */
 	err = 0; /* potential race with truncate - ignore */
-	if (page->index >= end_index+1 || !offset)
+	if (page_file_index(page) >= end_index+1 || !offset)
 		goto out;
 do_it:
 	ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
@@ -606,7 +606,7 @@ static void nfs_cancel_commit_list(struc
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
  * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
+ * @idx_start: lower bound of page_file_index(page) to scan.
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves requests from the inode's dirty page list.
@@ -632,7 +632,7 @@ nfs_scan_dirty(struct inode *inode, stru
  * nfs_scan_commit - Scan an inode for commit requests
  * @inode: NFS inode to scan
  * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
+ * @idx_start: lower bound of page_file_index(page) to scan.
  * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves requests from the inode's 'commit' request list.
@@ -713,14 +713,14 @@ static struct nfs_page * nfs_update_requ
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+	if (nfs_wait_on_write_congestion(page_file_mapping(page), server->flags & NFS_MOUNT_INTR))
 		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
 		spin_lock(&nfsi->req_lock);
-		req = _nfs_find_request(inode, page->index);
+		req = _nfs_find_request(inode, page_file_index(page));
 		if (req) {
 			if (!nfs_lock_request_dontget(req)) {
 				int error;
@@ -791,7 +791,7 @@ static struct nfs_page * nfs_update_requ
 int nfs_flush_incompatible(struct file *file, struct page *page)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = page->mapping->host;
+	struct inode	*inode = page_file_mapping(page)->host;
 	struct nfs_page	*req;
 	int		status = 0;
 	/*
@@ -802,7 +802,7 @@ int nfs_flush_incompatible(struct file *
 	 * Also do the same if we find a request from an existing
 	 * dropped page.
 	 */
-	req = nfs_find_request(inode, page->index);
+	req = nfs_find_request(inode, page_file_index(page));
 	if (req) {
 		if (req->wb_page != page || ctx != req->wb_context)
 			status = nfs_wb_page(inode, page);
@@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
 		unsigned int offset, unsigned int count)
 {
 	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-	struct inode	*inode = page->mapping->host;
+	struct inode	*inode = page_file_mapping(page)->host;
 	struct nfs_page	*req;
 	int		status = 0;
 
@@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
 		offset = 0;
 		if (unlikely(end_offs < 0)) {
 			/* Do nothing */
-		} else if (page->index == end_index) {
+		} else if (page_file_index(page) == end_index) {
 			unsigned int pglen;
 			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
 			if (count < pglen)
 				count = pglen;
-		} else if (page->index < end_index)
+		} else if (page_file_index(page) < end_index)
 			count = PAGE_CACHE_SIZE;
 	}
 
Index: linux-2.6/fs/nfs/dir.c
===================================================================
--- linux-2.6.orig/fs/nfs/dir.c
+++ linux-2.6/fs/nfs/dir.c
@@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
 
 	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
 			__FUNCTION__, (long long)desc->entry->cookie,
-			page->index);
+			page_file_index(page));
 
  again:
 	timestamp = jiffies;
@@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
 	 * Note: assumes we have exclusive access to this mapping either
 	 *	 through inode->i_mutex or some other mechanism.
 	 */
-	if (page->index == 0)
+	if (page_file_index(page) == 0)
 		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
 	unlock_page(page);
 	return 0;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 5/6] nfs: Add comment on PG_private use of NFS
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:38   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:38 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust


Add a comment explaining the use of PG_private in the NFS client.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/write.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -424,6 +424,11 @@ static int nfs_inode_add_request(struct 
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	/*
+	 * The PG_private bit is unfortunately needed if we want to fix the
+	 * hole in the mmap semantics. If we do not set it, then the VM will
+	 * fail to call the "releasepage" address ops.
+	 */
 	SetPagePrivate(req->wb_page);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 5/6] nfs: Add comment on PG_private use of NFS
@ 2006-08-25 15:38   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:38 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Rik van Riel, Peter Zijlstra, Andrew Morton, Trond Myklebust

Add a comment explaining the use of PG_private in the NFS client.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/write.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -424,6 +424,11 @@ static int nfs_inode_add_request(struct 
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	/*
+	 * The PG_private bit is unfortunately needed if we want to fix the
+	 * hole in the mmap semantics. If we do not set it, then the VM will
+	 * fail to call the "releasepage" address ops.
+	 */
 	SetPagePrivate(req->wb_page);
 	nfsi->npages++;
 	atomic_inc(&req->wb_count);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 6/6] nfs: Enable swap over NFS
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 15:38   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:38 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust


Now that NFS can handle swap cache pages, add a swapfile method to allow
swapping over NFS.

NOTE: this dummy method is obviously not enough to make it safe.
A more complete version of the nfs_swapfile() function will be present
in the next VM deadlock avoidance patches.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/file.c |    6 ++++++
 1 file changed, 6 insertions(+)

Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -315,6 +315,11 @@ static int nfs_release_page(struct page 
 	return !nfs_wb_page(page_file_mapping(page)->host, page);
 }
 
+static int nfs_swapfile(struct address_space *mapping, int enable)
+{
+	return 0;
+}
+
 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -328,6 +333,7 @@ const struct address_space_operations nf
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
+	.swapfile = nfs_swapfile,
 };
 
 /* 

^ permalink raw reply	[flat|nested] 36+ messages in thread

* [PATCH 6/6] nfs: Enable swap over NFS
@ 2006-08-25 15:38   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 15:38 UTC (permalink / raw)
  To: linux-mm, linux-kernel
  Cc: Andrew Morton, Peter Zijlstra, Rik van Riel, Trond Myklebust

Now that NFS can handle swap cache pages, add a swapfile method to allow
swapping over NFS.

NOTE: this dummy method is obviously not enough to make it safe.
A more complete version of the nfs_swapfile() function will be present
in the next VM deadlock avoidance patches.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/nfs/file.c |    6 ++++++
 1 file changed, 6 insertions(+)

Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -315,6 +315,11 @@ static int nfs_release_page(struct page 
 	return !nfs_wb_page(page_file_mapping(page)->host, page);
 }
 
+static int nfs_swapfile(struct address_space *mapping, int enable)
+{
+	return 0;
+}
+
 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
 	.readpages = nfs_readpages,
@@ -328,6 +333,7 @@ const struct address_space_operations nf
 #ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
 #endif
+	.swapfile = nfs_swapfile,
 };
 
 /* 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
  2006-08-25 15:37 ` Peter Zijlstra
@ 2006-08-25 16:36   ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 16:36 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust,
	Peter Zijlstra


Make sure we clear PG_writeback after we clear PG_private, otherwise
weird and wonderful stuff will happen.

Also, teach try_to_release_page() about PG_swapcache pages.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/buffer.c    |    2 +-
 fs/nfs/write.c |    5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1567,7 +1567,7 @@ static void discard_buffer(struct buffer
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
-	struct address_space * const mapping = page->mapping;
+	struct address_space * const mapping = page_mapping(page);
 
 	BUG_ON(!PageLocked(page));
 	if (PageWriteback(page))
Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -902,7 +902,6 @@ done:
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	end_page_writeback(req->wb_page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (!PageError(req->wb_page)) {
@@ -922,6 +921,7 @@ out:
 #else
 	nfs_inode_remove_request(req);
 #endif
+	end_page_writeback(req->wb_page);
 	nfs_clear_page_writeback(req);
 }
 
@@ -1222,12 +1222,10 @@ static void nfs_writeback_done_full(stru
 			ClearPageUptodate(page);
 			SetPageError(page);
 			req->wb_context->error = task->tk_status;
-			end_page_writeback(page);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
-		end_page_writeback(page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
@@ -1242,6 +1240,7 @@ static void nfs_writeback_done_full(stru
 		nfs_inode_remove_request(req);
 #endif
 	next:
+		end_page_writeback(page);
 		nfs_clear_page_writeback(req);
 	}
 }



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
@ 2006-08-25 16:36   ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 16:36 UTC (permalink / raw)
  To: linux-mm
  Cc: linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust,
	Peter Zijlstra

Make sure we clear PG_writeback after we clear PG_private, otherwise
weird and wonderful stuff will happen.

Also, teach try_to_release_page() about PG_swapcache pages.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 fs/buffer.c    |    2 +-
 fs/nfs/write.c |    5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -1567,7 +1567,7 @@ static void discard_buffer(struct buffer
  */
 int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
-	struct address_space * const mapping = page->mapping;
+	struct address_space * const mapping = page_mapping(page);
 
 	BUG_ON(!PageLocked(page));
 	if (PageWriteback(page))
Index: linux-2.6/fs/nfs/write.c
===================================================================
--- linux-2.6.orig/fs/nfs/write.c
+++ linux-2.6/fs/nfs/write.c
@@ -902,7 +902,6 @@ done:
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	end_page_writeback(req->wb_page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (!PageError(req->wb_page)) {
@@ -922,6 +921,7 @@ out:
 #else
 	nfs_inode_remove_request(req);
 #endif
+	end_page_writeback(req->wb_page);
 	nfs_clear_page_writeback(req);
 }
 
@@ -1222,12 +1222,10 @@ static void nfs_writeback_done_full(stru
 			ClearPageUptodate(page);
 			SetPageError(page);
 			req->wb_context->error = task->tk_status;
-			end_page_writeback(page);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
-		end_page_writeback(page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
@@ -1242,6 +1240,7 @@ static void nfs_writeback_done_full(stru
 		nfs_inode_remove_request(req);
 #endif
 	next:
+		end_page_writeback(page);
 		nfs_clear_page_writeback(req);
 	}
 }


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
  2006-08-25 15:37   ` Peter Zijlstra
@ 2006-08-25 20:03     ` Trond Myklebust
  -1 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:03 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 17:37 +0200, Peter Zijlstra wrote:
> Teach the NFS client how to treat PG_swapcache pages.
> 
> Replace all occurrences of page->index and page->mapping in the NFS client
> with the new page_file_index() and page_file_mapping() functions.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  fs/nfs/dir.c      |    4 ++--
>  fs/nfs/file.c     |    6 +++---
>  fs/nfs/pagelist.c |    8 ++++----
>  fs/nfs/read.c     |   10 +++++-----
>  fs/nfs/write.c    |   34 +++++++++++++++++-----------------
>  5 files changed, 31 insertions(+), 31 deletions(-)

<snip>

> @@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
>  		unsigned int offset, unsigned int count)
>  {
>  	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
> -	struct inode	*inode = page->mapping->host;
> +	struct inode	*inode = page_file_mapping(page)->host;
>  	struct nfs_page	*req;
>  	int		status = 0;
>  
> @@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
>  		offset = 0;
>  		if (unlikely(end_offs < 0)) {
>  			/* Do nothing */
> -		} else if (page->index == end_index) {
> +		} else if (page_file_index(page) == end_index) {

Is this necessary? When will we ever call nfs_updatepage() with a swap
page? AFAICS, the swap stuff always uses page dirtying and (ugh)
writepage().

>  			unsigned int pglen;
>  			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
>  			if (count < pglen)
>  				count = pglen;
> -		} else if (page->index < end_index)
> +		} else if (page_file_index(page) < end_index)
>  			count = PAGE_CACHE_SIZE;
>  	}
>  
> Index: linux-2.6/fs/nfs/dir.c
> ===================================================================
> --- linux-2.6.orig/fs/nfs/dir.c
> +++ linux-2.6/fs/nfs/dir.c
> @@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
>  
>  	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
>  			__FUNCTION__, (long long)desc->entry->cookie,
> -			page->index);
> +			page_file_index(page));
>  
>   again:
>  	timestamp = jiffies;
> @@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
>  	 * Note: assumes we have exclusive access to this mapping either
>  	 *	 through inode->i_mutex or some other mechanism.
>  	 */
> -	if (page->index == 0)
> +	if (page_file_index(page) == 0)
>  		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
>  	unlock_page(page);
>  	return 0;

Why are we worried about the possibility of NFS readdir pages being swap
pages?

Cheers,
  Trond


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
@ 2006-08-25 20:03     ` Trond Myklebust
  0 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:03 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 17:37 +0200, Peter Zijlstra wrote:
> Teach the NFS client how to treat PG_swapcache pages.
> 
> Replace all occurrences of page->index and page->mapping in the NFS client
> with the new page_file_index() and page_file_mapping() functions.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> ---
>  fs/nfs/dir.c      |    4 ++--
>  fs/nfs/file.c     |    6 +++---
>  fs/nfs/pagelist.c |    8 ++++----
>  fs/nfs/read.c     |   10 +++++-----
>  fs/nfs/write.c    |   34 +++++++++++++++++-----------------
>  5 files changed, 31 insertions(+), 31 deletions(-)

<snip>

> @@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
>  		unsigned int offset, unsigned int count)
>  {
>  	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
> -	struct inode	*inode = page->mapping->host;
> +	struct inode	*inode = page_file_mapping(page)->host;
>  	struct nfs_page	*req;
>  	int		status = 0;
>  
> @@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
>  		offset = 0;
>  		if (unlikely(end_offs < 0)) {
>  			/* Do nothing */
> -		} else if (page->index == end_index) {
> +		} else if (page_file_index(page) == end_index) {

Is this necessary? When will we ever call nfs_updatepage() with a swap
page? AFAICS, the swap stuff always uses page dirtying and (ugh)
writepage().

>  			unsigned int pglen;
>  			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
>  			if (count < pglen)
>  				count = pglen;
> -		} else if (page->index < end_index)
> +		} else if (page_file_index(page) < end_index)
>  			count = PAGE_CACHE_SIZE;
>  	}
>  
> Index: linux-2.6/fs/nfs/dir.c
> ===================================================================
> --- linux-2.6.orig/fs/nfs/dir.c
> +++ linux-2.6/fs/nfs/dir.c
> @@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
>  
>  	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
>  			__FUNCTION__, (long long)desc->entry->cookie,
> -			page->index);
> +			page_file_index(page));
>  
>   again:
>  	timestamp = jiffies;
> @@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
>  	 * Note: assumes we have exclusive access to this mapping either
>  	 *	 through inode->i_mutex or some other mechanism.
>  	 */
> -	if (page->index == 0)
> +	if (page_file_index(page) == 0)
>  		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
>  	unlock_page(page);
>  	return 0;

Why are we worried about the possibility of NFS readdir pages being swap
pages?

Cheers,
  Trond

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
  2006-08-25 16:36   ` Peter Zijlstra
@ 2006-08-25 20:11     ` Trond Myklebust
  -1 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 18:36 +0200, Peter Zijlstra wrote:
> Make sure we clear PG_writeback after we clear PG_private, otherwise
> weird and wonderful stuff will happen.
> 
NACK.

Look carefully at the case of unstable writes: your patch does nothing
to guarantee that PG_writeback is cleared after PG_private for that
case.
Anyhow, you don't explain exactly what is wrong with clearing
PG_writeback before PG_private.

Cheers,
  Trond


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
@ 2006-08-25 20:11     ` Trond Myklebust
  0 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:11 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 18:36 +0200, Peter Zijlstra wrote:
> Make sure we clear PG_writeback after we clear PG_private, otherwise
> weird and wonderful stuff will happen.
> 
NACK.

Look carefully at the case of unstable writes: your patch does nothing
to guarantee that PG_writeback is cleared after PG_private for that
case.
Anyhow, you don't explain exactly what is wrong with clearing
PG_writeback before PG_private.

Cheers,
  Trond

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
  2006-08-25 20:03     ` Trond Myklebust
@ 2006-08-25 20:20       ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 20:20 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 16:03 -0400, Trond Myklebust wrote:
> On Fri, 2006-08-25 at 17:37 +0200, Peter Zijlstra wrote:
> > Teach the NFS client how to treat PG_swapcache pages.
> > 
> > Replace all occurrences of page->index and page->mapping in the NFS client
> > with the new page_file_index() and page_file_mapping() functions.
> > 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > ---
> >  fs/nfs/dir.c      |    4 ++--
> >  fs/nfs/file.c     |    6 +++---
> >  fs/nfs/pagelist.c |    8 ++++----
> >  fs/nfs/read.c     |   10 +++++-----
> >  fs/nfs/write.c    |   34 +++++++++++++++++-----------------
> >  5 files changed, 31 insertions(+), 31 deletions(-)
> 
> <snip>
> 
> > @@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
> >  		unsigned int offset, unsigned int count)
> >  {
> >  	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
> > -	struct inode	*inode = page->mapping->host;
> > +	struct inode	*inode = page_file_mapping(page)->host;
> >  	struct nfs_page	*req;
> >  	int		status = 0;
> >  
> > @@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
> >  		offset = 0;
> >  		if (unlikely(end_offs < 0)) {
> >  			/* Do nothing */
> > -		} else if (page->index == end_index) {
> > +		} else if (page_file_index(page) == end_index) {
> 
> Is this necessary? When will we ever call nfs_updatepage() with a swap
> page? AFAICS, the swap stuff always uses page dirtying and (ugh)
> writepage().

Yes, swap uses writepage(), Nikita Danilov had a patch that did cluster
pageout using writepages(), however that tended to deadlock even on
local disk.

> >  			unsigned int pglen;
> >  			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
> >  			if (count < pglen)
> >  				count = pglen;
> > -		} else if (page->index < end_index)
> > +		} else if (page_file_index(page) < end_index)
> >  			count = PAGE_CACHE_SIZE;
> >  	}
> >  
> > Index: linux-2.6/fs/nfs/dir.c
> > ===================================================================
> > --- linux-2.6.orig/fs/nfs/dir.c
> > +++ linux-2.6/fs/nfs/dir.c
> > @@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
> >  
> >  	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
> >  			__FUNCTION__, (long long)desc->entry->cookie,
> > -			page->index);
> > +			page_file_index(page));
> >  
> >   again:
> >  	timestamp = jiffies;
> > @@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
> >  	 * Note: assumes we have exclusive access to this mapping either
> >  	 *	 through inode->i_mutex or some other mechanism.
> >  	 */
> > -	if (page->index == 0)
> > +	if (page_file_index(page) == 0)
> >  		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
> >  	unlock_page(page);
> >  	return 0;
> 
> Why are we worried about the possibility of NFS readdir pages being swap
> pages?

Indiscriminate search and replace followed by a manual check for
correctness. They might not be needed, but they're not wrong either.

Would you prefer I take them out?


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
@ 2006-08-25 20:20       ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 20:20 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 16:03 -0400, Trond Myklebust wrote:
> On Fri, 2006-08-25 at 17:37 +0200, Peter Zijlstra wrote:
> > Teach the NFS client how to treat PG_swapcache pages.
> > 
> > Replace all occurrences of page->index and page->mapping in the NFS client
> > with the new page_file_index() and page_file_mapping() functions.
> > 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> > ---
> >  fs/nfs/dir.c      |    4 ++--
> >  fs/nfs/file.c     |    6 +++---
> >  fs/nfs/pagelist.c |    8 ++++----
> >  fs/nfs/read.c     |   10 +++++-----
> >  fs/nfs/write.c    |   34 +++++++++++++++++-----------------
> >  5 files changed, 31 insertions(+), 31 deletions(-)
> 
> <snip>
> 
> > @@ -821,7 +821,7 @@ int nfs_updatepage(struct file *file, st
> >  		unsigned int offset, unsigned int count)
> >  {
> >  	struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
> > -	struct inode	*inode = page->mapping->host;
> > +	struct inode	*inode = page_file_mapping(page)->host;
> >  	struct nfs_page	*req;
> >  	int		status = 0;
> >  
> > @@ -854,12 +854,12 @@ int nfs_updatepage(struct file *file, st
> >  		offset = 0;
> >  		if (unlikely(end_offs < 0)) {
> >  			/* Do nothing */
> > -		} else if (page->index == end_index) {
> > +		} else if (page_file_index(page) == end_index) {
> 
> Is this necessary? When will we ever call nfs_updatepage() with a swap
> page? AFAICS, the swap stuff always uses page dirtying and (ugh)
> writepage().

Yes, swap uses writepage(), Nikita Danilov had a patch that did cluster
pageout using writepages(), however that tended to deadlock even on
local disk.

> >  			unsigned int pglen;
> >  			pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
> >  			if (count < pglen)
> >  				count = pglen;
> > -		} else if (page->index < end_index)
> > +		} else if (page_file_index(page) < end_index)
> >  			count = PAGE_CACHE_SIZE;
> >  	}
> >  
> > Index: linux-2.6/fs/nfs/dir.c
> > ===================================================================
> > --- linux-2.6.orig/fs/nfs/dir.c
> > +++ linux-2.6/fs/nfs/dir.c
> > @@ -177,7 +177,7 @@ int nfs_readdir_filler(nfs_readdir_descr
> >  
> >  	dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
> >  			__FUNCTION__, (long long)desc->entry->cookie,
> > -			page->index);
> > +			page_file_index(page));
> >  
> >   again:
> >  	timestamp = jiffies;
> > @@ -201,7 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descr
> >  	 * Note: assumes we have exclusive access to this mapping either
> >  	 *	 through inode->i_mutex or some other mechanism.
> >  	 */
> > -	if (page->index == 0)
> > +	if (page_file_index(page) == 0)
> >  		invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1);
> >  	unlock_page(page);
> >  	return 0;
> 
> Why are we worried about the possibility of NFS readdir pages being swap
> pages?

Indiscriminate search and replace followed by a manual check for
correctness. They might not be needed, but they're not wrong either.

Would you prefer I take them out?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
  2006-08-25 20:20       ` Peter Zijlstra
@ 2006-08-25 20:37         ` Trond Myklebust
  -1 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 22:20 +0200, Peter Zijlstra wrote:
> Indiscriminate search and replace followed by a manual check for
> correctness. They might not be needed, but they're not wrong either.
> 
> Would you prefer I take them out?

It won't give us any massive performance optimisations, but it is nice
to be able to avoid that call to test_bit() whenever possible.

Cheers,
  Trond


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 4/6] nfs: Teach NFS about swap cache pages
@ 2006-08-25 20:37         ` Trond Myklebust
  0 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 20:37 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 22:20 +0200, Peter Zijlstra wrote:
> Indiscriminate search and replace followed by a manual check for
> correctness. They might not be needed, but they're not wrong either.
> 
> Would you prefer I take them out?

It won't give us any massive performance optimisations, but it is nice
to be able to avoid that call to test_bit() whenever possible.

Cheers,
  Trond

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
  2006-08-25 20:11     ` Trond Myklebust
@ 2006-08-25 20:44       ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 20:44 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 16:11 -0400, Trond Myklebust wrote:
> On Fri, 2006-08-25 at 18:36 +0200, Peter Zijlstra wrote:
> > Make sure we clear PG_writeback after we clear PG_private, otherwise
> > weird and wonderful stuff will happen.
> > 
> NACK.
> 
> Look carefully at the case of unstable writes: your patch does nothing
> to guarantee that PG_writeback is cleared after PG_private for that
> case.

Ah, right. Thanks for pointing this out.

> Anyhow, you don't explain exactly what is wrong with clearing
> PG_writeback before PG_private.

Yes, this was a rather hasty patch; I was mortified to find that I had
missed a few changes and that my patch-set would crash the instant
someone tried it.

The VM doesn't really like PG_private set on PG_swapcache pages, I guess
I'll have to rectify that and leave the NFS behaviour as is.

Will correct this in the next round.

Thanks for the feedback,

Peter


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
@ 2006-08-25 20:44       ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-25 20:44 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 16:11 -0400, Trond Myklebust wrote:
> On Fri, 2006-08-25 at 18:36 +0200, Peter Zijlstra wrote:
> > Make sure we clear PG_writeback after we clear PG_private, otherwise
> > weird and wonderful stuff will happen.
> > 
> NACK.
> 
> Look carefully at the case of unstable writes: your patch does nothing
> to guarantee that PG_writeback is cleared after PG_private for that
> case.

Ah, right. Thanks for pointing this out.

> Anyhow, you don't explain exactly what is wrong with clearing
> PG_writeback before PG_private.

Yes, this was a rather hasty patch; I was mortified to find that I had
missed a few changes and that my patch-set would crash the instant
someone tried it.

The VM doesn't really like PG_private set on PG_swapcache pages, I guess
I'll have to rectify that and leave the NFS behaviour as is.

Will correct this in the next round.

Thanks for the feedback,

Peter

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
  2006-08-25 20:44       ` Peter Zijlstra
@ 2006-08-25 21:19         ` Trond Myklebust
  -1 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 21:19 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 22:44 +0200, Peter Zijlstra wrote:
> The VM doesn't really like PG_private set on PG_swapcache pages, I guess
> I'll have to rectify that and leave the NFS behaviour as is.

You might want to consider disabling NFS data cache revalidation on swap
files since it doesn't really make sense to have other clients change
the file while you are using it.

If you do, you could also skip setting PG_private on swap pages, since
there ought to be no further races with invalidate_inode_pages2() to
deal with.

Cheers,
  Trond


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS
@ 2006-08-25 21:19         ` Trond Myklebust
  0 siblings, 0 replies; 36+ messages in thread
From: Trond Myklebust @ 2006-08-25 21:19 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel

On Fri, 2006-08-25 at 22:44 +0200, Peter Zijlstra wrote:
> The VM doesn't really like PG_private set on PG_swapcache pages, I guess
> I'll have to rectify that and leave the NFS behaviour as is.

You might want to consider disabling NFS data cache revalidation on swap
files since it doesn't really make sense to have other clients change
the file while you are using it.

If you do, you could also skip setting PG_private on swap pages, since
there ought to be no further races with invalidate_inode_pages2() to
deal with.

Cheers,
  Trond

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
  2006-08-25 15:38   ` Peter Zijlstra
@ 2006-08-26 14:36     ` Pavel Machek
  -1 siblings, 0 replies; 36+ messages in thread
From: Pavel Machek @ 2006-08-26 14:36 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust

Hi!

> Now that NFS can handle swap cache pages, add a swapfile method to allow
> swapping over NFS.
> 
> NOTE: this dummy method is obviously not enough to make it safe.
> A more complete version of the nfs_swapfile() function will be present
> in the next VM deadlock avoidance patches.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

We probably do not want to enable functionality before it is safe...

Also swsusp interactions will be interesting. (Rafael is working on
swapfile support these days).
						Pavel
-- 
Thanks for all the (sleeping) penguins.

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
@ 2006-08-26 14:36     ` Pavel Machek
  0 siblings, 0 replies; 36+ messages in thread
From: Pavel Machek @ 2006-08-26 14:36 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust

Hi!

> Now that NFS can handle swap cache pages, add a swapfile method to allow
> swapping over NFS.
> 
> NOTE: this dummy method is obviously not enough to make it safe.
> A more complete version of the nfs_swapfile() function will be present
> in the next VM deadlock avoidance patches.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

We probably do not want to enable functionality before it is safe...

Also swsusp interactions will be interesting. (Rafael is working on
swapfile support these days).
						Pavel
-- 
Thanks for all the (sleeping) penguins.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
  2006-08-26 14:36     ` Pavel Machek
@ 2006-08-26 17:30       ` Peter Zijlstra
  -1 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-26 17:30 UTC (permalink / raw)
  To: Pavel Machek
  Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust

On Sat, 2006-08-26 at 14:36 +0000, Pavel Machek wrote:
> Hi!
> 
> > Now that NFS can handle swap cache pages, add a swapfile method to allow
> > swapping over NFS.
> > 
> > NOTE: this dummy method is obviously not enough to make it safe.
> > A more complete version of the nfs_swapfile() function will be present
> > in the next VM deadlock avoidance patches.
> > 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> 
> We probably do not want to enable functionality before it is safe...

:-) Probably not, no, but some people might want to live on the edge.

> Also swsusp interactions will be interesting. (Rafael is working on
> swapfile support these days).

Yes, I've considered this, and this was one of the motivators to keep
the functionality under its own config option, so that it might be
mutually exclusive with swsusp to swapfile.



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
@ 2006-08-26 17:30       ` Peter Zijlstra
  0 siblings, 0 replies; 36+ messages in thread
From: Peter Zijlstra @ 2006-08-26 17:30 UTC (permalink / raw)
  To: Pavel Machek
  Cc: linux-mm, linux-kernel, Andrew Morton, Rik van Riel, Trond Myklebust

On Sat, 2006-08-26 at 14:36 +0000, Pavel Machek wrote:
> Hi!
> 
> > Now that NFS can handle swap cache pages, add a swapfile method to allow
> > swapping over NFS.
> > 
> > NOTE: this dummy method is obviously not enough to make it safe.
> > A more complete version of the nfs_swapfile() function will be present
> > in the next VM deadlock avoidance patches.
> > 
> > Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> 
> We probably do not want to enable functionality before it is safe...

:-) Probably not, no, but some people might want to live on the edge.

> Also swsusp interactions will be interesting. (Rafael is working on
> swapfile support these days).

Yes, I've considered this, and this was one of the motivators to keep
the functionality under its own config option, so that it might be
mutually exclusive with swsusp to swapfile.


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
  2006-08-26 14:36     ` Pavel Machek
@ 2006-08-26 21:54       ` Rik van Riel
  -1 siblings, 0 replies; 36+ messages in thread
From: Rik van Riel @ 2006-08-26 21:54 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Peter Zijlstra, linux-mm, linux-kernel, Andrew Morton, Trond Myklebust

Pavel Machek wrote:
> Hi!
> 
>> Now that NFS can handle swap cache pages, add a swapfile method to allow
>> swapping over NFS.
>>
>> NOTE: this dummy method is obviously not enough to make it safe.
>> A more complete version of the nfs_swapfile() function will be present
>> in the next VM deadlock avoidance patches.
>>
>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> 
> We probably do not want to enable functionality before it is safe...

OTOH, if we never enable this, what motivation do we have to
make it safe? :)

Scratching an itch works, so maybe we ought to create an itch?

-- 
What is important?  What you want to be true, or what is true?

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 6/6] nfs: Enable swap over NFS
@ 2006-08-26 21:54       ` Rik van Riel
  0 siblings, 0 replies; 36+ messages in thread
From: Rik van Riel @ 2006-08-26 21:54 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Peter Zijlstra, linux-mm, linux-kernel, Andrew Morton, Trond Myklebust

Pavel Machek wrote:
> Hi!
> 
>> Now that NFS can handle swap cache pages, add a swapfile method to allow
>> swapping over NFS.
>>
>> NOTE: this dummy method is obviously not enough to make it safe.
>> A more complete version of the nfs_swapfile() function will be present
>> in the next VM deadlock avoidance patches.
>>
>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> 
> We probably do not want to enable functionality before it is safe...

OTOH, if we never enable this, what motivation do we have to
make it safe? :)

Scratching an itch works, so maybe we ought to create an itch?

-- 
What is important?  What you want to be true, or what is true?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] uml: arch/um remove_mapping() clash
  2006-08-25 15:37   ` Peter Zijlstra
@ 2006-08-29 20:20     ` Jeff Dike
  -1 siblings, 0 replies; 36+ messages in thread
From: Jeff Dike @ 2006-08-29 20:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-mm, linux-kernel, Rik van Riel, Andrew Morton, Trond Myklebust

On Fri, Aug 25, 2006 at 05:37:40PM +0200, Peter Zijlstra wrote:
> Now that 'include/linux/mm.h' includes 'include/linux/swap.h', the global
> remove_mapping() definition clashes with the arch/um one.
> 
> Rename the arch/um one.

If you tested the UML build -

Acked-by: Jeff Dike <jdike@addtoit.com>

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH 3/6] uml: arch/um remove_mapping() clash
@ 2006-08-29 20:20     ` Jeff Dike
  0 siblings, 0 replies; 36+ messages in thread
From: Jeff Dike @ 2006-08-29 20:20 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-mm, linux-kernel, Rik van Riel, Andrew Morton, Trond Myklebust

On Fri, Aug 25, 2006 at 05:37:40PM +0200, Peter Zijlstra wrote:
> Now that 'include/linux/mm.h' includes 'include/linux/swap.h', the global
> remove_mapping() definition clashes with the arch/um one.
> 
> Rename the arch/um one.

If you tested the UML build -

Acked-by: Jeff Dike <jdike@addtoit.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2006-08-29 20:21 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-08-25 15:37 [PATCH 0/6] Swap over NFS Peter Zijlstra
2006-08-25 15:37 ` Peter Zijlstra
2006-08-25 15:37 ` [PATCH 1/6] mm: Generic swap file support Peter Zijlstra
2006-08-25 15:37   ` Peter Zijlstra
2006-08-25 15:37 ` [PATCH 2/6] mm: New page_file_* methods Peter Zijlstra
2006-08-25 15:37   ` Peter Zijlstra
2006-08-25 15:37 ` [PATCH 3/6] uml: arch/um remove_mapping() clash Peter Zijlstra
2006-08-25 15:37   ` Peter Zijlstra
2006-08-29 20:20   ` Jeff Dike
2006-08-29 20:20     ` Jeff Dike
2006-08-25 15:37 ` [PATCH 4/6] nfs: Teach NFS about swap cache pages Peter Zijlstra
2006-08-25 15:37   ` Peter Zijlstra
2006-08-25 20:03   ` Trond Myklebust
2006-08-25 20:03     ` Trond Myklebust
2006-08-25 20:20     ` Peter Zijlstra
2006-08-25 20:20       ` Peter Zijlstra
2006-08-25 20:37       ` Trond Myklebust
2006-08-25 20:37         ` Trond Myklebust
2006-08-25 15:38 ` [PATCH 5/6] nfs: Add comment on PG_private use of NFS Peter Zijlstra
2006-08-25 15:38   ` Peter Zijlstra
2006-08-25 15:38 ` [PATCH 6/6] nfs: Enable swap over NFS Peter Zijlstra
2006-08-25 15:38   ` Peter Zijlstra
2006-08-26 14:36   ` Pavel Machek
2006-08-26 14:36     ` Pavel Machek
2006-08-26 17:30     ` Peter Zijlstra
2006-08-26 17:30       ` Peter Zijlstra
2006-08-26 21:54     ` Rik van Riel
2006-08-26 21:54       ` Rik van Riel
2006-08-25 16:36 ` [PATCH 7/6] Lost bits - fix PG_writeback vs PG_private race in NFS Peter Zijlstra
2006-08-25 16:36   ` Peter Zijlstra
2006-08-25 20:11   ` Trond Myklebust
2006-08-25 20:11     ` Trond Myklebust
2006-08-25 20:44     ` Peter Zijlstra
2006-08-25 20:44       ` Peter Zijlstra
2006-08-25 21:19       ` Trond Myklebust
2006-08-25 21:19         ` Trond Myklebust

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.