* [RFC] first try for swap prefetch
@ 2003-04-10 17:47 Thomas Schlichter
  2003-04-10 23:18 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Thomas Schlichter @ 2003-04-10 17:47 UTC (permalink / raw)
  To: linux-kernel


[-- Attachment #1.1: body text --]
[-- Type: text/plain, Size: 892 bytes --]

Hi,

as mentioned a few days ago I was going to try to implement a swap prefetch to 
better utilize the free memory. Now here is my first try. This version works 
only as a module and tests for free pagecache memory at an interval specified 
as a module parameter.

While testing this I saw that many of the page reloads do not come from the swap 
space but from buffers that were evicted. I could easily record which buffers 
have been removed, but I don't know how to read them back into the pagecache...

Another thing I saw is that somewhere in the kernel there must be some code 
which always tries to keep some memory pages free, even if there are cached 
pages that could simply be freed because they are not modified... Perhaps that 
code should be changed...

I hope someone can give me some hints or point out obvious mistakes I made... 
;-)

Thank you!

Best regards
   Thomas

[-- Attachment #1.2: swap_prefetch.diff --]
[-- Type: text/x-diff, Size: 8914 bytes --]

diff -urP linux-2.5.67/arch/i386/Kconfig linux-2.5.67_patched/arch/i386/Kconfig
--- linux-2.5.67/arch/i386/Kconfig	Mon Apr  7 19:30:43 2003
+++ linux-2.5.67_patched/arch/i386/Kconfig	Thu Apr 10 17:47:36 2003
@@ -373,6 +373,13 @@
 	depends on MK8 || MPENTIUM4
 	default y
 
+config SWAP_PREFETCH
+	tristate "Prefetch swapped memory"
+	depends on SWAP
+	help
+	  This option enables the kernel to prefetch swapped memory pages
+	  when idle.
+
 config HUGETLB_PAGE
 	bool "Huge TLB Page Support"
 	help
diff -urP linux-2.5.67/include/linux/swap.h linux-2.5.67_patched/include/linux/swap.h
--- linux-2.5.67/include/linux/swap.h	Mon Apr  7 19:30:35 2003
+++ linux-2.5.67_patched/include/linux/swap.h	Thu Apr 10 18:36:33 2003
@@ -155,6 +155,8 @@
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
 
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.67/include/linux/swap_prefetch.h linux-2.5.67_patched/include/linux/swap_prefetch.h
--- linux-2.5.67/include/linux/swap_prefetch.h	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/include/linux/swap_prefetch.h	Thu Apr 10 18:36:40 2003
@@ -0,0 +1,52 @@
+#ifndef _LINUX_SWAP_PREFETCH_H
+#define _LINUX_SWAP_PREFETCH_H
+
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+
+struct swapped_entry_t {
+	struct list_head list;
+	swp_entry_t entry;
+};
+
+struct swapped_root_t {
+	spinlock_t lock;
+	struct list_head list;
+	struct radix_tree_root tree;
+};
+
+extern struct swapped_root_t swapped_root;
+
+static inline void add_to_swapped_list(swp_entry_t entry)
+{
+	struct swapped_entry_t * swapped_entry;
+	int ret;
+
+	swapped_entry = kmalloc(sizeof(*swapped_entry), GFP_ATOMIC);
+	if(swapped_entry) {
+		swapped_entry->entry = entry;
+		spin_lock(&swapped_root.lock);
+		ret = radix_tree_insert(&swapped_root.tree, entry.val, swapped_entry);
+		if(ret == 0)
+			list_add(&swapped_entry->list, &swapped_root.list);
+		else
+			kfree(swapped_entry);
+		spin_unlock(&swapped_root.lock);
+	}
+}
+
+static inline void remove_from_swapped_list(swp_entry_t entry)
+{
+	struct swapped_entry_t * swapped_entry;
+
+	spin_lock(&swapped_root.lock);
+	swapped_entry = radix_tree_lookup(&swapped_root.tree, entry.val);
+	if(swapped_entry) {
+		list_del(&swapped_entry->list);
+		radix_tree_delete(&swapped_root.tree, entry.val);
+		kfree(swapped_entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+#endif /* _LINUX_SWAP_PREFETCH_H */
diff -urP linux-2.5.67/kernel/ksyms.c linux-2.5.67_patched/kernel/ksyms.c
--- linux-2.5.67/kernel/ksyms.c	Mon Apr  7 19:30:34 2003
+++ linux-2.5.67_patched/kernel/ksyms.c	Thu Apr 10 17:47:36 2003
@@ -58,6 +58,7 @@
 #include <linux/ptrace.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
+#include <linux/swap_prefetch.h>
 #include <asm/checksum.h>
 
 #if defined(CONFIG_PROC_FS)
@@ -70,6 +71,11 @@
 extern struct timezone sys_tz;
 
 extern int panic_timeout;
+
+/* needed for swap prefetch support */
+EXPORT_SYMBOL(swapped_root);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
+EXPORT_SYMBOL(read_swap_cache_async);
 
 /* process memory management */
 EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.67/mm/Makefile linux-2.5.67_patched/mm/Makefile
--- linux-2.5.67/mm/Makefile	Mon Apr  7 19:31:52 2003
+++ linux-2.5.67_patched/mm/Makefile	Thu Apr 10 17:47:36 2003
@@ -12,3 +12,5 @@
 			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_SWAP_PREFETCH)	+= swap_prefetch.o
diff -urP linux-2.5.67/mm/page_alloc.c linux-2.5.67_patched/mm/page_alloc.c
--- linux-2.5.67/mm/page_alloc.c	Mon Apr  7 19:30:39 2003
+++ linux-2.5.67_patched/mm/page_alloc.c	Thu Apr 10 17:47:36 2003
@@ -787,6 +787,48 @@
 }
 #endif
 
+static unsigned int nr_avail_zone_pages(int offset)
+{
+	pg_data_t *pgdat;
+	unsigned long avail = 0;
+
+	for_each_pgdat(pgdat) {
+		struct zonelist *zonelist = pgdat->node_zonelists + offset;
+		struct zone **zonep = zonelist->zones;
+		struct zone *zone;
+		unsigned long low = 0;
+
+		for (zone = *zonep++; zone; zone = *zonep++) {
+			unsigned long local_free = zone->free_pages;
+			unsigned long local_low  = zone->pages_low;
+			
+			low += local_low;
+			if (local_free > low) {
+				avail = max(avail, local_free - low);
+			}
+			low += local_low * sysctl_lower_zone_protection;
+		}
+	}
+
+	return avail;
+}
+
+/*
+ * Amount of available RAM allocatable within ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+	return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/*
+ * Amount of available RAM allocatable within all zones
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+	return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff -urP linux-2.5.67/mm/swap_prefetch.c linux-2.5.67_patched/mm/swap_prefetch.c
--- linux-2.5.67/mm/swap_prefetch.c	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/mm/swap_prefetch.c	Thu Apr 10 18:33:06 2003
@@ -0,0 +1,79 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/swap_prefetch.h>
+
+#define INTERVAL	60		/* (secs) Default is 1 minute */
+
+static int interval       = INTERVAL;
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter <thomas.schlichter@web.de>");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/*
+ *	Our timer
+ */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+		TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/*
+ *	Our work
+ */
+static void prefetch_work_handler(void *data);
+static DECLARE_WORK(prefetch_work, prefetch_work_handler, 0);
+
+/*
+ *	If the timer expires..
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	schedule_work(&prefetch_work);
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+}
+
+/*
+ *	..do the work
+ */
+static void prefetch_work_handler(void *data)
+{
+	printk(KERN_INFO "Available pages before: %d\n", nr_avail_pagecache_pages());
+
+	while(nr_avail_pagecache_pages() != 0) {
+		struct swapped_entry_t *swapped_entry;
+		swp_entry_t entry;
+
+		spin_lock(&swapped_root.lock);
+		if(list_empty(&swapped_root.list)) {
+			spin_unlock(&swapped_root.lock);
+			break;
+		}
+		swapped_entry = list_entry(swapped_root.list.next,
+					struct swapped_entry_t, list);
+		entry = swapped_entry->entry;
+		spin_unlock(&swapped_root.lock);
+
+		read_swap_cache_async(entry);
+	}
+
+	printk(KERN_INFO "Available pages after: %d\n", nr_avail_pagecache_pages());
+}
+
+static int __init prefetch_init(void)
+{
+	prefetch_timer_handler(0);
+	return 0;
+}
+
+static void __exit prefetch_exit(void)
+{
+	del_timer(&prefetch_timer);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.67/mm/swap_state.c linux-2.5.67_patched/mm/swap_state.c
--- linux-2.5.67/mm/swap_state.c	Mon Apr  7 19:31:11 2003
+++ linux-2.5.67_patched/mm/swap_state.c	Thu Apr 10 18:32:32 2003
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>	/* block_sync_page() */
+#include <linux/swap_prefetch.h>
 
 #include <asm/pgtable.h>
 
@@ -49,6 +50,12 @@
 	.private_list	= LIST_HEAD_INIT(swapper_space.private_list),
 };
 
+struct swapped_root_t swapped_root = {
+	.lock = SPIN_LOCK_UNLOCKED,
+	.list = LIST_HEAD_INIT(swapped_root.list),
+	.tree = RADIX_TREE_INIT(GFP_ATOMIC),
+};
+
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
 static struct {
@@ -344,6 +351,8 @@
 {
 	struct page *found_page, *new_page = NULL;
 	int err;
+
+	remove_from_swapped_list(entry);
 
 	do {
 		/*
diff -urP linux-2.5.67/mm/vmscan.c linux-2.5.67_patched/mm/vmscan.c
--- linux-2.5.67/mm/vmscan.c	Mon Apr  7 19:30:43 2003
+++ linux-2.5.67_patched/mm/vmscan.c	Thu Apr 10 18:31:02 2003
@@ -27,6 +27,7 @@
 #include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap-locking.h>
+#include <linux/swap_prefetch.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -402,6 +403,7 @@
 			swp_entry_t swap = { .val = page->index };
 			__delete_from_swap_cache(page);
 			write_unlock(&mapping->page_lock);
+			add_to_swapped_list(swap);
 			swap_free(swap);
 			__put_page(page);	/* The pagecache ref */
 			goto free_it;

[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]


* Re: [RFC] first try for swap prefetch
  2003-04-10 17:47 [RFC] first try for swap prefetch Thomas Schlichter
@ 2003-04-10 23:18 ` Andrew Morton
  2003-04-11 11:51   ` Thomas Schlichter
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2003-04-10 23:18 UTC (permalink / raw)
  To: Thomas Schlichter; +Cc: linux-kernel

Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
>
> Hi,
> 
> as mentioned a few days ago I was going to try to implement a swap prefetch to 
> better utilize the free memory. Now here is my first try.

That's surprisingly cute.  Does it actually do anything noticeable?

+	swapped_entry = kmalloc(sizeof(*swapped_entry), GFP_ATOMIC);

These guys will need a slab cache (not SLAB_HW_CACHE_ALIGNED) to save space.
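
A minimal sketch of such a dedicated cache, using the 2.5-era slab API; the
cache name and the swapped_entry_t type are taken from the patch above, so
this only illustrates the suggestion and is not code from the thread:

	static kmem_cache_t *swapped_entry_cache;

	/* set up once, e.g. from the module's init path */
	static int __init swapped_cache_init(void)
	{
		swapped_entry_cache = kmem_cache_create("swapped_entry",
				sizeof(struct swapped_entry_t), 0, 0, NULL, NULL);
		return swapped_entry_cache ? 0 : -ENOMEM;
	}

	/* the kmalloc()/kfree() pair in the header then becomes: */
	swapped_entry = kmem_cache_alloc(swapped_entry_cache, GFP_ATOMIC);
	kmem_cache_free(swapped_entry_cache, swapped_entry);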

+	swapped_entry = radix_tree_lookup(&swapped_root.tree, entry.val);
+	if(swapped_entry) {
+		list_del(&swapped_entry->list);
+		radix_tree_delete(&swapped_root.tree, entry.val);

you can just do

	if (radix_tree_delete(...) != -ENOENT)
		list_del(...)

+		read_swap_cache_async(entry);

What you want here is a way of telling if the disk(s) which back the swap are
idle.  We used to have that, but Hugh deleted it.  It can be put back, but
it's probably better to put a `last_read_request_time' and
`last_write_request_time' into struct backing_dev_info.  If nobody has used
the disk in the past N milliseconds, then start the speculative swapin.
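
A rough sketch of the idle test this would enable; the two timestamp fields
are only the proposal above and do not exist in struct backing_dev_info in
2.5.67, so this is purely illustrative:

	/* true if neither a read nor a write hit this backing device
	 * within the last `ms' milliseconds (fields are hypothetical) */
	static int backing_dev_idle(struct backing_dev_info *bdi, unsigned int ms)
	{
		unsigned long cutoff = jiffies - ms * HZ / 1000;

		return time_before(bdi->last_read_request_time, cutoff) &&
		       time_before(bdi->last_write_request_time, cutoff);
	}

The prefetch loop would check the bdi of the device backing the swap area
before each read_swap_cache_async() call and back off as soon as real I/O
shows up.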

It might make sense to poke the speculative swapin code in the page-freeing
path too.

And to put the speculatively-swapped-in pages at the tail of the inactive
list (perhaps).

But first-up, some demonstrated goodness is needed...



* Re: [RFC] first try for swap prefetch
  2003-04-10 23:18 ` Andrew Morton
@ 2003-04-11 11:51   ` Thomas Schlichter
  2003-04-11 12:13     ` William Lee Irwin III
                       ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Thomas Schlichter @ 2003-04-11 11:51 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

[-- Attachment #1: signed data --]
[-- Type: text/plain, Size: 2682 bytes --]

On April 11, Andrew Morton wrote:
> Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
> > Hi,
> >
> > as mentioned a few days ago I was going to try to implement a swap
> > prefetch to better utilize the free memory. Now here is my first try.
>
> That's surprisingly cute.  Does it actually do anything noticeable?

Well, it fills free pagecache memory with swapped pages... ;-)

But at the moment I cannot 'feel' any real improvement... :-(
I think the problem is that R/O pages are not written to swap space and so are 
not prefetched by my patch. But I will look into it...

> +	swapped_entry = kmalloc(sizeof(*swapped_entry), GFP_ATOMIC);
>
> These guys will need a slab cache (not SLAB_HW_CACHE_ALIGNED) to save
> space.

OK, I'll do it.

> +	swapped_entry = radix_tree_lookup(&swapped_root.tree, entry.val);
> +	if(swapped_entry) {
> +		list_del(&swapped_entry->list);
> +		radix_tree_delete(&swapped_root.tree, entry.val);
>
> you can just do
>
> 	if (radix_tree_delete(...) != -ENOENT)
> 		list_del(...)
>
> +		read_swap_cache_async(entry);

Sorry, but I think I cannot. The list_del() needs the value returned by 
radix_tree_lookup(), so I cannot drop it... By the way, the only reason for 
the radix tree is to avoid an O(n) search of the list for this list_del()... 
Do you know how expensive the radix_tree_lookup() is? O(1) or O(log(n))?? To 
my shame I do not really know that data structure... :-(

> What you want here is a way of telling if the disk(s) which back the swap
> are idle.  We used to have that, but Hugh deleted it.  It can be put back,
> but it's probably better to put a `last_read_request_time' and
> `last_write_request_time' into struct backing_dev_info.  If nobody has used
> the disk in the past N milliseconds, then start the speculative swapin.

That's good. I was looking for something like that but didn't find anything 
fitting in the current sources...

> It might make sense to poke the speculative swapin code in the page-freeing
> path too.

I wanted to do this but don't know which function is the correct one for this. 
But I will search harder... or can you give me a hint?

> And to put the speculatively-swapped-in pages at the tail of the inactive
> list (perhaps).

This may be a good idea...

> But first-up, some demonstrated goodness is needed...

Yup, but as stated above it currently does not improve things very much. I think 
I should first implement the R/O pages thing and investigate which part of the 
kernel works against my code and frees some pages right after I have filled 
them...

Thank you for helping me with your comments!

Best regards
   Thomas

[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]


* Re: [RFC] first try for swap prefetch
  2003-04-11 11:51   ` Thomas Schlichter
@ 2003-04-11 12:13     ` William Lee Irwin III
  2003-04-11 12:21     ` John Bradford
  2003-04-11 21:39     ` Andrew Morton
  2 siblings, 0 replies; 12+ messages in thread
From: William Lee Irwin III @ 2003-04-11 12:13 UTC (permalink / raw)
  To: Thomas Schlichter; +Cc: Andrew Morton, linux-kernel

On Fri, Apr 11, 2003 at 01:51:55PM +0200, Thomas Schlichter wrote:
> Sorry, but I think I cannot. The list_del() needs the value returned by 
> radix_tree_lookup(), so I cannot drop it... By the way, the only reason for 
> the radix tree is to avoid an O(n) search of the list for this list_del()... 
> Do you know how expensive the radix_tree_lookup() is? O(1) or O(log(n))?? To 
> my shame I do not really know that data structure... :-(

It's O(lg(keyspace)). This is regarded as constant by many.


-- wli


* Re: [RFC] first try for swap prefetch
  2003-04-11 11:51   ` Thomas Schlichter
  2003-04-11 12:13     ` William Lee Irwin III
@ 2003-04-11 12:21     ` John Bradford
  2003-04-11 12:22       ` Zwane Mwaikambo
  2003-04-11 21:39     ` Andrew Morton
  2 siblings, 1 reply; 12+ messages in thread
From: John Bradford @ 2003-04-11 12:21 UTC (permalink / raw)
  To: Thomas Schlichter; +Cc: Andrew Morton, linux-kernel

> > > as mentioned a few days ago I was going to try to implement a swap
> > > prefetch to better utilize the free memory. Now here is my first try.
> >
> > That's surprisingly cute.  Does it actually do anything noticeable?
> 
> Well, it fills free pagecache memory with swapped pages... ;-)

Actually, it could potentially do something very useful - if you are
using a laptop, or other machine where disks are spun down to save
power, you might be swapping in data while the disk still happens to
be spinning, rather than letting it spin down, then having to spin it
up again - in that instance you are definitely gaining something,
(more battery life).

John.


* Re: [RFC] first try for swap prefetch
  2003-04-11 12:21     ` John Bradford
@ 2003-04-11 12:22       ` Zwane Mwaikambo
  2003-04-11 13:29         ` John Bradford
  0 siblings, 1 reply; 12+ messages in thread
From: Zwane Mwaikambo @ 2003-04-11 12:22 UTC (permalink / raw)
  To: John Bradford; +Cc: Thomas Schlichter, Andrew Morton, linux-kernel

On Fri, 11 Apr 2003, John Bradford wrote:

> Actually, it could potentially do something very useful - if you are
> using a laptop, or other machine where disks are spun down to save
> power, you might be swapping in data while the disk still happens to
> be spinning, rather than letting it spin down, then having to spin it
> up again - in that instance you are definitely gaining something,
> (more battery life).

That sounds like a rather short disk spin down time (in which case you 
might not be gaining all that much battery life given the constant spin 
up/down), either that or you're paging in way too much stuff.

	Zwane


* Re: [RFC] first try for swap prefetch
  2003-04-11 12:22       ` Zwane Mwaikambo
@ 2003-04-11 13:29         ` John Bradford
  0 siblings, 0 replies; 12+ messages in thread
From: John Bradford @ 2003-04-11 13:29 UTC (permalink / raw)
  To: Zwane Mwaikambo
  Cc: John Bradford, Thomas Schlichter, Andrew Morton, linux-kernel

> > Actually, it could potentially do something very useful - if you are
> > using a laptop, or other machine where disks are spun down to save
> > power, you might be swapping in data while the disk still happens to
> > be spinning, rather than letting it spin down, then having to spin it
> > up again - in that instance you are definitely gaining something,
> > (more battery life).
> 
> That sounds like a rather short disk spin down time (in which case you 
> might not be gaining all that much battery life given the constant spin 
> up/down), either that or you're paging in way too much stuff.

Sure, it's probably not going to happen with normal usage, but say
you're using a large application, then load a web browser to read the
documentation, and part of the large application is swapped out.  Once
the web browser has loaded, it might free some RAM, and then you spend
10 minutes reading the documentation.  The disk might spin down after
five minutes, and then have to spin back up again when you switch back
to your main application.  We could possibly avoid this by swapping
the pages back in after one minute of inactivity, then letting the
disk spin down.

In fact, we could spin down the disk _immediately_ if we find that we
can swap all of the pages back into physical RAM.  Of course, that
would only make sense if the disk is being used primarily for swap,
but it's a scenario where we could do better than we are at the
moment.

John.


* Re: [RFC] first try for swap prefetch
  2003-04-11 11:51   ` Thomas Schlichter
  2003-04-11 12:13     ` William Lee Irwin III
  2003-04-11 12:21     ` John Bradford
@ 2003-04-11 21:39     ` Andrew Morton
  2003-04-12  5:05       ` Thomas Schlichter
  2 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2003-04-11 21:39 UTC (permalink / raw)
  To: Thomas Schlichter; +Cc: linux-kernel

Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
>
> > you can just do
> >
> > 	if (radix_tree_delete(...) != -ENOENT)
> > 		list_del(...)
> >
> > +		read_swap_cache_async(entry);
> 
> Sorry, but I think I cannot. The list_del() needs the value returned by 
> radix_tree_lookup(), so I cannot drop it...

OK, I'll change radix_tree_delete() to return the deleted object address if
it was found, else NULL.  That's a better API.

> Do you know how expensive the radix_tree_lookup() is? O(1) or O(log(n))?? To 
> my shame I do not really know that data structure... :-(

It is proportional to

	log_base_64(largest index which the tree has ever stored)

log_base_64: because each node has 64 slots.  Each time maxindex grows by a
factor of 64 we need to introduce a new level.

"largest index ever": because we do not (and cannot feasibly) reduce the
height when items are removed.
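
As a back-of-the-envelope illustration of that relation, where each level
consumes 6 index bits (64 slots per node), here is just the arithmetic behind
the statement, not the kernel's implementation:

	/* levels needed to cover indices 0..maxindex with 64-way fan-out */
	static unsigned int radix_tree_levels(unsigned long maxindex)
	{
		unsigned int height = 1;

		while (height * 6 < sizeof(maxindex) * 8 &&
		       (maxindex >> (height * 6)))
			height++;
		return height;
	}

	/* 0..63 -> 1 level, 64..4095 -> 2 levels, 4096..262143 -> 3, ... */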

> > It might make sense to poke the speculative swapin code in the page-freeing
> > path too.
> 
> I wanted to do this but don't know which function is the correct one for this. 
> But I will search harder... or can you give me a hint?

free_pages_bulk() would probably suit.



diff -puN fs/nfs/write.c~radix_tree_delete-api-cleanup fs/nfs/write.c
diff -puN lib/radix-tree.c~radix_tree_delete-api-cleanup lib/radix-tree.c
--- 25/lib/radix-tree.c~radix_tree_delete-api-cleanup	Fri Apr 11 14:30:30 2003
+++ 25-akpm/lib/radix-tree.c	Fri Apr 11 14:30:30 2003
@@ -349,15 +349,18 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  *	@index:		index key
  *
  *	Remove the item at @index from the radix tree rooted at @root.
+ *
+ *	Returns the address of the deleted item, or NULL if it was not present.
  */
-int radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
 	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
 	unsigned int height, shift;
+	void *ret = NULL;
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
-		return -ENOENT;
+		goto out;
 
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
@@ -365,7 +368,7 @@ int radix_tree_delete(struct radix_tree_
 
 	while (height > 0) {
 		if (*pathp->slot == NULL)
-			return -ENOENT;
+			goto out;
 
 		pathp[1].node = *pathp[0].slot;
 		pathp[1].slot = (struct radix_tree_node **)
@@ -375,8 +378,9 @@ int radix_tree_delete(struct radix_tree_
 		height--;
 	}
 
-	if (*pathp[0].slot == NULL)
-		return -ENOENT;
+	ret = *pathp[0].slot;
+	if (ret == NULL)
+		goto out;
 
 	*pathp[0].slot = NULL;
 	while (pathp[0].node && --pathp[0].node->count == 0) {
@@ -387,8 +391,8 @@ int radix_tree_delete(struct radix_tree_
 
 	if (root->rnode == NULL)
 		root->height = 0;  /* Empty tree, we can reset the height */
-
-	return 0;
+out:
+	return ret;
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
diff -puN mm/filemap.c~radix_tree_delete-api-cleanup mm/filemap.c
diff -puN include/linux/radix-tree.h~radix_tree_delete-api-cleanup include/linux/radix-tree.h
--- 25/include/linux/radix-tree.h~radix_tree_delete-api-cleanup	Fri Apr 11 14:30:30 2003
+++ 25-akpm/include/linux/radix-tree.h	Fri Apr 11 14:30:30 2003
@@ -43,7 +43,7 @@ do {					\
 
 extern int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
 extern void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-extern int radix_tree_delete(struct radix_tree_root *, unsigned long);
+extern void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 extern unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);

_



* Re: [RFC] first try for swap prefetch
  2003-04-11 21:39     ` Andrew Morton
@ 2003-04-12  5:05       ` Thomas Schlichter
  2003-04-12  5:37         ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Thomas Schlichter @ 2003-04-12  5:05 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel

[-- Attachment #1: signed data --]
[-- Type: text/plain, Size: 2410 bytes --]

On April 11, Andrew Morton wrote:
> Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
> > > you can just do
> > >
> > > 	if (radix_tree_delete(...) != -ENOENT)
> > > 		list_del(...)
> > >
> > > +		read_swap_cache_async(entry);
> >
> > Sorry, but I think I cannot. The list_del() needs the value returned by
> > radix_tree_lookup(), so I cannot drop it...
>
> OK, I'll change radix_tree_delete() to return the deleted object address if
> it was found, else NULL.  That's a better API.

That's right, I like it better that way, too!
Thank you for the patch!

> > Do you know how expensive the radix_tree_lookup() is? O(1) or O(log(n))??
> > To my shame I do not really know that data structure... :-(
>
> It is proportional to
>
> 	log_base_64(largest index which the tree has ever stored)
>
> log_base_64: because each node has 64 slots.  Each time maxindex grows by a
> factor of 64 we need to introduce a new level.
>
> "largest index ever": because we do not (and cannot feasibly) reduce the
> height when items are removed.

Thanks for the detailed answer.

> > > It might make sense to poke the speculative swapin code in the
> > > page-freeing path too.
> >
> > I wanted to do this but don't know which function is the correct one for
> > this. But I will search harder... or can you give me a hint?
>
> free_pages_bulk() would probably suit.

I don't think so, as this is part of the buddy allocator, which controls the 
usage of physical memory. Now I've implemented the following:

1. Add an entry when a page is removed by the kswapd.
2. Remove the entry when the page is added to the page_cache.
3. Remove the entry when the page is removed from the page_cache.

So with point 3 I cover the freeing of the pages. But as kswapd also calls the 
function from point 3, I do point 1 after kswapd has done point 3...

To finish my second (and surely better) try I just need one more piece of 
information...

How can I get the file pointer for a buffered page with the information 
available in the kswapd (mainly the page struct)??

This is very important because, as described above, I extract the needed 
information for my prefetch daemon in the kswapd. My daemon needs the file 
pointer to be able to load the buffer pages with the page_cache_read() 
function from the mm/filemap.c file.

I'm sorry if I bother you...

Best regards
   Thomas

[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]


* Re: [RFC] first try for swap prefetch
  2003-04-12  5:05       ` Thomas Schlichter
@ 2003-04-12  5:37         ` Andrew Morton
  2003-04-17 16:02           ` [RFC] second try for swap prefetch (does Oops!) Thomas Schlichter
  0 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2003-04-12  5:37 UTC (permalink / raw)
  To: Thomas Schlichter; +Cc: linux-kernel

Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
>
> How can I get the file pointer for a buffered page with the information 
> available in the kswapd (mainly the page struct)??

You can't, really.  There can be any number of file*'s pointing at an inode.

The pagefault handler will find it by find_vma(faulting_address)->vm_file. 
Other codepaths use syscalls, and the user passed the file* in.

You can call page_cache_readahead() with a NULL file*.  That'll mostly work
except for the odd filesystem like NFS which will oops.  But it's good enough
for testing and development.
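
For reference, the second-try patch later in this thread follows exactly this
route, calling the lower-level do_page_cache_readahead() with a NULL file
pointer. A minimal sketch of that call, where mapping and index stand for
whatever the prefetch bookkeeping recorded:

	/* prefetch a single page of `mapping' at `index'; no struct file
	 * needed, but filesystems that dereference it (e.g. NFS) will oops */
	do_page_cache_readahead(mapping, NULL, index, 1);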

Or you could cook up a local file struct along the lines of
fs/nfsd/vfs.c:nfsd_read(), but I would not like to lead a young person
that way ;)



* [RFC] second try for swap prefetch (does Oops!)
  2003-04-12  5:37         ` Andrew Morton
@ 2003-04-17 16:02           ` Thomas Schlichter
  0 siblings, 0 replies; 12+ messages in thread
From: Thomas Schlichter @ 2003-04-17 16:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel


[-- Attachment #1.1: body text --]
[-- Type: text/plain, Size: 1821 bytes --]

Hi,

in the attached patch I improved the memory usage of my data structures by 
using a kmem_cache. Also, I no longer use a single radix tree for the pointers 
into the list; instead every mapping gets its own... So I should be able to 
prefetch not only from swap space but from other places on disk, too.

But exactly this does not work, and I need some help with it...
If I only add pages from the swapper_space mapping to the prefetch list, 
everything works perfectly. But as soon as I add all pages with a mapping to 
the list, I get the attached Oops... :-(

It happens in the radix_tree_delete() call from the swap_prefetch work handler. 
So I think I am accessing an invalid (perhaps not initialized?) radix tree... 
But what puzzles me is that this entry must have been properly inserted into 
the tree, because otherwise it would never have been added to the list!

So I am quite confused...!

Thanks for your help!

   Thomas Schlichter

On April 12, Andrew Morton wrote:
> Thomas Schlichter <schlicht@uni-mannheim.de> wrote:
> > How can I get the file pointer for a buffered page with the information
> > available in the kswapd (mainly the page struct)??
>
> You can't, really.  There can be any number of file*'s pointing at an
> inode.

OK, I understand...

> The pagefault handler will find it by find_vma(faulting_address)->vm_file.
> Other codepaths use syscalls, and the user passed the file* in.
>
> You can call page_cache_readahead() with a NULL file*.  That'll mostly work
> except for the odd filesystem like NFS which will oops.  But it's good
> enough for testing and development.

That's the way I'm trying it now... ;-)

> Or you could cook up a local file struct along the lines of
> fs/nfsd/vfs.c:nfsd_read(), but I would not like to lead a young person
> that way ;)

Thx... ;-)

[-- Attachment #1.2: swap_prefetch.diff --]
[-- Type: text/x-diff, Size: 12610 bytes --]

diff -urP linux-2.5.67/arch/i386/Kconfig linux-2.5.67_patched/arch/i386/Kconfig
--- linux-2.5.67/arch/i386/Kconfig	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/arch/i386/Kconfig	Thu Apr 10 17:47:36 2003
@@ -373,6 +373,13 @@
 	depends on MK8 || MPENTIUM4
 	default y
 
+config SWAP_PREFETCH
+	tristate "Prefetch swapped memory"
+	depends on SWAP
+	help
+	  This option enables the kernel to prefetch swapped memory pages
+	  when idle.
+
 config HUGETLB_PAGE
 	bool "Huge TLB Page Support"
 	help
diff -urP linux-2.5.67/fs/inode.c linux-2.5.67_patched/fs/inode.c
--- linux-2.5.67/fs/inode.c	Mon Apr  7 19:32:58 2003
+++ linux-2.5.67_patched/fs/inode.c	Sat Apr 12 03:30:39 2003
@@ -180,6 +180,7 @@
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
 	sema_init(&inode->i_sem, 1);
+	INIT_RADIX_TREE(&inode->i_data.swap_tree, GFP_ATOMIC);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.page_lock);
 	init_MUTEX(&inode->i_data.i_shared_sem);
diff -urP linux-2.5.67/include/linux/fs.h linux-2.5.67_patched/include/linux/fs.h
--- linux-2.5.67/include/linux/fs.h	Mon Apr  7 19:30:58 2003
+++ linux-2.5.67_patched/include/linux/fs.h	Sat Apr 12 03:31:24 2003
@@ -312,6 +312,7 @@
 struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
+	struct radix_tree_root	swap_tree;	/* radix tree of swapped pages */
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	rwlock_t		page_lock;	/* and rwlock protecting it */
 	struct list_head	clean_pages;	/* list of clean pages */
diff -urP linux-2.5.67/include/linux/swap.h linux-2.5.67_patched/include/linux/swap.h
--- linux-2.5.67/include/linux/swap.h	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/include/linux/swap.h	Thu Apr 10 18:36:33 2003
@@ -155,6 +155,8 @@
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
 
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.67/include/linux/swap_prefetch.h linux-2.5.67_patched/include/linux/swap_prefetch.h
--- linux-2.5.67/include/linux/swap_prefetch.h	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/include/linux/swap_prefetch.h	Wed Apr 16 16:00:09 2003
@@ -0,0 +1,57 @@
+#ifndef _LINUX_SWAP_PREFETCH_H
+#define _LINUX_SWAP_PREFETCH_H
+
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+
+struct swapped_entry_t {
+	struct list_head	head;
+	swp_entry_t		swp_entry;
+	struct address_space	*mapping;
+};
+
+struct swapped_list_t {
+	spinlock_t		lock;
+	struct list_head	head;
+	kmem_cache_t		*cache;
+};
+
+extern struct swapped_list_t	swapped_list;
+
+static inline void add_to_swapped_list(struct address_space *mapping,
+							unsigned long index)
+{
+	struct swapped_entry_t *entry;
+	int error;
+ 
+	entry = kmem_cache_alloc(swapped_list.cache, GFP_ATOMIC);
+	if(entry) {
+		entry->swp_entry.val = index;
+		entry->mapping       = mapping;
+
+		spin_lock(&swapped_list.lock);
+		error = radix_tree_insert(&mapping->swap_tree, index, entry);
+		if(!error)
+			list_add(&entry->head, &swapped_list.head);
+		else
+			kmem_cache_free(swapped_list.cache, entry);
+		spin_unlock(&swapped_list.lock);
+	}
+}
+
+static inline void remove_from_swapped_list(struct address_space *mapping,
+							unsigned long index)
+{
+	struct swapped_entry_t *entry;
+
+	spin_lock(&swapped_list.lock);
+	entry = radix_tree_delete(&mapping->swap_tree, index);
+	if(entry) {
+		list_del(&entry->head);
+		kmem_cache_free(swapped_list.cache, entry);
+	}
+	spin_unlock(&swapped_list.lock);
+}
+
+#endif /* _LINUX_SWAP_PREFETCH_H */
diff -urP linux-2.5.67/kernel/ksyms.c linux-2.5.67_patched/kernel/ksyms.c
--- linux-2.5.67/kernel/ksyms.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/kernel/ksyms.c	Mon Apr 14 01:51:51 2003
@@ -58,6 +58,7 @@
 #include <linux/ptrace.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
+#include <linux/swap_prefetch.h>
 #include <asm/checksum.h>
 
 #if defined(CONFIG_PROC_FS)
@@ -70,6 +71,13 @@
 extern struct timezone sys_tz;
 
 extern int panic_timeout;
+
+/* needed for swap prefetch support */
+EXPORT_SYMBOL(swapped_list);
+EXPORT_SYMBOL(swapper_space);
+EXPORT_SYMBOL(swapin_readahead);
+EXPORT_SYMBOL(do_page_cache_readahead);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
 
 /* process memory management */
 EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.67/mm/Makefile linux-2.5.67_patched/mm/Makefile
--- linux-2.5.67/mm/Makefile	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/Makefile	Thu Apr 10 17:47:36 2003
@@ -12,3 +12,5 @@
 			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_SWAP_PREFETCH)	+= swap_prefetch.o
diff -urP linux-2.5.67/mm/filemap.c linux-2.5.67_patched/mm/filemap.c
--- linux-2.5.67/mm/filemap.c	Mon Apr  7 19:31:02 2003
+++ linux-2.5.67_patched/mm/filemap.c	Wed Apr 16 16:04:40 2003
@@ -16,8 +16,7 @@
 #include <linux/fs.h>
 #include <linux/aio.h>
 #include <linux/kernel_stat.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
+#include <linux/swap_prefetch.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
@@ -84,6 +83,7 @@
 
 	BUG_ON(PageDirty(page) && !PageSwapCache(page));
 
+	remove_from_swapped_list(mapping, page->index);
 	radix_tree_delete(&mapping->page_tree, page->index);
 	list_del(&page->list);
 	page->mapping = NULL;
@@ -223,8 +223,11 @@
 int add_to_page_cache(struct page *page, struct address_space *mapping,
 		pgoff_t offset, int gfp_mask)
 {
-	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	int error;
 
+	remove_from_swapped_list(mapping, offset);
+
+	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error == 0) {
 		page_cache_get(page);
 		write_lock(&mapping->page_lock);
diff -urP linux-2.5.67/mm/page_alloc.c linux-2.5.67_patched/mm/page_alloc.c
--- linux-2.5.67/mm/page_alloc.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/page_alloc.c	Thu Apr 10 17:47:36 2003
@@ -787,6 +787,48 @@
 }
 #endif
 
+static unsigned int nr_avail_zone_pages(int offset)
+{
+	pg_data_t *pgdat;
+	unsigned long avail = 0;
+
+	for_each_pgdat(pgdat) {
+		struct zonelist *zonelist = pgdat->node_zonelists + offset;
+		struct zone **zonep = zonelist->zones;
+		struct zone *zone;
+		unsigned long low = 0;
+
+		for (zone = *zonep++; zone; zone = *zonep++) {
+			unsigned long local_free = zone->free_pages;
+			unsigned long local_low  = zone->pages_low;
+			
+			low += local_low;
+			if (local_free > low) {
+				avail = max(avail, local_free - low);
+			}
+			low += local_low * sysctl_lower_zone_protection;
+		}
+	}
+
+	return avail;
+}
+
+/*
+ * Amount of available RAM allocatable within ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+	return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/*
+ * Amount of available RAM allocatable within all zones
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+	return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff -urP linux-2.5.67/mm/swap.c linux-2.5.67_patched/mm/swap.c
--- linux-2.5.67/mm/swap.c	Mon Apr  7 19:31:05 2003
+++ linux-2.5.67_patched/mm/swap.c	Sat Apr 12 03:19:53 2003
@@ -13,9 +13,8 @@
  * Buffermem limits added 12.3.98, Rik van Riel.
  */
 
-#include <linux/mm.h>
 #include <linux/kernel_stat.h>
-#include <linux/swap.h>
+#include <linux/swap_prefetch.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -24,6 +23,11 @@
 #include <linux/buffer_head.h>
 #include <linux/percpu.h>
 
+struct swapped_list_t swapped_list = {
+	.lock = SPIN_LOCK_UNLOCKED,
+	.head = LIST_HEAD_INIT(swapped_list.head),
+};
+
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
@@ -390,4 +394,12 @@
 	 * Right now other parts of the system means that we
 	 * _really_ don't want to cluster much more
 	 */
+
+	/*
+	 * Create kmem cache for swapped entries
+	 */
+ 	swapped_list.cache = kmem_cache_create("swapped_entry",
+		sizeof(struct swapped_entry_t), 0, 0, NULL, NULL);
+	if(!swapped_list.cache)
+		panic("swap_setup(): cannot create swapped_entry SLAB cache");
 }
diff -urP linux-2.5.67/mm/swap_prefetch.c linux-2.5.67_patched/mm/swap_prefetch.c
--- linux-2.5.67/mm/swap_prefetch.c	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/mm/swap_prefetch.c	Thu Apr 17 00:29:40 2003
@@ -0,0 +1,88 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/swap_prefetch.h>
+
+#define RESERVED_PAGES	50		/* let 200 kByte of pagecache free */
+#define INTERVAL	60		/* (secs) Default is 1 minute */
+
+static int reserved_pages = RESERVED_PAGES;
+static int interval       = INTERVAL;
+
+MODULE_PARM(reserved_pages,"i");
+MODULE_PARM_DESC(reserved_pages,
+	"count of pagechache pages to let free (default 50)");
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter <thomas.schlichter@web.de>");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/*
+ *	Our timer
+ */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+		TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/*
+ *	Our work
+ */
+static void prefetch_work_handler(void *data);
+static DECLARE_WORK(prefetch_work, prefetch_work_handler, 0);
+
+/*
+ *	If the timer expires..
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	schedule_work(&prefetch_work);
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+}
+
+/*
+ *	..do the work
+ */
+static void prefetch_work_handler(void *data)
+{
+	printk(KERN_INFO "Available pages before: %d\n", nr_avail_pagecache_pages());
+
+	while(nr_avail_pagecache_pages() > reserved_pages) {
+		struct swapped_entry_t *entry;
+
+		spin_lock(&swapped_list.lock);
+		if(list_empty(&swapped_list.head)) {
+			spin_unlock(&swapped_list.lock);
+			break;
+		}
+		entry = list_entry(swapped_list.head.next, struct swapped_entry_t, head);
+		radix_tree_delete(&entry->mapping->swap_tree, entry->swp_entry.val);
+		list_del(&entry->head);
+		spin_unlock(&swapped_list.lock);
+
+		if(entry->mapping == &swapper_space)
+			swapin_readahead(entry->swp_entry);
+		else
+			do_page_cache_readahead(entry->mapping, NULL, entry->swp_entry.val, 1);
+		kmem_cache_free(swapped_list.cache, entry);
+	}
+
+	printk(KERN_INFO "Available pages after: %d\n", nr_avail_pagecache_pages());
+}
+
+static int __init prefetch_init(void)
+{
+	prefetch_timer_handler(0);
+	return 0;
+}
+
+static void __exit prefetch_exit(void)
+{
+	del_timer(&prefetch_timer);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.67/mm/swap_state.c linux-2.5.67_patched/mm/swap_state.c
--- linux-2.5.67/mm/swap_state.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/swap_state.c	Sat Apr 12 03:29:59 2003
@@ -33,6 +33,7 @@
 extern struct address_space_operations swap_aops;
 
 struct address_space swapper_space = {
+	.swap_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC),
 	.page_lock	= RW_LOCK_UNLOCKED,
 	.clean_pages	= LIST_HEAD_INIT(swapper_space.clean_pages),
diff -urP linux-2.5.67/mm/vmscan.c linux-2.5.67_patched/mm/vmscan.c
--- linux-2.5.67/mm/vmscan.c	Thu Apr 10 19:25:40 2003
+++ linux-2.5.67_patched/mm/vmscan.c	Thu Apr 17 14:41:17 2003
@@ -11,10 +11,9 @@
  *  Multiqueue VM started 5.8.00, Rik van Riel.
  */
 
-#include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/kernel_stat.h>
-#include <linux/swap.h>
+#include <linux/swap_prefetch.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
@@ -417,6 +416,9 @@
 		ret++;
 		if (!pagevec_add(&freed_pvec, page))
 			__pagevec_release_nonlru(&freed_pvec);
+		if (mapping)
+//		if (mapping == &swapper_space)
+			add_to_swapped_list(mapping, page->index);
 		continue;
 
 activate_locked:

[-- Attachment #1.3: ksymoops.txt --]
[-- Type: text/plain, Size: 2971 bytes --]

ksymoops 2.4.2 on i586 2.5.67.  Options used
     -V (default)
     -k /proc/ksyms (default)
     -l /proc/modules (default)
     -o /lib/modules/2.5.67/ (default)
     -m /boot/System.map-2.5.67 (default)

Warning: You did not tell me where to find symbol information.  I will
assume that the log matches the kernel and modules that are running
right now and I'll use the default options above for symbol resolution.
If the current kernel and/or modules do not match the log, you can get
more accurate output by telling me the kernel version and where to find
map, modules, ksyms etc.  ksymoops -h explains the options.

Error (regular_file): read_ksyms stat /proc/ksyms failed
No modules in ksyms, skipping objects
No ksyms, skipping lsmod
Unable to handle kernel paging request at virtual address 64815c28
c02018e8
*pde = 00000000
Oops: 0000 [#1]
CPU:    0
EIP:    0060:[<c02018e8>]    Tainted: P  
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010283
eax: 64815c28   ebx: c12b9f3c   ecx: 8b21ca84   edx: 64815c24
esi: 8b21ca7e   edi: 00000001   ebp: c12b9f6c   esp: c12b9f20
ds: 007b   es: 007b   ss: 0068
Stack: c2e34c6c c03840e0 c12b8000 c1daf716 00000000 00000000 ceeb5570 64815c24 
       64815c28 00000282 c2e34c50 c2e34c50 c03840e0 c12b8000 00000000 00000000 
       000003d4 00002a76 c12b9f74 c12b9f88 d4b0415f ceeb5568 00000001 c12b8000 
 [<d4b0415f>] prefetch_work_handler+0x12f/0x210 [swap_prefetch]
 [<d4b04d80>] prefetch_work+0x0/0x60 [swap_prefetch]
 [<c0139dcf>] worker_thread+0x28f/0x438
 [<c0139b40>] worker_thread+0x0/0x438
 [<d4b04030>] prefetch_work_handler+0x0/0x210 [swap_prefetch]
 [<c0122d38>] default_wake_function+0x0/0x18
 [<c0122d38>] default_wake_function+0x0/0x18
 [<c01081e5>] kernel_thread_helper+0x5/0xc
Code: 8b 10 85 d2 74 6a 89 f8 89 f1 d3 e8 83 e0 3f 8d 44 82 04 89 

>>EIP; c02018e8 <radix_tree_delete+4c/c8>   <=====
Code;  c02018e8 <radix_tree_delete+4c/c8>
00000000 <_EIP>:
Code;  c02018e8 <radix_tree_delete+4c/c8>   <=====
   0:   8b 10                     mov    (%eax),%edx   <=====
Code;  c02018ea <radix_tree_delete+4e/c8>
   2:   85 d2                     test   %edx,%edx
Code;  c02018ec <radix_tree_delete+50/c8>
   4:   74 6a                     je     70 <_EIP+0x70> c0201958 <radix_tree_delete+bc/c8>
Code;  c02018ee <radix_tree_delete+52/c8>
   6:   89 f8                     mov    %edi,%eax
Code;  c02018f0 <radix_tree_delete+54/c8>
   8:   89 f1                     mov    %esi,%ecx
Code;  c02018f2 <radix_tree_delete+56/c8>
   a:   d3 e8                     shr    %cl,%eax
Code;  c02018f4 <radix_tree_delete+58/c8>
   c:   83 e0 3f                  and    $0x3f,%eax
Code;  c02018f6 <radix_tree_delete+5a/c8>
   f:   8d 44 82 04               lea    0x4(%edx,%eax,4),%eax
Code;  c02018fa <radix_tree_delete+5e/c8>
  13:   89 00                     mov    %eax,(%eax)


1 warning and 1 error issued.  Results may not be reliable.

[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]


* Re: [RFC] first try for swap prefetch
@ 2003-04-11 16:57 Chuck Ebbert
  0 siblings, 0 replies; 12+ messages in thread
From: Chuck Ebbert @ 2003-04-11 16:57 UTC (permalink / raw)
  To: John Bradford; +Cc: linux-kernel

John Bradford wrote:


> We could possibly avoid this by swapping the pages back
> in after one minute of inactivity, then letting the
> disk spin down.


  Why not also write pages out to swap before it's really necessary?
If they were left mapped but marked as having up-to-date copies
on swap, they could be discarded immediately if the system needed
memory.  (Of course if they got written to they would have to be
paged out again.)

--
 "Let's fight until six, and then have dinner," said Tweedledum.
 --Lewis Carroll, _Through the Looking Glass_

