linux-kernel.vger.kernel.org archive mirror
* [RFC] first try for swap prefetch
@ 2003-04-10 17:47 Thomas Schlichter
  2003-04-10 23:18 ` Andrew Morton
  0 siblings, 1 reply; 12+ messages in thread
From: Thomas Schlichter @ 2003-04-10 17:47 UTC (permalink / raw)
  To: linux-kernel


[-- Attachment #1.1: body text --]
[-- Type: text/plain, Size: 892 bytes --]

Hi,

As mentioned a few days ago, I was going to try to implement swap prefetching
to better utilize free memory. Here is my first try. This version works only
as a module and checks for free pagecache memory at an interval specified as a
module parameter.
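
The check interval can be set at module load time, e.g.:

  modprobe swap_prefetch interval=120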

While testing this I noticed that many of the page reloads do not come from
swap space but from buffers that were evicted. I could easily record which
buffers have been removed, but I don't know how to read them back into the
pagecache...
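
The closest interface I have found so far is read_cache_page() from
linux/pagemap.h, but I am not sure it is the right one. A rough, untested
sketch of what I mean (the mapping and index would have to be recorded at
eviction time; error handling omitted):

static void prefetch_file_page(struct address_space *mapping, unsigned long index)
{
	struct page *page;

	page = read_cache_page(mapping, index,
			(int (*)(void *, struct page *))mapping->a_ops->readpage,
			NULL /* some ->readpage()s may want the struct file here */);
	if (!IS_ERR(page))
		page_cache_release(page);	/* drop the extra reference */
}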

Another thing I noticed is that somewhere in the kernel there must be code
that always tries to keep some memory pages free, even when there are cached
pages that could simply be freed because they are unmodified... Perhaps that
code should be changed...
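
(I suspect the code in question is the per-zone watermark handling: the
allocator tries to keep zone->free_pages above zone->pages_low/pages_min and
wakes kswapd instead of dropping clean pagecache pages on the spot. Very
simplified, not the actual code:

	if (zone->free_pages < zone->pages_low)
		wakeup_kswapd(zone);	/* reclaim in the background */

but I have not found the exact place yet.)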

I hope someone can give me some hints or point out obvious mistakes I made...
;-)

Thank you!

Best regards
   Thomas

[-- Attachment #1.2: swap_prefetch.diff --]
[-- Type: text/x-diff, Size: 8914 bytes --]

diff -urP linux-2.5.67/arch/i386/Kconfig linux-2.5.67_patched/arch/i386/Kconfig
--- linux-2.5.67/arch/i386/Kconfig	Mon Apr  7 19:30:43 2003
+++ linux-2.5.67_patched/arch/i386/Kconfig	Thu Apr 10 17:47:36 2003
@@ -373,6 +373,13 @@
 	depends on MK8 || MPENTIUM4
 	default y
 
+config SWAP_PREFETCH
+	tristate "Prefetch swapped memory"
+	depends on SWAP
+	help
+	  This option enables the kernel to prefetch swapped memory pages
+	  when idle.
+
 config HUGETLB_PAGE
 	bool "Huge TLB Page Support"
 	help
diff -urP linux-2.5.67/include/linux/swap.h linux-2.5.67_patched/include/linux/swap.h
--- linux-2.5.67/include/linux/swap.h	Mon Apr  7 19:30:35 2003
+++ linux-2.5.67_patched/include/linux/swap.h	Thu Apr 10 18:36:33 2003
@@ -155,6 +155,8 @@
 extern unsigned int nr_free_pages_pgdat(pg_data_t *pgdat);
 extern unsigned int nr_free_buffer_pages(void);
 extern unsigned int nr_free_pagecache_pages(void);
+extern unsigned int nr_avail_buffer_pages(void);
+extern unsigned int nr_avail_pagecache_pages(void);
 
 /* linux/mm/swap.c */
 extern void FASTCALL(lru_cache_add(struct page *));
diff -urP linux-2.5.67/include/linux/swap_prefetch.h linux-2.5.67_patched/include/linux/swap_prefetch.h
--- linux-2.5.67/include/linux/swap_prefetch.h	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/include/linux/swap_prefetch.h	Thu Apr 10 18:36:40 2003
@@ -0,0 +1,52 @@
+#ifndef _LINUX_SWAP_PREFETCH_H
+#define _LINUX_SWAP_PREFETCH_H
+
+#include <linux/swap.h>
+#include <linux/radix-tree.h>
+
+struct swapped_entry_t {
+	struct list_head list;
+	swp_entry_t entry;
+};
+
+struct swapped_root_t {
+	spinlock_t lock;
+	struct list_head list;
+	struct radix_tree_root tree;
+};
+
+extern struct swapped_root_t swapped_root;
+
+static inline void add_to_swapped_list(swp_entry_t entry)
+{
+	struct swapped_entry_t * swapped_entry;
+	int ret;
+
+	swapped_entry = kmalloc(sizeof(*swapped_entry), GFP_ATOMIC);
+	if(swapped_entry) {
+		swapped_entry->entry = entry;
+		spin_lock(&swapped_root.lock);
+		ret = radix_tree_insert(&swapped_root.tree, entry.val, swapped_entry);
+		if(ret == 0)
+			list_add(&swapped_entry->list, &swapped_root.list);
+		else
+			kfree(swapped_entry);
+		spin_unlock(&swapped_root.lock);
+	}
+}
+
+static inline void remove_from_swapped_list(swp_entry_t entry)
+{
+	struct swapped_entry_t * swapped_entry;
+
+	spin_lock(&swapped_root.lock);
+	swapped_entry = radix_tree_lookup(&swapped_root.tree, entry.val);
+	if(swapped_entry) {
+		list_del(&swapped_entry->list);
+		radix_tree_delete(&swapped_root.tree, entry.val);
+		kfree(swapped_entry);
+	}
+	spin_unlock(&swapped_root.lock);
+}
+
+#endif /* _LINUX_SWAP_PREFETCH_H */
diff -urP linux-2.5.67/kernel/ksyms.c linux-2.5.67_patched/kernel/ksyms.c
--- linux-2.5.67/kernel/ksyms.c	Mon Apr  7 19:30:34 2003
+++ linux-2.5.67_patched/kernel/ksyms.c	Thu Apr 10 17:47:36 2003
@@ -58,6 +58,7 @@
 #include <linux/ptrace.h>
 #include <linux/time.h>
 #include <linux/backing-dev.h>
+#include <linux/swap_prefetch.h>
 #include <asm/checksum.h>
 
 #if defined(CONFIG_PROC_FS)
@@ -70,6 +71,11 @@
 extern struct timezone sys_tz;
 
 extern int panic_timeout;
+
+/* needed for swap prefetch support */
+EXPORT_SYMBOL(swapped_root);
+EXPORT_SYMBOL(nr_avail_pagecache_pages);
+EXPORT_SYMBOL(read_swap_cache_async);
 
 /* process memory management */
 EXPORT_SYMBOL(do_mmap_pgoff);
diff -urP linux-2.5.67/mm/Makefile linux-2.5.67_patched/mm/Makefile
--- linux-2.5.67/mm/Makefile	Mon Apr  7 19:31:52 2003
+++ linux-2.5.67_patched/mm/Makefile	Thu Apr 10 17:47:36 2003
@@ -12,3 +12,5 @@
 			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_SWAP_PREFETCH)	+= swap_prefetch.o
diff -urP linux-2.5.67/mm/page_alloc.c linux-2.5.67_patched/mm/page_alloc.c
--- linux-2.5.67/mm/page_alloc.c	Mon Apr  7 19:30:39 2003
+++ linux-2.5.67_patched/mm/page_alloc.c	Thu Apr 10 17:47:36 2003
@@ -787,6 +787,48 @@
 }
 #endif
 
+static unsigned int nr_avail_zone_pages(int offset)
+{
+	pg_data_t *pgdat;
+	unsigned long avail = 0;
+
+	for_each_pgdat(pgdat) {
+		struct zonelist *zonelist = pgdat->node_zonelists + offset;
+		struct zone **zonep = zonelist->zones;
+		struct zone *zone;
+		unsigned long low = 0;
+
+		for (zone = *zonep++; zone; zone = *zonep++) {
+			unsigned long local_free = zone->free_pages;
+			unsigned long local_low  = zone->pages_low;
+			
+			low += local_low;
+			if (local_free > low) {
+				avail = max(avail, local_free - low);
+			}
+			low += local_low * sysctl_lower_zone_protection;
+		}
+	}
+
+	return avail;
+}
+
+/*
+ * Amount of available RAM allocatable within ZONE_DMA and ZONE_NORMAL
+ */
+unsigned int nr_avail_buffer_pages(void)
+{
+	return nr_avail_zone_pages(GFP_USER & GFP_ZONEMASK);
+}
+
+/*
+ * Amount of available RAM allocatable within all zones
+ */
+unsigned int nr_avail_pagecache_pages(void)
+{
+	return nr_avail_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+}
+
 #ifdef CONFIG_NUMA
 static void show_node(struct zone *zone)
 {
diff -urP linux-2.5.67/mm/swap_prefetch.c linux-2.5.67_patched/mm/swap_prefetch.c
--- linux-2.5.67/mm/swap_prefetch.c	Thu Jan  1 01:00:00 1970
+++ linux-2.5.67_patched/mm/swap_prefetch.c	Thu Apr 10 18:33:06 2003
@@ -0,0 +1,79 @@
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/swap_prefetch.h>
+
+#define INTERVAL	60		/* (secs) Default is 1 minute */
+
+static int interval       = INTERVAL;
+
+MODULE_PARM(interval,"i");
+MODULE_PARM_DESC(interval,
+	"delay in seconds to wait between memory checks (default 60)");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Schlichter <thomas.schlichter@web.de>");
+MODULE_DESCRIPTION("prefetches swap pages when there is free memory");
+
+/*
+ *	Our timer
+ */
+static void prefetch_timer_handler(unsigned long data);
+static struct timer_list prefetch_timer =
+		TIMER_INITIALIZER(prefetch_timer_handler, 0, 0);
+
+/*
+ *	Our work
+ */
+static void prefetch_work_handler(void *data);
+static DECLARE_WORK(prefetch_work, prefetch_work_handler, 0);
+
+/*
+ *	If the timer expires..
+ */
+static void prefetch_timer_handler(unsigned long data)
+{
+	schedule_work(&prefetch_work);
+	prefetch_timer.expires = jiffies + interval * HZ;
+	add_timer(&prefetch_timer);
+}
+
+/*
+ *	..do the work
+ */
+static void prefetch_work_handler(void *data)
+{
+	printk(KERN_INFO "Available pages before: %d\n", nr_avail_pagecache_pages());
+
+	while(nr_avail_pagecache_pages() != 0) {
+		struct swapped_entry_t *swapped_entry;
+		swp_entry_t entry;
+
+		spin_lock(&swapped_root.lock);
+		if(list_empty(&swapped_root.list)) {
+			spin_unlock(&swapped_root.lock);
+			break;
+		}
+		swapped_entry = list_entry(swapped_root.list.next,
+					struct swapped_entry_t, list);
+		entry = swapped_entry->entry;
+		spin_unlock(&swapped_root.lock);
+
+		read_swap_cache_async(entry);
+	}
+
+	printk(KERN_INFO "Available pages after: %d\n", nr_avail_pagecache_pages());
+}
+
+static int __init prefetch_init(void)
+{
+	prefetch_timer_handler(0);
+	return 0;
+}
+
+static void __exit prefetch_exit(void)
+{
+	del_timer(&prefetch_timer);
+}
+
+module_init(prefetch_init);
+module_exit(prefetch_exit);
diff -urP linux-2.5.67/mm/swap_state.c linux-2.5.67_patched/mm/swap_state.c
--- linux-2.5.67/mm/swap_state.c	Mon Apr  7 19:31:11 2003
+++ linux-2.5.67_patched/mm/swap_state.c	Thu Apr 10 18:32:32 2003
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>	/* block_sync_page() */
+#include <linux/swap_prefetch.h>
 
 #include <asm/pgtable.h>
 
@@ -49,6 +50,12 @@
 	.private_list	= LIST_HEAD_INIT(swapper_space.private_list),
 };
 
+struct swapped_root_t swapped_root = {
+	.lock = SPIN_LOCK_UNLOCKED,
+	.list = LIST_HEAD_INIT(swapped_root.list),
+	.tree = RADIX_TREE_INIT(GFP_ATOMIC),
+};
+
 #define INC_CACHE_INFO(x)	do { swap_cache_info.x++; } while (0)
 
 static struct {
@@ -344,6 +351,8 @@
 {
 	struct page *found_page, *new_page = NULL;
 	int err;
+
+	remove_from_swapped_list(entry);
 
 	do {
 		/*
diff -urP linux-2.5.67/mm/vmscan.c linux-2.5.67_patched/mm/vmscan.c
--- linux-2.5.67/mm/vmscan.c	Mon Apr  7 19:30:43 2003
+++ linux-2.5.67_patched/mm/vmscan.c	Thu Apr 10 18:31:02 2003
@@ -27,6 +27,7 @@
 #include <linux/pagevec.h>
 #include <linux/backing-dev.h>
 #include <linux/rmap-locking.h>
+#include <linux/swap_prefetch.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -402,6 +403,7 @@
 			swp_entry_t swap = { .val = page->index };
 			__delete_from_swap_cache(page);
 			write_unlock(&mapping->page_lock);
+			add_to_swapped_list(swap);
 			swap_free(swap);
 			__put_page(page);	/* The pagecache ref */
 			goto free_it;

[-- Attachment #2: signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

* Re: [RFC] first try for swap prefetch
@ 2003-04-11 16:57 Chuck Ebbert
  0 siblings, 0 replies; 12+ messages in thread
From: Chuck Ebbert @ 2003-04-11 16:57 UTC (permalink / raw)
  To: John Bradford; +Cc: linux-kernel

John Bradford wrote:


> We could possibly avoid this by swapping the pages back
> in after one minute of inactivity, then letting the
> disk spin down.


  Why not also write pages out to swap before it's really necessary?
If they were left mapped but marked as having up-to-date copies
on swap, they could be discarded immediately if the system needed
memory.  (Of course if they got written to they would have to be
paged out again.)
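
  Roughly (purely illustrative, every helper below is made up):

	/* while the system is idle: pre-clean, but keep the page mapped */
	if (page_idle_long_enough(page) && !PageDirty(page)) {
		write_copy_to_swap(page);
		mark_swap_copy_valid(page);
	}

	/* later, under memory pressure: the swap copy is still current */
	if (swap_copy_valid(page) && !PageDirty(page))
		drop_page_without_io(page);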

--
 "Let's fight until six, and then have dinner," said Tweedledum.
 --Lewis Carroll, _Through the Looking Glass_


Thread overview: 12+ messages
2003-04-10 17:47 [RFC] first try for swap prefetch Thomas Schlichter
2003-04-10 23:18 ` Andrew Morton
2003-04-11 11:51   ` Thomas Schlichter
2003-04-11 12:13     ` William Lee Irwin III
2003-04-11 12:21     ` John Bradford
2003-04-11 12:22       ` Zwane Mwaikambo
2003-04-11 13:29         ` John Bradford
2003-04-11 21:39     ` Andrew Morton
2003-04-12  5:05       ` Thomas Schlichter
2003-04-12  5:37         ` Andrew Morton
2003-04-17 16:02           ` [RFC] second try for swap prefetch (does Oops!) Thomas Schlichter
2003-04-11 16:57 [RFC] first try for swap prefetch Chuck Ebbert
