Date: Tue, 06 Apr 2004 19:56:49 +0900
From: IWAMOTO Toshihiro
To: linux-kernel@vger.kernel.org, lhms-devel@lists.sourceforge.net
Subject: [patch 1/3] memory hotplug prototype
In-Reply-To: <20040406105353.9BDE8705DE@sv1.valinux.co.jp>
References: <20040406105353.9BDE8705DE@sv1.valinux.co.jp>
Message-Id: <20040406105649.77F36705DE@sv1.valinux.co.jp>

memoryhotplug.patch: The main, most important patch.

$Id: memoryhotplug.patch,v 1.72 2004/04/06 10:56:05 iwamoto Exp $

diff -dpurN linux-2.6.5/arch/i386/Kconfig linux-2.6.5-mh/arch/i386/Kconfig
--- linux-2.6.5/arch/i386/Kconfig	Sun Apr  4 12:36:25 2004
+++ linux-2.6.5-mh/arch/i386/Kconfig	Mon Apr  5 12:44:53 2004
@@ -717,9 +717,19 @@ comment "NUMA (NUMA-Q) requires SMP, 64G
 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
 	depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI)
 
+config MEMHOTPLUG
+	bool "Memory hotplug test"
+	depends on !X86_PAE
+	default n
+
+config MEMHOTPLUG_BLKSIZE
+	int "Size of a memory hotplug unit (in MB, must be multiple of 256)."
+	range 256 1024
+	depends on MEMHOTPLUG
+
 config DISCONTIGMEM
 	bool
-	depends on NUMA
+	depends on NUMA || MEMHOTPLUG
 	default y
 
 config HAVE_ARCH_BOOTMEM_NODE
diff -dpurN linux-2.6.5/include/linux/gfp.h linux-2.6.5-mh/include/linux/gfp.h
--- linux-2.6.5/include/linux/gfp.h	Sun Apr  4 12:36:52 2004
+++ linux-2.6.5-mh/include/linux/gfp.h	Mon Apr  5 12:44:53 2004
@@ -7,9 +7,10 @@
 /*
  * GFP bitmasks..
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA	0x01
-#define __GFP_HIGHMEM	0x02
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
+#define __GFP_DMA		0x01
+#define __GFP_HIGHMEM		0x02
+#define __GFP_HOTREMOVABLE	0x03
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -41,7 +42,7 @@
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS)
-#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM)
+#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM | __GFP_HOTREMOVABLE)
 
 /* Flag - indicates that the buffer will be suitable for DMA.
    Ignored on some platforms, used as appropriate on others */
diff -dpurN linux-2.6.5/include/linux/mm.h linux-2.6.5-mh/include/linux/mm.h
--- linux-2.6.5/include/linux/mm.h	Sun Apr  4 12:36:15 2004
+++ linux-2.6.5-mh/include/linux/mm.h	Mon Apr  5 12:44:53 2004
@@ -228,7 +228,14 @@ struct page {
  */
 #define put_page_testzero(p)				\
 	({						\
-		BUG_ON(page_count(p) == 0);		\
+		if (page_count(p) == 0) {		\
+			int i;				\
+			printk("Page: %lx ", (long)p);	\
+			for(i = 0; i < sizeof(struct page); i++)	\
+				printk(" %02x", ((unsigned char *)p)[i]); \
+			printk("\n");			\
+			BUG();				\
+		}					\
 		atomic_dec_and_test(&(p)->count);	\
 	})
 
@@ -286,6 +293,11 @@ static inline void put_page(struct page
 }
 #endif	/* CONFIG_HUGETLB_PAGE */
 
+
+static inline int is_page_cache_freeable(struct page *page)
+{
+	return page_count(page) - !!PagePrivate(page) == 2;
+}
 
 /*
  * Multiple processes may "see" the same page. E.g. for untouched
diff -dpurN linux-2.6.5/include/linux/mmzone.h linux-2.6.5-mh/include/linux/mmzone.h
--- linux-2.6.5/include/linux/mmzone.h	Sun Apr  4 12:37:23 2004
+++ linux-2.6.5-mh/include/linux/mmzone.h	Mon Apr  5 12:46:47 2004
@@ -160,8 +160,10 @@ struct zone {
 #define ZONE_DMA		0
 #define ZONE_NORMAL		1
 #define ZONE_HIGHMEM		2
+#define ZONE_HOTREMOVABLE	3	/* only for zonelists */
 
 #define MAX_NR_ZONES		3	/* Sync this with ZONES_SHIFT */
+#define MAX_NR_ZONELISTS	4
 #define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
 
 #define GFP_ZONEMASK	0x03
@@ -203,7 +205,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[MAX_NR_ZONES];
+	struct zonelist node_zonelists[MAX_NR_ZONELISTS];
 	int nr_zones;
 	struct page *node_mem_map;
 	struct bootmem_data *bdata;
@@ -215,6 +217,7 @@ typedef struct pglist_data {
 	struct pglist_data *pgdat_next;
 	wait_queue_head_t kswapd_wait;
 	struct task_struct *kswapd;
+	char removable, enabled;
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
diff -dpurN linux-2.6.5/include/linux/page-flags.h linux-2.6.5-mh/include/linux/page-flags.h
--- linux-2.6.5/include/linux/page-flags.h	Sun Apr  4 12:37:37 2004
+++ linux-2.6.5-mh/include/linux/page-flags.h	Mon Apr  5 12:44:53 2004
@@ -76,6 +76,8 @@
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_compound		19	/* Part of a compound page */
 
+#define PG_again		20
+
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -297,6 +299,10 @@ extern void get_full_page_state(struct p
 #define PageCompound(page)	test_bit(PG_compound, &(page)->flags)
 #define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
 #define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
+
+#define PageAgain(page)		test_bit(PG_again, &(page)->flags)
+#define SetPageAgain(page)	set_bit(PG_again, &(page)->flags)
+#define ClearPageAgain(page)	clear_bit(PG_again, &(page)->flags)
 
 /*
  * The PageSwapCache predicate doesn't use a PG_flag at this time,
diff -dpurN linux-2.6.5/include/linux/swap.h linux-2.6.5-mh/include/linux/swap.h
--- linux-2.6.5/include/linux/swap.h	Sun Apr  4 12:36:15 2004
+++ linux-2.6.5-mh/include/linux/swap.h	Mon Apr  5 12:44:53 2004
@@ -183,13 +183,13 @@ int FASTCALL(page_referenced(struct page
 struct pte_chain *FASTCALL(page_add_rmap(struct page *, pte_t *,
					struct pte_chain *));
 void FASTCALL(page_remove_rmap(struct page *, pte_t *));
-int FASTCALL(try_to_unmap(struct page *));
+int FASTCALL(try_to_unmap(struct page *, struct list_head *));
 
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 #else
 #define page_referenced(page)	TestClearPageReferenced(page)
-#define try_to_unmap(page)	SWAP_FAIL
+#define try_to_unmap(page, force)	SWAP_FAIL
 #endif /* CONFIG_MMU */
 
 /* return values of try_to_unmap */
diff -dpurN linux-2.6.5/mm/Makefile linux-2.6.5-mh/mm/Makefile
--- linux-2.6.5/mm/Makefile	Sun Apr  4 12:37:36 2004
+++ linux-2.6.5-mh/mm/Makefile	Mon Apr  5 12:44:53 2004
@@ -12,3 +12,5 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   slab.o swap.o truncate.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
+
+obj-$(CONFIG_MEMHOTPLUG) += memhotplug.o
diff -dpurN linux-2.6.5/mm/filemap.c linux-2.6.5-mh/mm/filemap.c
--- linux-2.6.5/mm/filemap.c	Sun Apr  4 12:36:55 2004
+++ linux-2.6.5-mh/mm/filemap.c	Mon Apr  5 12:44:53 2004
@@ -248,7 +248,8 @@ EXPORT_SYMBOL(filemap_fdatawait);
 int add_to_page_cache(struct page *page, struct address_space *mapping,
		pgoff_t offset, int gfp_mask)
 {
-	int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+	int error = radix_tree_preload((gfp_mask & ~GFP_ZONEMASK) |
+		((gfp_mask & GFP_ZONEMASK) == __GFP_DMA ? __GFP_DMA : 0));
 
 	if (error == 0) {
 		page_cache_get(page);
@@ -455,6 +456,7 @@ repeat:
 				page_cache_release(page);
 				goto repeat;
 			}
+			BUG_ON(PageAgain(page));
 		}
 	}
 	spin_unlock(&mapping->page_lock);
@@ -679,6 +681,8 @@ page_not_up_to_date:
 		goto page_ok;
 	}
 
+	BUG_ON(PageAgain(page));
+
 readpage:
 	/* ... and start the actual read. The read will unlock the page. */
 	error = mapping->a_ops->readpage(filp, page);
@@ -1135,6 +1139,8 @@ page_not_uptodate:
 		goto success;
 	}
 
+	BUG_ON(PageAgain(page));
+
 	if (!mapping->a_ops->readpage(file, page)) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
@@ -1243,6 +1249,8 @@ page_not_uptodate:
 		goto success;
 	}
 
+	BUG_ON(PageAgain(page));
+
 	if (!mapping->a_ops->readpage(file, page)) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
@@ -1451,6 +1459,8 @@ retry:
 		unlock_page(page);
 		goto out;
 	}
+	BUG_ON(PageAgain(page));
+
 	err = filler(data, page);
 	if (err < 0) {
 		page_cache_release(page);
diff -dpurN linux-2.6.5/mm/memory.c linux-2.6.5-mh/mm/memory.c
--- linux-2.6.5/mm/memory.c	Sun Apr  4 12:36:58 2004
+++ linux-2.6.5-mh/mm/memory.c	Mon Apr  5 12:44:53 2004
@@ -1248,6 +1248,7 @@ static int do_swap_page(struct mm_struct
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
+again:
 	page = lookup_swap_cache(entry);
 	if (!page) {
 		swapin_readahead(entry);
@@ -1280,6 +1281,14 @@ static int do_swap_page(struct mm_struct
 		goto out;
 	}
 	lock_page(page);
+	if (page->mapping == NULL) {
+		BUG_ON(! PageAgain(page));
+		unlock_page(page);
+		page_cache_release(page);
+		pte_chain_free(pte_chain);
+		goto again;
+	}
+	BUG_ON(PageAgain(page));
 
 	/*
 	 * Back out if somebody else faulted in this pte while we
diff -dpurN linux-2.6.5/mm/page_alloc.c linux-2.6.5-mh/mm/page_alloc.c
--- linux-2.6.5/mm/page_alloc.c	Sun Apr  4 12:36:17 2004
+++ linux-2.6.5-mh/mm/page_alloc.c	Tue Apr  6 13:27:58 2004
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -220,6 +221,7 @@ static inline void free_pages_check(cons
 			1 << PG_active	|
 			1 << PG_reclaim	|
 			1 << PG_slab	|
+			1 << PG_again	|
 			1 << PG_writeback )))
 		bad_page(function, page);
 	if (PageDirty(page))
@@ -327,12 +329,13 @@ static void prep_new_page(struct page *p
 			1 << PG_active	|
 			1 << PG_dirty	|
 			1 << PG_reclaim	|
+			1 << PG_again	|
 			1 << PG_writeback )))
 		bad_page(__FUNCTION__, page);
 
 	page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
 			1 << PG_referenced | 1 << PG_arch_1 |
-			1 << PG_checked | 1 << PG_mappedtodisk);
+			1 << PG_checked | 1 << PG_mappedtodisk | 1 << PG_again);
 	page->private = 0;
 	set_page_refs(page, order);
 }
@@ -390,7 +393,7 @@ static int rmqueue_bulk(struct zone *zon
 	return allocated;
 }
 
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
+#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_MEMHOTPLUG)
 static void __drain_pages(unsigned int cpu)
 {
 	struct zone *zone;
@@ -433,7 +436,9 @@ int is_head_of_free_region(struct page *
 		spin_unlock_irqrestore(&zone->lock, flags);
 	return 0;
 }
+#endif
 
+#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_MEMHOTPLUG)
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
 */
@@ -1106,13 +1111,21 @@ void show_free_areas(void)
 /*
  * Builds allocation fallback zone lists.
 */
-static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
+static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
 {
+
+	if (! pgdat->enabled)
+		return j;
+	if (k != ZONE_HOTREMOVABLE &&
+	    pgdat->removable)
+		return j;
+
 	switch (k) {
 		struct zone *zone;
 	default:
 		BUG();
 	case ZONE_HIGHMEM:
+	case ZONE_HOTREMOVABLE:
 		zone = pgdat->node_zones + ZONE_HIGHMEM;
 		if (zone->present_pages) {
 #ifndef CONFIG_HIGHMEM
@@ -1239,24 +1252,48 @@ static void __init build_zonelists(pg_da
 
 #else	/* CONFIG_NUMA */
 
-static void __init build_zonelists(pg_data_t *pgdat)
+static void build_zonelists(pg_data_t *pgdat)
 {
 	int i, j, k, node, local_node;
+	int hotremovable;
+#ifdef CONFIG_MEMHOTPLUG
+	struct zone *zone;
+#endif
 
 	local_node = pgdat->node_id;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
+	for (i = 0; i < MAX_NR_ZONELISTS; i++) {
 		struct zonelist *zonelist;
 
 		zonelist = pgdat->node_zonelists + i;
-		memset(zonelist, 0, sizeof(*zonelist));
+		/* memset(zonelist, 0, sizeof(*zonelist)); */
 
 		j = 0;
 		k = ZONE_NORMAL;
-		if (i & __GFP_HIGHMEM)
+		hotremovable = 0;
+		switch (i) {
+		default:
+			BUG();
+			return;
+		case 0:
+			k = ZONE_NORMAL;
+			break;
+		case __GFP_HIGHMEM:
 			k = ZONE_HIGHMEM;
-		if (i & __GFP_DMA)
+			break;
+		case __GFP_DMA:
 			k = ZONE_DMA;
+			break;
+		case __GFP_HOTREMOVABLE:
+#ifdef CONFIG_MEMHOTPLUG
+			k = ZONE_HIGHMEM;
+#else
+			k = ZONE_HOTREMOVABLE;
+#endif
+			hotremovable = 1;
+			break;
+		}
+#ifndef CONFIG_MEMHOTPLUG
 		j = build_zonelists_node(pgdat, zonelist, j, k);
 		/*
 		 * Now we build the zonelist so that it contains the zones
@@ -1267,22 +1304,59 @@ static void __init build_zonelists(pg_da
 		 * node N+1 (modulo N)
		 */
 		for (node = local_node + 1; node < numnodes; node++)
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			j = build_zonelists_node(NODE_DATA(node),
+				zonelist, j, k);
 		for (node = 0; node < local_node; node++)
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-
-		zonelist->zones[j++] = NULL;
+			j = build_zonelists_node(NODE_DATA(node),
+				zonelist, j, k);
+#else
+		while (hotremovable >= 0) {
+			for(; k >= 0; k--) {
+				zone = pgdat->node_zones + k;
+				for (node = local_node; ;) {
+					if (NODE_DATA(node) == NULL ||
+					    ! NODE_DATA(node)->enabled ||
+					    (!! NODE_DATA(node)->removable) !=
+					    (!! hotremovable))
+						goto next;
+					zone = NODE_DATA(node)->node_zones + k;
+					if (zone->present_pages)
+						zonelist->zones[j++] = zone;
+				next:
+					node = (node + 1) % numnodes;
+					if (node == local_node)
+						break;
+				}
+			}
+			if (hotremovable) {
+				/* place non-hotremovable after hotremovable */
+				k = ZONE_HIGHMEM;
+			}
+			hotremovable--;
+		}
+#endif
+		BUG_ON(j > sizeof(zonelist->zones) /
+			sizeof(zonelist->zones[0]) - 1);
+		for(; j < sizeof(zonelist->zones) /
+			sizeof(zonelist->zones[0]); j++)
+			zonelist->zones[j] = NULL;
 	}
 }
 
 #endif	/* CONFIG_NUMA */
 
-void __init build_all_zonelists(void)
+#ifdef CONFIG_MEMHOTPLUG
+void
+#else
+void __init
+#endif
+build_all_zonelists(void)
 {
 	int i;
 
 	for(i = 0 ; i < numnodes ; i++)
-		build_zonelists(NODE_DATA(i));
+		if (NODE_DATA(i) != NULL)
+			build_zonelists(NODE_DATA(i));
 	printk("Built %i zonelists\n", numnodes);
 }
 
@@ -1354,7 +1428,7 @@ static void __init calculate_zone_totalp
  * up by free_all_bootmem() once the early boot process is
 * done. Non-atomic initialization, single-pass.
*/ -void __init memmap_init_zone(struct page *start, unsigned long size, int nid, +void memmap_init_zone(struct page *start, unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { struct page *page; @@ -1392,10 +1466,13 @@ static void __init free_area_init_core(s int cpu, nid = pgdat->node_id; struct page *lmem_map = pgdat->node_mem_map; unsigned long zone_start_pfn = pgdat->node_start_pfn; +#ifdef CONFIG_MEMHOTPLUG + int cold = ! nid; +#endif pgdat->nr_zones = 0; init_waitqueue_head(&pgdat->kswapd_wait); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize; @@ -1465,6 +1542,13 @@ static void __init free_area_init_core(s zone->wait_table_size = wait_table_size(size); zone->wait_table_bits = wait_table_bits(zone->wait_table_size); +#ifdef CONFIG_MEMHOTPLUG + if (! cold) + zone->wait_table = (wait_queue_head_t *) + kmalloc(zone->wait_table_size + * sizeof(wait_queue_head_t), GFP_KERNEL); + else +#endif zone->wait_table = (wait_queue_head_t *) alloc_bootmem_node(pgdat, zone->wait_table_size * sizeof(wait_queue_head_t)); @@ -1519,6 +1603,13 @@ static void __init free_area_init_core(s */ bitmap_size = (size-1) >> (i+4); bitmap_size = LONG_ALIGN(bitmap_size+1); +#ifdef CONFIG_MEMHOTPLUG + if (! cold) { + zone->free_area[i].map = + (unsigned long *)kmalloc(bitmap_size, GFP_KERNEL); + memset(zone->free_area[i].map, 0, bitmap_size); + } else +#endif zone->free_area[i].map = (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size); } @@ -1749,7 +1840,7 @@ void __init page_alloc_init(void) * that the pages_{min,low,high} values for each zone are set correctly * with respect to min_free_kbytes. */ -static void setup_per_zone_pages_min(void) +void setup_per_zone_pages_min(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; diff -dpurN linux-2.6.5/mm/rmap.c linux-2.6.5-mh/mm/rmap.c --- linux-2.6.5/mm/rmap.c Sun Apr 4 12:38:16 2004 +++ linux-2.6.5-mh/mm/rmap.c Mon Apr 5 12:44:53 2004 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -293,13 +294,18 @@ out_unlock: * pte_chain_lock shrink_list() * mm->page_table_lock try_to_unmap_one(), trylock */ -static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t)); -static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr) +static int FASTCALL(try_to_unmap_one(struct page *, pte_addr_t, + struct list_head *)); +static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr, + struct list_head *force) { pte_t *ptep = rmap_ptep_map(paddr); unsigned long address = ptep_to_address(ptep); struct mm_struct * mm = ptep_to_mm(ptep); struct vm_area_struct * vma; +#ifdef CONFIG_MEMHOTPLUG + struct page_va_list *vlist; +#endif pte_t pte; int ret; @@ -325,8 +331,16 @@ static int fastcall try_to_unmap_one(str /* The page is mlock()d, we cannot swap it out. */ if (vma->vm_flags & VM_LOCKED) { - ret = SWAP_FAIL; - goto out_unlock; + if (force == NULL) { + ret = SWAP_FAIL; + goto out_unlock; + } +#ifdef CONFIG_MEMHOTPLUG + vlist = kmalloc(sizeof(struct page_va_list), GFP_KERNEL); + vlist->mm = mm; + vlist->addr = address; + list_add(&vlist->list, force); +#endif } /* Nuke the page table entry. 
*/ @@ -383,7 +397,7 @@ out_unlock: * SWAP_AGAIN - we missed a trylock, try again later * SWAP_FAIL - the page is unswappable */ -int fastcall try_to_unmap(struct page * page) +int fastcall try_to_unmap(struct page * page, struct list_head *force) { struct pte_chain *pc, *next_pc, *start; int ret = SWAP_SUCCESS; @@ -399,7 +413,7 @@ int fastcall try_to_unmap(struct page * BUG(); if (PageDirect(page)) { - ret = try_to_unmap_one(page, page->pte.direct); + ret = try_to_unmap_one(page, page->pte.direct, force); if (ret == SWAP_SUCCESS) { if (page_test_and_clear_dirty(page)) set_page_dirty(page); @@ -420,7 +434,7 @@ int fastcall try_to_unmap(struct page * for (i = pte_chain_idx(pc); i < NRPTE; i++) { pte_addr_t pte_paddr = pc->ptes[i]; - switch (try_to_unmap_one(page, pte_paddr)) { + switch (try_to_unmap_one(page, pte_paddr, force)) { case SWAP_SUCCESS: /* * Release a slot. If we're releasing the diff -dpurN linux-2.6.5/mm/swap_state.c linux-2.6.5-mh/mm/swap_state.c --- linux-2.6.5/mm/swap_state.c Sun Apr 4 12:36:57 2004 +++ linux-2.6.5-mh/mm/swap_state.c Mon Apr 5 12:44:53 2004 @@ -234,12 +234,21 @@ int move_from_swap_cache(struct page *pa spin_lock(&swapper_space.page_lock); spin_lock(&mapping->page_lock); + if (radix_tree_lookup(&page->mapping->page_tree, page->index) + != page) { + /* remap in progress */ + printk("move_from_swap_cache: under remap %p\n", page); + err = -EAGAIN; + goto out; + } + err = radix_tree_insert(&mapping->page_tree, index, page); if (!err) { __delete_from_swap_cache(page); ___add_to_page_cache(page, mapping, index); } +out: spin_unlock(&mapping->page_lock); spin_unlock(&swapper_space.page_lock); diff -dpurN linux-2.6.5/mm/swapfile.c linux-2.6.5-mh/mm/swapfile.c --- linux-2.6.5/mm/swapfile.c Sun Apr 4 12:36:26 2004 +++ linux-2.6.5-mh/mm/swapfile.c Mon Apr 5 12:44:53 2004 @@ -607,6 +607,7 @@ static int try_to_unuse(unsigned int typ */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); + again: page = read_swap_cache_async(entry); if (!page) { /* @@ -641,6 +642,13 @@ static int try_to_unuse(unsigned int typ wait_on_page_locked(page); wait_on_page_writeback(page); lock_page(page); + if (page->mapping != &swapper_space) { + BUG_ON(! PageAgain(page)); + unlock_page(page); + page_cache_release(page); + goto again; + } + BUG_ON(PageAgain(page)); wait_on_page_writeback(page); /* @@ -749,6 +757,7 @@ static int try_to_unuse(unsigned int typ swap_writepage(page, &wbc); lock_page(page); + BUG_ON(PageAgain(page)); wait_on_page_writeback(page); } if (PageSwapCache(page)) { diff -dpurN linux-2.6.5/mm/truncate.c linux-2.6.5-mh/mm/truncate.c --- linux-2.6.5/mm/truncate.c Sun Apr 4 12:38:18 2004 +++ linux-2.6.5-mh/mm/truncate.c Mon Apr 5 12:44:53 2004 @@ -132,6 +132,8 @@ void truncate_inode_pages(struct address next++; if (TestSetPageLocked(page)) continue; + /* no PageAgain(page) check; page->mapping check + * is done in truncate_complete_page */ if (PageWriteback(page)) { unlock_page(page); continue; @@ -165,6 +167,24 @@ void truncate_inode_pages(struct address struct page *page = pvec.pages[i]; lock_page(page); + if (page->mapping == NULL) { + /* XXX Is page->index still valid? */ + unsigned long index = page->index; + int again = PageAgain(page); + + unlock_page(page); + put_page(page); + page = find_lock_page(mapping, index); + if (page == NULL) { + BUG_ON(again); + /* XXX */ + if (page->index > next) + next = page->index; + next++; + } + BUG_ON(! 
again); + pvec.pages[i] = page; + } wait_on_page_writeback(page); if (page->index > next) next = page->index; @@ -257,14 +277,29 @@ void invalidate_inode_pages2(struct addr struct page *page = pvec.pages[i]; lock_page(page); - if (page->mapping == mapping) { /* truncate race? */ - wait_on_page_writeback(page); - next = page->index + 1; - if (page_mapped(page)) - clear_page_dirty(page); - else - invalidate_complete_page(mapping, page); + while (page->mapping != mapping) { + struct page *newpage; + unsigned long index = page->index; + + BUG_ON(page->mapping != NULL); + + unlock_page(page); + newpage = find_lock_page(mapping, index); + if (page == newpage) { + put_page(page); + break; + } + BUG_ON(! PageAgain(page)); + pvec.pages[i] = newpage; + put_page(page); + page = newpage; } + wait_on_page_writeback(page); + next = page->index + 1; + if (page_mapped(page)) + clear_page_dirty(page); + else + invalidate_complete_page(mapping, page); unlock_page(page); } pagevec_release(&pvec); diff -dpurN linux-2.6.5/mm/vmscan.c linux-2.6.5-mh/mm/vmscan.c --- linux-2.6.5/mm/vmscan.c Sun Apr 4 12:36:24 2004 +++ linux-2.6.5-mh/mm/vmscan.c Mon Apr 5 12:44:53 2004 @@ -199,11 +199,6 @@ static inline int page_mapping_inuse(str return 0; } -static inline int is_page_cache_freeable(struct page *page) -{ - return page_count(page) - !!PagePrivate(page) == 2; -} - static int may_write_to_queue(struct backing_dev_info *bdi) { if (current_is_kswapd()) @@ -311,7 +306,7 @@ shrink_list(struct list_head *page_list, * processes. Try to unmap it here. */ if (page_mapped(page) && mapping) { - switch (try_to_unmap(page)) { + switch (try_to_unmap(page, NULL)) { case SWAP_FAIL: pte_chain_unlock(page); goto activate_locked; @@ -1140,4 +1140,14 @@ static int __init kswapd_init(void) return 0; } +#ifdef CONFIG_MEMHOTPLUG +void +kswapd_start_one(pg_data_t *pgdat) +{ + pgdat->kswapd + = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL)); + total_memory = nr_free_pagecache_pages(); +} +#endif + module_init(kswapd_init) diff -dpurN linux-2.6.5/include/linux/memhotplug.h linux-2.6.5-mh/include/linux/memhotplug.h --- linux-2.6.5/include/linux/memhotplug.h Thu Jan 1 09:00:00 1970 +++ linux-2.6.5-mh/include/linux/memhotplug.h Mon Apr 5 12:44:53 2004 @@ -0,0 +1,32 @@ +#ifndef _LINUX_MEMHOTPLUG_H +#define _LINUX_MEMHOTPLUG_H + +#include +#include + +#ifdef __KERNEL__ + +struct page_va_list { + struct mm_struct *mm; + unsigned long addr; + struct list_head list; +}; + +struct remap_operations { + struct page * (*remap_alloc_page)(int); + int (*remap_delete_page)(struct page *); + int (*remap_copy_page)(struct page *, struct page *); + int (*remap_lru_add_page)(struct page *); + int (*remap_release_buffers)(struct page *); + int (*remap_prepare)(struct page *page, int fastmode); + int (*remap_stick_page)(struct list_head *vlist); +}; + +extern int remapd(void *p); +extern int remap_onepage(struct page *, int, int, struct remap_operations *); +extern int remap_onepage_normal(struct page *, int, int); + +#define REMAP_ANYNODE (-1) + +#endif /* __KERNEL__ */ +#endif /* _LINUX_MEMHOTPLUG_H */ diff -dpurN linux-2.6.5/mm/memhotplug.c linux-2.6.5-mh/mm/memhotplug.c --- linux-2.6.5/mm/memhotplug.c Thu Jan 1 09:00:00 1970 +++ linux-2.6.5-mh/mm/memhotplug.c Mon Apr 5 12:44:53 2004 @@ -0,0 +1,699 @@ +/* + * linux/mm/memhotplug.c + * + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Support of memory hotplug, Iwamoto + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + 
+#ifdef CONFIG_KDB
+#include
+#endif
+
+static void
+print_buffer(struct page* page)
+{
+	struct address_space* mapping = page->mapping;
+	struct buffer_head *bh, *head;
+
+	spin_lock(&mapping->private_lock);
+	bh = head = page_buffers(page);
+	printk("buffers:");
+	do {
+		printk(" %lx %d", bh->b_state, atomic_read(&bh->b_count));
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	printk("\n");
+	spin_unlock(&mapping->private_lock);
+}
+
+static int
+stick_mlocked_page(struct list_head *vlist)
+{
+	struct page_va_list *v1;
+	struct vm_area_struct *vma;
+	int error;
+
+	while(!list_empty(vlist)) {
+		v1 = list_entry(vlist->next, struct page_va_list, list);
+		list_del(&v1->list);
+		vma = find_vma(v1->mm, v1->addr);
+		BUG_ON(! (vma->vm_flags & VM_LOCKED));
+		error = get_user_pages(current, v1->mm, v1->addr, PAGE_SIZE,
+		    (vma->vm_flags & VM_WRITE) != 0, 0, NULL, NULL);
+		BUG_ON(error <= 0);
+		kfree(v1);
+	}
+	return 0;
+}
+
+/* helper function for remap_onepage */
+#define	REMAPPREP_WB		1
+#define	REMAPPREP_BUFFER	2
+
+/*
+ * Try to free buffers if "page" has them.
+ */
+static int
+remap_preparepage(struct page *page, int fastmode)
+{
+	struct address_space *mapping;
+	int waitcnt = fastmode ? 0 : 100;
+
+	BUG_ON(! PageLocked(page));
+
+	mapping = page->mapping;
+
+	if (! PagePrivate(page) && PageWriteback(page) &&
+	    page->mapping != &swapper_space) {
+		printk("remap_preparepage: mapping %p page %p\n",
+		    page->mapping, page);
+		return -REMAPPREP_WB;
+	}
+
+	while (PageWriteback(page)) {
+		if (!waitcnt)
+			return -REMAPPREP_WB;
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(10);
+		__set_current_state(TASK_RUNNING);
+		waitcnt--;
+	}
+	if (PagePrivate(page)) {
+		/* XXX copied from shrink_list() */
+		if (PageDirty(page) &&
+		    is_page_cache_freeable(page) &&
+		    mapping != NULL &&
+		    mapping->a_ops->writepage != NULL) {
+			spin_lock(&mapping->page_lock);
+			if (test_clear_page_dirty(page)) {
+				int res;
+				struct writeback_control wbc = {
+					.sync_mode = WB_SYNC_NONE,
+					.nr_to_write = SWAP_CLUSTER_MAX,
+					.nonblocking = 1,
+					.for_reclaim = 1,
+				};
+
+				list_move(&page->list, &mapping->locked_pages);
+				spin_unlock(&mapping->page_lock);
+
+				SetPageReclaim(page);
+				res = mapping->a_ops->writepage(page, &wbc);
+
+				if (res == WRITEPAGE_ACTIVATE) {
+					ClearPageReclaim(page);
+					return -REMAPPREP_WB;
+				}
+				if (!PageWriteback(page)) {
+					/* synchronous write or broken a_ops? */
+					ClearPageReclaim(page);
+				}
+				lock_page(page);
+				mapping = page->mapping;
+				if (! PagePrivate(page))
+					return 0;
+			} else
+				spin_unlock(&mapping->page_lock);
+		}
+
+		while (1) {
+			if (try_to_release_page(page, GFP_KERNEL))
+				break;
+			if (! waitcnt)
+				return -REMAPPREP_BUFFER;
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(10);
+			__set_current_state(TASK_RUNNING);
+			waitcnt--;
+			if (! waitcnt)
+				print_buffer(page);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Just assign swap space to a anonymous page if it doesn't have yet,
+ * so that the page can be handled like a page in the page cache
+ * since it in the swap cache.
+ */
+static struct address_space *
+make_page_mapped(struct page *page)
+{
+	if (! page_mapped(page)) {
+		if (page_count(page) > 1)
+			printk("page %p not mapped: count %d\n",
+			    page, page_count(page));
+		return NULL;
+	}
+	/* The page is an anon page.  Allocate its swap entry. */
+	if (!add_to_swap(page))
+		return NULL;
+	return page->mapping;
+}
+
+/*
+ * Replace "page" with "newpage" on the radix tree.  After that, all
+ * new access to "page" will be redirected to "newpage" and it
+ * will be blocked until remapping has been done.
+ */
+static int
+radix_tree_replace_pages(struct page *page, struct page *newpage,
+			 struct address_space *mapping)
+{
+	if (radix_tree_preload(GFP_KERNEL))
+		return -1;
+
+	if (PagePrivate(page)) /* XXX */
+		BUG();
+
+	/* should {__add_to,__remove_from}_page_cache be used instead? */
+	spin_lock(&mapping->page_lock);
+	if (mapping != page->mapping)
+		printk("mapping changed %p -> %p, page %p\n",
+		    mapping, page->mapping, page);
+	if (radix_tree_delete(&mapping->page_tree, page->index) == NULL) {
+		/* Page truncated. */
+		spin_unlock(&mapping->page_lock);
+		radix_tree_preload_end();
+		return -1;
+	}
+	/* Don't __put_page(page) here.  Truncate may be in progress. */
+	newpage->flags |= page->flags & ~(1 << PG_uptodate) &
+	    ~(1 << PG_highmem) & ~(1 << PG_chainlock) &
+	    ~(1 << PG_direct) & ~(~0UL << NODEZONE_SHIFT);
+
+	/* list_del(&page->list); XXX */
+	radix_tree_insert(&mapping->page_tree, page->index, newpage);
+	page_cache_get(newpage);
+	newpage->mapping = mapping;
+	newpage->index = page->index;
+	spin_unlock(&mapping->page_lock);
+	radix_tree_preload_end();
+	return 0;
+}
+
+/*
+ * Remove all PTE mappings to "page".
+ */
+static int
+unmap_page(struct page *page, struct list_head *vlist)
+{
+	int error;
+	pte_chain_lock(page);
+	if (page_mapped(page)) {
+		while ((error = try_to_unmap(page, vlist)) == SWAP_AGAIN) {
+			pte_chain_unlock(page);
+			__set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(1);
+			__set_current_state(TASK_RUNNING);
+			pte_chain_lock(page);
+		}
+		if (error == SWAP_FAIL) {
+			pte_chain_unlock(page); /* XXX */
+			/* either during mremap or mlocked */
+			return -1;
+		}
+	}
+	pte_chain_unlock(page);
+	return 0;
+}
+
+/*
+ * Wait for "page" to become free.  Almost same as waiting for its
+ * page count to drop to 2, but truncated pages are special.
+ */
+static int
+wait_on_page_freeable(struct page *page, struct address_space *mapping,
+			struct list_head *vlist, int truncated,
+			int nretry, struct remap_operations *ops)
+{
+	while ((truncated + page_count(page)) > 2) {
+		if (nretry <= 0)
+			return -1;
+		/* no lock needed while waiting page count */
+		unlock_page(page);
+
+		while ((truncated + page_count(page)) > 2) {
+			nretry--;
+			current->state = TASK_INTERRUPTIBLE;
+			schedule_timeout(1);
+			if ((nretry % 5000) == 0) {
+				printk("remap_onepage: still waiting on %p %d\n", page, nretry);
+				break;
+			}
+			if (PagePrivate(page) || page_mapped(page))
+				break;	/* see below */
+		}
+
+		lock_page(page);
+		BUG_ON(page_count(page) == 0);
+		if (mapping != page->mapping && page->mapping != NULL)
+			printk("mapping changed %p -> %p, page %p\n",
+			    mapping, page->mapping, page);
+		if (PagePrivate(page))
+			ops->remap_release_buffers(page);
+		unmap_page(page, vlist);
+	}
+	return nretry;
+}
+
+/*
+ * A file which "page" belongs to has been truncated.  Free both pages.
+ */
+static void
+free_truncated_pages(struct page *page, struct page *newpage,
+			struct address_space *mapping)
+{
+	void *p;
+	/* mapping->page_lock must be held. */
+	p = radix_tree_lookup(&mapping->page_tree, newpage->index);
+	if (p != NULL) {
+		/* new cache page appeared after truncation */
+		printk("page %p newpage %p radix %p\n",
+		    page, newpage, p);
+		BUG_ON(p == newpage);
+	}
+	BUG_ON(page->mapping != NULL);
+	put_page(newpage);
+	if (page_count(newpage) != 1) {
+		printk("newpage count %d != 1, %p\n",
+		    page_count(newpage), newpage);
+		BUG();
+	}
+	/* No need to do page->list.  remove_from_page_cache did. */
+	newpage->mapping = page->mapping = NULL;
+	spin_unlock(&mapping->page_lock);
+	ClearPageActive(page);
+	ClearPageActive(newpage);
+	unlock_page(page);
+	unlock_page(newpage);
+	put_page(page);
+	put_page(newpage);
+}
+
+static inline int
+is_page_truncated(struct page *page, struct page *newpage,
+			struct address_space *mapping)
+{
+	void *p;
+	spin_lock(&mapping->page_lock);
+	if (page_count(page) == 1) {
+		/* page has been truncated. */
+		return 0;
+	}
+	p = radix_tree_lookup(&mapping->page_tree, newpage->index);
+	spin_unlock(&mapping->page_lock);
+	if (p == NULL) {
+		BUG_ON(page->mapping != NULL);
+		return -1;
+	}
+	return 1;
+}
+
+/*
+ * Replace "page" with "newpage" on the list of clean/dirty pages.
+ */
+static void
+remap_exchange_pages(struct page *page, struct page *newpage,
+			struct address_space *mapping)
+{
+	spin_lock(&mapping->page_lock);
+	list_del(&page->list); /* XXX */
+	if (PageDirty(page)) {
+		SetPageDirty(newpage);
+		list_add(&newpage->list, &mapping->dirty_pages);
+	} else
+		list_add(&newpage->list, &mapping->clean_pages);
+	page->mapping = NULL;
+	spin_unlock(&mapping->page_lock);
+	unlock_page(page);
+
+	ClearPageActive(page);
+	__put_page(page);
+
+	/* We are done.  Finish and let the waiters run. */
+	SetPageUptodate(newpage);
+}
+
+/*
+ * Roll back all remapping operations.
+ */
+static int
+radix_tree_rewind_page(struct page *page, struct page *newpage,
+			struct address_space *mapping)
+{
+	int waitcnt;
+	/*
+	 * Try to unwind by notifying waiters.  If someone misbehaves,
+	 * we die.
+	 */
+	if (radix_tree_preload(GFP_KERNEL))
+		BUG();
+	/* should {__add_to,__remove_from}_page_cache be used instead? */
+	spin_lock(&mapping->page_lock);
+	/* list_del(&newpage->list); */
+	if (radix_tree_delete(&mapping->page_tree, page->index) == NULL)
+		/* Hold extra count to handle truncate */
+		page_cache_get(newpage);
+	radix_tree_insert(&mapping->page_tree, page->index, page);
+	/* no page_cache_get(page); needed */
+	radix_tree_preload_end();
+	spin_unlock(&mapping->page_lock);
+
+	SetPageAgain(newpage);
+	/* XXX unmap needed?  No, it shouldn't.  Handled by fault handlers. */
+	unlock_page(newpage);
+
+	waitcnt = 1;
+	for(; page_count(newpage) > 2; waitcnt++) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(1);
+		if ((waitcnt % 10000) == 0) {
+			printk("You are hosed.\n");
+			printk("newpage %p\n", newpage);
+			BUG();
+		}
+	}
+	BUG_ON(PageUptodate(newpage));
+	ClearPageDirty(newpage);
+	ClearPageActive(newpage);
+	spin_lock(&mapping->page_lock);
+	newpage->mapping = NULL;
+	if (page_count(newpage) == 1) {
+		printk("newpage %p truncated. page %p\n", newpage, page);
+		BUG();
+	}
+	spin_unlock(&mapping->page_lock);
+	unlock_page(page);
+	BUG_ON(page_count(newpage) != 2);
+	ClearPageAgain(newpage);
+	__put_page(newpage);
+	return 1;
+}
+
+/*
+ * Allocate a new page from specified node.
+ */
+static struct page *
+remap_alloc_page(int nid)
+{
+	if (nid == REMAP_ANYNODE)
+		return alloc_page(GFP_HIGHUSER);
+	else
+		return alloc_pages_node(nid, GFP_HIGHUSER, 0);
+}
+
+static int
+remap_delete_page(struct page *page)
+{
+	BUG_ON(page_count(page) != 1);
+	put_page(page);
+	return 0;
+}
+
+static int
+remap_copy_page(struct page *to, struct page *from)
+{
+	copy_highpage(to, from);
+	return 0;
+}
+
+static int
+remap_lru_add_page(struct page *page)
+{
+#if 1
+	struct zone *zone;
+	/* XXX locking order correct? */
+	zone = page_zone(page);
+	spin_lock_irq(&zone->lru_lock);
+	if (PageActive(page)) {
+		list_add(&page->lru, &zone->active_list);
+		zone->nr_active++;
+	} else {
+		list_add(&page->lru, &zone->inactive_list);
+		zone->nr_inactive++;
+	}
+	SetPageLRU(page);
+	spin_unlock_irq(&zone->lru_lock);
+#endif
+#if 0
+	if (PageActive(page))
+		lru_cache_add_active(page);
+	else
+		lru_cache_add(page);
+#endif
+	return 0;
+}
+
+static int
+remap_release_buffer(struct page *page)
+{
+	try_to_release_page(page, GFP_KERNEL);
+	return 0;
+}
+
+static struct remap_operations remap_ops = {
+	.remap_alloc_page	= remap_alloc_page,
+	.remap_delete_page	= remap_delete_page,
+	.remap_copy_page	= remap_copy_page,
+	.remap_lru_add_page	= remap_lru_add_page,
+	.remap_release_buffers	= remap_release_buffer,
+	.remap_prepare		= remap_preparepage,
+	.remap_stick_page	= stick_mlocked_page
+};
+
+/*
+ * Try to remap a page.  Returns non-zero on failure.
+ */
+int remap_onepage(struct page *page, int nodeid, int fastmode,
+			struct remap_operations *ops)
+{
+	struct page *newpage;
+	struct address_space *mapping;
+	LIST_HEAD(vlist);
+	int truncated = 0;
+	int nretry = fastmode ? HZ/50: HZ*10;	/* XXXX */
+
+	if ((newpage = ops->remap_alloc_page(nodeid)) == NULL)
+		return -ENOMEM;
+	if (TestSetPageLocked(newpage))
+		BUG();
+	lock_page(page);
+	mapping = page->mapping;
+
+	if (ops->remap_prepare && ops->remap_prepare(page, fastmode))
+		goto radixfail;
+	if (mapping == NULL && (mapping = make_page_mapped(page)) == NULL)
+		goto radixfail;
+	if (radix_tree_replace_pages(page, newpage, mapping))
+		goto radixfail;
+	if (unmap_page(page, &vlist))
+		goto unmapfail;
+	if (PagePrivate(page))
+		printk("buffer reappeared\n");
+wait_again:
+	if ((nretry = wait_on_page_freeable(page, mapping, &vlist, truncated, nretry, ops)) < 0)
+		goto unmapfail;
+
+	if (PageReclaim(page) || PageWriteback(page) || PagePrivate(page))
+#ifdef CONFIG_KDB
+		KDB_ENTER();
+#else
+		BUG();
+#endif
+	switch (is_page_truncated(page, newpage, mapping)) {
+	case 0:
+		/* has been truncated */
+		free_truncated_pages(page, newpage, mapping);
+		return 0;
+	case -1:
+		/* being truncated */
+		truncated = 1;
+		BUG_ON(page->mapping != NULL);
+		goto wait_again;
+	default:
+		/* through */
+	}
+
+	BUG_ON(mapping != page->mapping);
+
+	ops->remap_copy_page(newpage, page);
+	remap_exchange_pages(page, newpage, mapping);
+	if (ops->remap_lru_add_page)
+		ops->remap_lru_add_page(newpage);
+	ops->remap_delete_page(page);
+
+	/*
+	 * Wake up all waiters which are waiting for completion
+	 * of remapping operations.
+	 */
+	unlock_page(newpage);
+
+	if (ops->remap_stick_page)
+		ops->remap_stick_page(&vlist);
+	page_cache_release(newpage);
+	return 0;
+
+unmapfail:
+	radix_tree_rewind_page(page, newpage, mapping);
+	if (ops->remap_stick_page)
+		ops->remap_stick_page(&vlist);
+	ops->remap_delete_page(newpage);
+	return 1;
+
+radixfail:
+	unlock_page(page);
+	unlock_page(newpage);
+	if (ops->remap_stick_page)
+		ops->remap_stick_page(&vlist);
+	ops->remap_delete_page(newpage);
+	return 1;
+}
+
+int remap_onepage_normal(struct page *page, int nodeid, int fastmode)
+{
+	return remap_onepage(page, nodeid, fastmode, &remap_ops);
+}
+
+static struct work_struct lru_drain_wq[NR_CPUS];
+static void
+lru_drain_schedule(void *p)
+{
+	int cpu = get_cpu();
+
+	schedule_work(&lru_drain_wq[cpu]);
+	put_cpu();
+}
+
+atomic_t remapd_count;
+int remapd(void *p)
+{
+	struct zone *zone = p;
+	struct page *page, *page1;
+	struct list_head *l;
+	int active, i, nr_failed = 0;
+	int fastmode = 100;
+	LIST_HEAD(failedp);
+
+	daemonize("remap%d", zone->zone_start_pfn);
+	if (atomic_read(&remapd_count) > 0) {
+		printk("remapd already running\n");
+		return 0;
+	}
+	atomic_inc(&remapd_count);
+	on_each_cpu(lru_drain_schedule, NULL, 1, 1);
+	while(nr_failed < 100) {
+		spin_lock_irq(&zone->lru_lock);
+		for(active = 0; active < 2; active++) {
+			l = active ? &zone->active_list :
+			    &zone->inactive_list;
+			for(i = 0; ! list_empty(l) && i < 10; i++) {
+				page = list_entry(l->prev, struct page, lru);
+				if (fastmode && PageLocked(page)) {
+					page1 = page;
+					while (fastmode && PageLocked(page)) {
+						page =
+						    list_entry(page->lru.prev,
+						    struct page, lru);
+						fastmode--;
+						if (&page->lru == l) {
+							/* scanned the whole
+							   list */
+							page = page1;
+							break;
+						}
+						if (page == page1)
+							BUG();
+					}
+					if (! fastmode) {
+						printk("used up fastmode\n");
+						page = page1;
+					}
+				}
+				if (! TestClearPageLRU(page))
+					BUG();
+				list_del(&page->lru);
+				if (page_count(page) == 0) {
+					/* the page is in pagevec_release();
+					   shrink_cache says so. */
+					SetPageLRU(page);
+					list_add(&page->lru, l);
+					continue;
+				}
+				if (active)
+					zone->nr_active--;
+				else
+					zone->nr_inactive--;
+				page_cache_get(page);
+				spin_unlock_irq(&zone->lru_lock);
+				goto got_page;
+			}
+		}
+		spin_unlock_irq(&zone->lru_lock);
+		break;
+
+	got_page:
+		if (remap_onepage(page, REMAP_ANYNODE, fastmode, &remap_ops)) {
+			nr_failed++;
+			if (fastmode)
+				fastmode--;
+			list_add(&page->lru, &failedp);
+		}
+	}
+	if (list_empty(&failedp))
+		goto out;
+
+	while (! list_empty(&failedp)) {
+		page = list_entry(failedp.prev, struct page, lru);
+		list_del(&page->lru);
+		if (! TestSetPageLocked(page)) {
+			if (remap_preparepage(page, 10 /* XXX */)) {
+				unlock_page(page);
+			} else {
+				ClearPageLocked(page);	/* XXX */
+				if (! remap_onepage(page, REMAP_ANYNODE, 0, &remap_ops))
+					continue;
+			}
+		}
+		spin_lock_irq(&zone->lru_lock);
+		if (PageActive(page)) {
+			list_add(&page->lru, &zone->active_list);
+			zone->nr_active++;
+		} else {
+			list_add(&page->lru, &zone->inactive_list);
+			zone->nr_inactive++;
+		}
+		if (TestSetPageLRU(page))
+			BUG();
+		spin_unlock_irq(&zone->lru_lock);
+		page_cache_release(page);
+	}
+out:
+	atomic_dec(&remapd_count);
+	return 0;
+}
+
+static int __init remapd_init(void)
+{
+	int i;
+
+	for(i = 0; i < NR_CPUS; i++)
+		INIT_WORK(&lru_drain_wq[i], (void (*)(void *))lru_add_drain, NULL);
+	return 0;
+}
+
+module_init(remapd_init);