* [PATCH 1/5] generic __remove_pages() support
2008-03-28 0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
@ 2008-03-28 0:35 ` Badari Pulavarty
2008-03-28 2:26 ` Yasunori Goto
2008-03-28 0:37 ` [PATCH 2/5] [PPC] htab_remove_mapping() error handling Badari Pulavarty
` (3 subsequent siblings)
4 siblings, 1 reply; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 0:35 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
Generic helper function to remove section mappings and sysfs entries
for the section of the memory we are removing. offline_pages() correctly
adjusted zone and marked the pages reserved.
TODO: Yasunori Goto is working on patches to free up allocations from bootmem.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
include/linux/memory_hotplug.h | 6 +++-
mm/memory_hotplug.c | 55 +++++++++++++++++++++++++++++++++++++++++
mm/sparse.c | 45 +++++++++++++++++++++++++++++++--
3 files changed, 102 insertions(+), 4 deletions(-)
Index: linux-2.6.25-rc5/mm/memory_hotplug.c
===================================================================
--- linux-2.6.25-rc5.orig/mm/memory_hotplug.c 2008-03-21 07:00:37.000000000 -0800
+++ linux-2.6.25-rc5/mm/memory_hotplug.c 2008-03-25 15:03:58.000000000 -0800
@@ -102,6 +102,25 @@ static int __add_section(struct zone *zo
return register_new_memory(__pfn_to_section(phys_start_pfn));
}
+static int __remove_section(struct zone *zone, struct mem_section *ms)
+{
+ unsigned long flags;
+ struct pglist_data *pgdat = zone->zone_pgdat;
+ int ret = -EINVAL;
+
+ if (!valid_section(ms))
+ return ret;
+
+ ret = unregister_memory_section(ms);
+ if (ret)
+ return ret;
+
+ pgdat_resize_lock(pgdat, &flags);
+ sparse_remove_one_section(zone, ms);
+ pgdat_resize_unlock(pgdat, &flags);
+ return 0;
+}
+
/*
* Reasonably generic function for adding memory. It is
* expected that archs that support memory hotplug will
@@ -135,6 +154,42 @@ int __add_pages(struct zone *zone, unsig
}
EXPORT_SYMBOL_GPL(__add_pages);
+/**
+ * __remove_pages() - remove sections of pages from a zone
+ * @zone: zone from which pages need to be removed
+ * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ *
+ * Generic helper function to remove section mappings and sysfs entries
+ * for the section of the memory we are removing. Caller needs to make
+ * sure that pages are marked reserved and zones are adjusted properly by
+ * calling offline_pages().
+ */
+int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long i, ret = 0;
+ int sections_to_remove;
+
+ /*
+ * We can only remove entire sections
+ */
+ BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
+ BUG_ON(nr_pages % PAGES_PER_SECTION);
+
+ release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
+
+ sections_to_remove = nr_pages / PAGES_PER_SECTION;
+ for (i = 0; i < sections_to_remove; i++) {
+ unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ ret = __remove_section(zone, __pfn_to_section(pfn));
+ if (ret)
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__remove_pages);
+
static void grow_zone_span(struct zone *zone,
unsigned long start_pfn, unsigned long end_pfn)
{
Index: linux-2.6.25-rc5/mm/sparse.c
===================================================================
--- linux-2.6.25-rc5.orig/mm/sparse.c 2008-03-21 07:00:37.000000000 -0800
+++ linux-2.6.25-rc5/mm/sparse.c 2008-03-25 13:59:51.000000000 -0800
@@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
}
/*
- * We need this if we ever free the mem_maps. While not implemented yet,
- * this function is included for parity with its sibling.
+ * Decode mem_map from the coded memmap
*/
-static __attribute((unused))
+static
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
+ /* mask off the extra low bits of information */
+ coded_mem_map &= SECTION_MAP_MASK;
return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
@@ -363,6 +364,28 @@ static void __kfree_section_memmap(struc
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+{
+ if (!usemap)
+ return;
+
+ /*
+ * Check to see if allocation came from hot-plug-add
+ */
+ if (PageSlab(virt_to_page(usemap))) {
+ kfree(usemap);
+ if (memmap)
+ __kfree_section_memmap(memmap, PAGES_PER_SECTION);
+ return;
+ }
+
+ /*
+ * TODO: Allocations came from bootmem - how do I free up ?
+ */
+ printk(KERN_WARNING "Not freeing up allocations from bootmem "
+ "- leaking memory\n");
+}
+
/*
* returns the number of sections whose mem_maps were properly
* set. If this is <=0, then that means that the passed-in
@@ -415,4 +438,20 @@ out:
}
return ret;
}
+
+void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
+{
+ struct page *memmap = NULL;
+ unsigned long *usemap = NULL;
+
+ if (ms->section_mem_map) {
+ usemap = ms->pageblock_flags;
+ memmap = sparse_decode_mem_map(ms->section_mem_map,
+ __section_nr(ms));
+ ms->section_mem_map = 0;
+ ms->pageblock_flags = NULL;
+ }
+
+ free_section_usemap(memmap, usemap);
+}
#endif
Index: linux-2.6.25-rc5/include/linux/memory_hotplug.h
===================================================================
--- linux-2.6.25-rc5.orig/include/linux/memory_hotplug.h 2008-03-21 07:00:36.000000000 -0800
+++ linux-2.6.25-rc5/include/linux/memory_hotplug.h 2008-03-25 13:59:51.000000000 -0800
@@ -8,6 +8,7 @@
struct page;
struct zone;
struct pglist_data;
+struct mem_section;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -64,9 +65,11 @@ extern int offline_pages(unsigned long,
/* reasonably generic interface to expand the physical pages in a zone */
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
+extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
/*
- * Walk thorugh all memory which is registered as resource.
+ * Walk through all memory which is registered as resource.
* arg is (start_pfn, nr_pages, private_arg_pointer)
*/
extern int walk_memory_resource(unsigned long start_pfn,
@@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64
extern int remove_memory(u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages);
+extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
#endif /* __LINUX_MEMORY_HOTPLUG_H */
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-28 0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
@ 2008-03-28 2:26 ` Yasunori Goto
0 siblings, 0 replies; 23+ messages in thread
From: Yasunori Goto @ 2008-03-28 2:26 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Andrew Morton
Ok. Thanks.
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
> Generic helper function to remove section mappings and sysfs entries
> for the section of the memory we are removing. offline_pages() correctly
> adjusted zone and marked the pages reserved.
>
> TODO: Yasunori Goto is working on patches to freeup allocations from bootmem.
>
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
>
> ---
> include/linux/memory_hotplug.h | 6 +++-
> mm/memory_hotplug.c | 55 +++++++++++++++++++++++++++++++++++++++++
> mm/sparse.c | 45 +++++++++++++++++++++++++++++++--
> 3 files changed, 102 insertions(+), 4 deletions(-)
>
> Index: linux-2.6.25-rc5/mm/memory_hotplug.c
> ===================================================================
> --- linux-2.6.25-rc5.orig/mm/memory_hotplug.c 2008-03-21 07:00:37.000000000 -0800
> +++ linux-2.6.25-rc5/mm/memory_hotplug.c 2008-03-25 15:03:58.000000000 -0800
> @@ -102,6 +102,25 @@ static int __add_section(struct zone *zo
> return register_new_memory(__pfn_to_section(phys_start_pfn));
> }
>
> +static int __remove_section(struct zone *zone, struct mem_section *ms)
> +{
> + unsigned long flags;
> + struct pglist_data *pgdat = zone->zone_pgdat;
> + int ret = -EINVAL;
> +
> + if (!valid_section(ms))
> + return ret;
> +
> + ret = unregister_memory_section(ms);
> + if (ret)
> + return ret;
> +
> + pgdat_resize_lock(pgdat, &flags);
> + sparse_remove_one_section(zone, ms);
> + pgdat_resize_unlock(pgdat, &flags);
> + return 0;
> +}
> +
> /*
> * Reasonably generic function for adding memory. It is
> * expected that archs that support memory hotplug will
> @@ -135,6 +154,42 @@ int __add_pages(struct zone *zone, unsig
> }
> EXPORT_SYMBOL_GPL(__add_pages);
>
> +/**
> + * __remove_pages() - remove sections of pages from a zone
> + * @zone: zone from which pages need to be removed
> + * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
> + * @nr_pages: number of pages to remove (must be multiple of section size)
> + *
> + * Generic helper function to remove section mappings and sysfs entries
> + * for the section of the memory we are removing. Caller needs to make
> + * sure that pages are marked reserved and zones are adjust properly by
> + * calling offline_pages().
> + */
> +int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> + unsigned long nr_pages)
> +{
> + unsigned long i, ret = 0;
> + int sections_to_remove;
> +
> + /*
> + * We can only remove entire sections
> + */
> + BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
> + BUG_ON(nr_pages % PAGES_PER_SECTION);
> +
> + release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
> +
> + sections_to_remove = nr_pages / PAGES_PER_SECTION;
> + for (i = 0; i < sections_to_remove; i++) {
> + unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
> + ret = __remove_section(zone, __pfn_to_section(pfn));
> + if (ret)
> + break;
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(__remove_pages);
> +
> static void grow_zone_span(struct zone *zone,
> unsigned long start_pfn, unsigned long end_pfn)
> {
> Index: linux-2.6.25-rc5/mm/sparse.c
> ===================================================================
> --- linux-2.6.25-rc5.orig/mm/sparse.c 2008-03-21 07:00:37.000000000 -0800
> +++ linux-2.6.25-rc5/mm/sparse.c 2008-03-25 13:59:51.000000000 -0800
> @@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
> }
>
> /*
> - * We need this if we ever free the mem_maps. While not implemented yet,
> - * this function is included for parity with its sibling.
> + * Decode mem_map from the coded memmap
> */
> -static __attribute((unused))
> +static
> struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
> {
> + /* mask off the extra low bits of information */
> + coded_mem_map &= SECTION_MAP_MASK;
> return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
> }
>
> @@ -363,6 +364,28 @@ static void __kfree_section_memmap(struc
> }
> #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>
> +static void free_section_usemap(struct page *memmap, unsigned long *usemap)
> +{
> + if (!usemap)
> + return;
> +
> + /*
> + * Check to see if allocation came from hot-plug-add
> + */
> + if (PageSlab(virt_to_page(usemap))) {
> + kfree(usemap);
> + if (memmap)
> + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
> + return;
> + }
> +
> + /*
> + * TODO: Allocations came from bootmem - how do I free up ?
> + */
> + printk(KERN_WARNING "Not freeing up allocations from bootmem "
> + "- leaking memory\n");
> +}
> +
> /*
> * returns the number of sections whose mem_maps were properly
> * set. If this is <=0, then that means that the passed-in
> @@ -415,4 +438,20 @@ out:
> }
> return ret;
> }
> +
> +void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
> +{
> + struct page *memmap = NULL;
> + unsigned long *usemap = NULL;
> +
> + if (ms->section_mem_map) {
> + usemap = ms->pageblock_flags;
> + memmap = sparse_decode_mem_map(ms->section_mem_map,
> + __section_nr(ms));
> + ms->section_mem_map = 0;
> + ms->pageblock_flags = NULL;
> + }
> +
> + free_section_usemap(memmap, usemap);
> +}
> #endif
> Index: linux-2.6.25-rc5/include/linux/memory_hotplug.h
> ===================================================================
> --- linux-2.6.25-rc5.orig/include/linux/memory_hotplug.h 2008-03-21 07:00:36.000000000 -0800
> +++ linux-2.6.25-rc5/include/linux/memory_hotplug.h 2008-03-25 13:59:51.000000000 -0800
> @@ -8,6 +8,7 @@
> struct page;
> struct zone;
> struct pglist_data;
> +struct mem_section;
>
> #ifdef CONFIG_MEMORY_HOTPLUG
> /*
> @@ -64,9 +65,11 @@ extern int offline_pages(unsigned long,
> /* reasonably generic interface to expand the physical pages in a zone */
> extern int __add_pages(struct zone *zone, unsigned long start_pfn,
> unsigned long nr_pages);
> +extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
> + unsigned long nr_pages);
>
> /*
> - * Walk thorugh all memory which is registered as resource.
> + * Walk through all memory which is registered as resource.
> * arg is (start_pfn, nr_pages, private_arg_pointer)
> */
> extern int walk_memory_resource(unsigned long start_pfn,
> @@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64
> extern int remove_memory(u64 start, u64 size);
> extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
> int nr_pages);
> +extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
>
> #endif /* __LINUX_MEMORY_HOTPLUG_H */
>
>
--
Yasunori Goto
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 2/5] [PPC] htab_remove_mapping() error handling
2008-03-28 0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
2008-03-28 0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
@ 2008-03-28 0:37 ` Badari Pulavarty
2008-03-28 0:38 ` [PATCH 3/5] [PPC] hotplug memory notifications for ppc Badari Pulavarty
` (2 subsequent siblings)
4 siblings, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 0:37 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
If the sub-arch doesn't support hpte_removebolted(), gracefully
return failure rather than success.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
arch/powerpc/mm/hash_utils_64.c | 14 +++++++++-----
include/asm-powerpc/sparsemem.h | 2 +-
2 files changed, 10 insertions(+), 6 deletions(-)
Index: linux-2.6.25-rc3/arch/powerpc/mm/hash_utils_64.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/mm/hash_utils_64.c 2008-03-05 10:14:28.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/mm/hash_utils_64.c 2008-03-05 10:18:55.000000000 -0800
@@ -192,7 +192,7 @@ int htab_bolt_mapping(unsigned long vsta
return ret < 0 ? ret : 0;
}
-static void htab_remove_mapping(unsigned long vstart, unsigned long vend,
+static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
int psize, int ssize)
{
unsigned long vaddr;
@@ -202,12 +202,15 @@ static void htab_remove_mapping(unsigned
step = 1 << shift;
if (!ppc_md.hpte_removebolted) {
- printk("Sub-arch doesn't implement hpte_removebolted\n");
- return;
+ printk(KERN_WARNING "Sub-arch doesn't implement "
+ "hpte_removebolted\n");
+ return -EINVAL;
}
for (vaddr = vstart; vaddr < vend; vaddr += step)
ppc_md.hpte_removebolted(vaddr, psize, ssize);
+
+ return 0;
}
static int __init htab_dt_scan_seg_sizes(unsigned long node,
@@ -449,9 +452,10 @@ void create_section_mapping(unsigned lon
mmu_linear_psize, mmu_kernel_ssize));
}
-void remove_section_mapping(unsigned long start, unsigned long end)
+int remove_section_mapping(unsigned long start, unsigned long end)
{
- htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize);
+ return htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
Index: linux-2.6.25-rc3/include/asm-powerpc/sparsemem.h
===================================================================
--- linux-2.6.25-rc3.orig/include/asm-powerpc/sparsemem.h 2008-03-05 10:14:31.000000000 -0800
+++ linux-2.6.25-rc3/include/asm-powerpc/sparsemem.h 2008-03-05 10:19:09.000000000 -0800
@@ -15,7 +15,7 @@
#ifdef CONFIG_MEMORY_HOTPLUG
extern void create_section_mapping(unsigned long start, unsigned long end);
-extern void remove_section_mapping(unsigned long start, unsigned long end);
+extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_NUMA
extern int hot_add_scn_to_nid(unsigned long scn_addr);
#else
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 3/5] [PPC] hotplug memory notifications for ppc
2008-03-28 0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
2008-03-28 0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
2008-03-28 0:37 ` [PATCH 2/5] [PPC] htab_remove_mapping() error handling Badari Pulavarty
@ 2008-03-28 0:38 ` Badari Pulavarty
2008-03-28 0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
2008-03-28 0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
4 siblings, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 0:38 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
Hotplug memory notifier for ppc64. This gets invoked by writing
the device-node that needs to be removed to /proc/ppc64/ofdt.
We need to adjust the sections and remove sysfs entries by
calling __remove_pages(). Then call arch specific code to
get rid of htab mappings for the section of memory.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Reviewed-by: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/platforms/pseries/Makefile | 1
arch/powerpc/platforms/pseries/hotplug-memory.c | 98 ++++++++++++++++++++++++
2 files changed, 99 insertions(+)
Index: linux-2.6.25-rc2/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-rc2/arch/powerpc/platforms/pseries/hotplug-memory.c 2008-02-29 09:25:14.000000000 -0800
@@ -0,0 +1,98 @@
+/*
+ * pseries Memory Hotplug infrastructure.
+ *
+ * Copyright (C) 2008 Badari Pulavarty, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/of.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/pSeries_reconfig.h>
+
+static int pseries_remove_memory(struct device_node *np)
+{
+ const char *type;
+ const unsigned int *my_index;
+ const unsigned int *regs;
+ u64 start_pfn, start;
+ struct zone *zone;
+ int ret = -EINVAL;
+
+ /*
+ * Check to see if we are actually removing memory
+ */
+ type = of_get_property(np, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ /*
+ * Find the memory index and size of the removing section
+ */
+ my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+ if (!my_index)
+ return ret;
+
+ regs = of_get_property(np, "reg", NULL);
+ if (!regs)
+ return ret;
+
+ start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+ zone = page_zone(pfn_to_page(start_pfn));
+
+ /*
+ * Remove section mappings and sysfs entries for the
+ * section of the memory we are removing.
+ *
+ * NOTE: Ideally, this should be done in generic code like
+ * remove_memory(). But remove_memory() gets called by writing
+ * to sysfs "state" file and we can't remove sysfs entries
+ * while writing to it. So we have to defer it to here.
+ */
+ ret = __remove_pages(zone, start_pfn, regs[3] >> PAGE_SHIFT);
+ if (ret)
+ return ret;
+
+ /*
+ * Remove htab bolted mappings for this section of memory
+ */
+ start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
+ ret = remove_section_mapping(start, start + regs[3]);
+ return ret;
+}
+
+static int pseries_memory_notifier(struct notifier_block *nb,
+ unsigned long action, void *node)
+{
+ int err = NOTIFY_OK;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ break;
+ case PSERIES_RECONFIG_REMOVE:
+ if (pseries_remove_memory(node))
+ err = NOTIFY_BAD;
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+static struct notifier_block pseries_mem_nb = {
+ .notifier_call = pseries_memory_notifier,
+};
+
+static int __init pseries_memory_hotplug_init(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ pSeries_reconfig_notifier_register(&pseries_mem_nb);
+
+ return 0;
+}
+machine_device_initcall(pseries, pseries_memory_hotplug_init);
Index: linux-2.6.25-rc2/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- linux-2.6.25-rc2.orig/arch/powerpc/platforms/pseries/Makefile 2008-02-28 08:15:53.000000000 -0800
+++ linux-2.6.25-rc2/arch/powerpc/platforms/pseries/Makefile 2008-02-28 08:17:57.000000000 -0800
@@ -14,6 +14,7 @@ obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_HVCS) += hvcserver.o
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
2008-03-28 0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
` (2 preceding siblings ...)
2008-03-28 0:38 ` [PATCH 3/5] [PPC] hotplug memory notifications for ppc Badari Pulavarty
@ 2008-03-28 0:39 ` Badari Pulavarty
2008-03-28 2:40 ` Kumar Gala
2008-05-15 6:49 ` Benjamin Herrenschmidt
2008-03-28 0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
4 siblings, 2 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 0:39 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
ppc kernel maintains information about logical memory blocks in
lmb.memory structure at the boot time. It's not updated for
hotplug memory add/remove. The hotplug memory notifier for memory
add/remove now updates lmb.memory.
This information is useful for eHEA driver to find out the memory
layout and holes.
NOTE: No special locking is needed for lmb_add() and lmb_remove().
Calls to these are serialized by caller. (pSeries_reconfig_chain).
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++
include/linux/lmb.h | 3 -
lib/lmb.c | 66 ++++++++++++++++++++----
3 files changed, 102 insertions(+), 10 deletions(-)
Index: linux-2.6.25-rc3/lib/lmb.c
===================================================================
--- linux-2.6.25-rc3.orig/lib/lmb.c 2008-03-05 10:44:29.000000000 -0800
+++ linux-2.6.25-rc3/lib/lmb.c 2008-03-05 10:44:56.000000000 -0800
@@ -54,14 +54,13 @@ void lmb_dump_all(void)
#endif /* DEBUG */
}
-static unsigned long __init lmb_addrs_overlap(u64 base1,
- u64 size1, u64 base2, u64 size2)
+static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
+ u64 size2)
{
return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
}
-static long __init lmb_addrs_adjacent(u64 base1, u64 size1,
- u64 base2, u64 size2)
+static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
{
if (base2 == base1 + size1)
return 1;
@@ -71,7 +70,7 @@ static long __init lmb_addrs_adjacent(u6
return 0;
}
-static long __init lmb_regions_adjacent(struct lmb_region *rgn,
+static long lmb_regions_adjacent(struct lmb_region *rgn,
unsigned long r1, unsigned long r2)
{
u64 base1 = rgn->region[r1].base;
@@ -82,7 +81,7 @@ static long __init lmb_regions_adjacent(
return lmb_addrs_adjacent(base1, size1, base2, size2);
}
-static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r)
+static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
{
unsigned long i;
@@ -94,7 +93,7 @@ static void __init lmb_remove_region(str
}
/* Assumption: base addr of region 1 < base addr of region 2 */
-static void __init lmb_coalesce_regions(struct lmb_region *rgn,
+static void lmb_coalesce_regions(struct lmb_region *rgn,
unsigned long r1, unsigned long r2)
{
rgn->region[r1].size += rgn->region[r2].size;
@@ -129,7 +128,7 @@ void __init lmb_analyze(void)
}
/* This routine called with relocation disabled. */
-static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
+static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
{
unsigned long coalesced = 0;
long adjacent, i;
@@ -195,7 +194,7 @@ static long __init lmb_add_region(struct
}
/* This routine may be called with relocation disabled. */
-long __init lmb_add(u64 base, u64 size)
+long lmb_add(u64 base, u64 size)
{
struct lmb_region *_rgn = &(lmb.memory);
@@ -207,6 +206,55 @@ long __init lmb_add(u64 base, u64 size)
}
+long lmb_remove(u64 base, u64 size)
+{
+ struct lmb_region *rgn = &(lmb.memory);
+ u64 rgnbegin, rgnend;
+ u64 end = base + size;
+ int i;
+
+ rgnbegin = rgnend = 0; /* suppress gcc warnings */
+
+ /* Find the region where (base, size) belongs to */
+ for (i=0; i < rgn->cnt; i++) {
+ rgnbegin = rgn->region[i].base;
+ rgnend = rgnbegin + rgn->region[i].size;
+
+ if ((rgnbegin <= base) && (end <= rgnend))
+ break;
+ }
+
+ /* Didn't find the region */
+ if (i == rgn->cnt)
+ return -1;
+
+ /* Check to see if we are removing entire region */
+ if ((rgnbegin == base) && (rgnend == end)) {
+ lmb_remove_region(rgn, i);
+ return 0;
+ }
+
+ /* Check to see if region is matching at the front */
+ if (rgnbegin == base) {
+ rgn->region[i].base = end;
+ rgn->region[i].size -= size;
+ return 0;
+ }
+
+ /* Check to see if the region is matching at the end */
+ if (rgnend == end) {
+ rgn->region[i].size -= size;
+ return 0;
+ }
+
+ /*
+ * We need to split the entry - adjust the current one to the
+ * beginning of the hole and add the region after hole.
+ */
+ rgn->region[i].size = base - rgn->region[i].base;
+ return lmb_add_region(rgn, end, rgnend - end);
+}
+
long __init lmb_reserve(u64 base, u64 size)
{
struct lmb_region *_rgn = &(lmb.reserved);
Index: linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/platforms/pseries/hotplug-memory.c 2008-03-05 10:44:51.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c 2008-03-05 10:45:06.000000000 -0800
@@ -10,6 +10,7 @@
*/
#include <linux/of.h>
+#include <linux/lmb.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/pSeries_reconfig.h>
@@ -58,6 +59,11 @@ static int pseries_remove_memory(struct
return ret;
/*
+ * Update memory regions for memory remove
+ */
+ lmb_remove(start_pfn << PAGE_SHIFT, regs[3]);
+
+ /*
* Remove htab bolted mappings for this section of memory
*/
start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
@@ -65,6 +71,41 @@ static int pseries_remove_memory(struct
return ret;
}
+static int pseries_add_memory(struct device_node *np)
+{
+ const char *type;
+ const unsigned int *my_index;
+ const unsigned int *regs;
+ u64 start_pfn;
+ int ret = -EINVAL;
+
+ /*
+ * Check to see if we are actually adding memory
+ */
+ type = of_get_property(np, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ /*
+ * Find the memory index and size of the removing section
+ */
+ my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+ if (!my_index)
+ return ret;
+
+ regs = of_get_property(np, "reg", NULL);
+ if (!regs)
+ return ret;
+
+ start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+
+ /*
+ * Update memory region to represent the memory add
+ */
+ lmb_add(start_pfn << PAGE_SHIFT, regs[3]);
+ return 0;
+}
+
static int pseries_memory_notifier(struct notifier_block *nb,
unsigned long action, void *node)
{
@@ -72,6 +113,8 @@ static int pseries_memory_notifier(struc
switch (action) {
case PSERIES_RECONFIG_ADD:
+ if (pseries_add_memory(node))
+ err = NOTIFY_BAD;
break;
case PSERIES_RECONFIG_REMOVE:
if (pseries_remove_memory(node))
Index: linux-2.6.25-rc3/include/linux/lmb.h
===================================================================
--- linux-2.6.25-rc3.orig/include/linux/lmb.h 2008-03-05 10:44:29.000000000 -0800
+++ linux-2.6.25-rc3/include/linux/lmb.h 2008-03-05 10:44:56.000000000 -0800
@@ -40,7 +40,8 @@ extern struct lmb lmb;
extern void __init lmb_init(void);
extern void __init lmb_analyze(void);
-extern long __init lmb_add(u64 base, u64 size);
+extern long lmb_add(u64 base, u64 size);
+extern long lmb_remove(u64 base, u64 size);
extern long __init lmb_reserve(u64 base, u64 size);
extern u64 __init lmb_alloc(u64 size, u64 align);
extern u64 __init lmb_alloc_base(u64 size,
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
2008-03-28 0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
@ 2008-03-28 2:40 ` Kumar Gala
2008-03-28 16:52 ` Badari Pulavarty
2008-05-15 6:49 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 23+ messages in thread
From: Kumar Gala @ 2008-03-28 2:40 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto
On Mar 27, 2008, at 7:39 PM, Badari Pulavarty wrote:
> ppc kernel maintains information about logical memory blocks in
> lmb.memory structure at the boot time. Its not updated for
> hotplug memory add/remove. hotplug memory notifier for memory
> add/remove now updates lmb.memory.
>
> This information is useful for eHEA driver to find out the memory
> layout and holes.
>
> NOTE: No special locking is needed for lmb_add() and lmb_remove().
> Calls to these are serialized by caller. (pSeries_reconfig_chain).
>
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> ---
> arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++
> include/linux/lmb.h | 3 -
> lib/lmb.c | 66 ++++++++++++++
> ++++++----
> 3 files changed, 102 insertions(+), 10 deletions(-)
How is lmb_remove different than lmb_alloc?
- k
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
2008-03-28 2:40 ` Kumar Gala
@ 2008-03-28 16:52 ` Badari Pulavarty
0 siblings, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 16:52 UTC (permalink / raw)
To: Kumar Gala; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto
Kumar Gala wrote:
>
> On Mar 27, 2008, at 7:39 PM, Badari Pulavarty wrote:
>> ppc kernel maintains information about logical memory blocks in
>> lmb.memory structure at the boot time. Its not updated for
>> hotplug memory add/remove. hotplug memory notifier for memory
>> add/remove now updates lmb.memory.
>>
>> This information is useful for eHEA driver to find out the memory
>> layout and holes.
>>
>> NOTE: No special locking is needed for lmb_add() and lmb_remove().
>> Calls to these are serialized by caller. (pSeries_reconfig_chain).
>>
>> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
>> ---
>> arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++
>> include/linux/lmb.h | 3 -
>> lib/lmb.c | 66
>> ++++++++++++++++++++----
>> 3 files changed, 102 insertions(+), 10 deletions(-)
>
> How is lmb_remove different than lmb_alloc?
>
> - k
lmb_remove() can be used to punch a hole in to the existing memory block.
lmb_alloc() tries to allocate for a given alignment, I don't think it can
adjust the current entries. Isn't it ?
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
2008-03-28 0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
2008-03-28 2:40 ` Kumar Gala
@ 2008-05-15 6:49 ` Benjamin Herrenschmidt
1 sibling, 0 replies; 23+ messages in thread
From: Benjamin Herrenschmidt @ 2008-05-15 6:49 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto
On Thu, 2008-03-27 at 16:39 -0800, Badari Pulavarty wrote:
> ppc kernel maintains information about logical memory blocks in
> lmb.memory structure at the boot time. Its not updated for
> hotplug memory add/remove. hotplug memory notifier for memory
> add/remove now updates lmb.memory.
>
> This information is useful for eHEA driver to find out the memory
> layout and holes.
>
> NOTE: No special locking is needed for lmb_add() and lmb_remove().
> Calls to these are serialized by caller. (pSeries_reconfig_chain).
My worry here is the lack of locking...
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> ---
> arch/powerpc/platforms/pseries/hotplug-memory.c | 43 +++++++++++++++
> include/linux/lmb.h | 3 -
> lib/lmb.c | 66 ++++++++++++++++++++----
> 3 files changed, 102 insertions(+), 10 deletions(-)
>
> Index: linux-2.6.25-rc3/lib/lmb.c
> ===================================================================
> --- linux-2.6.25-rc3.orig/lib/lmb.c 2008-03-05 10:44:29.000000000 -0800
> +++ linux-2.6.25-rc3/lib/lmb.c 2008-03-05 10:44:56.000000000 -0800
> @@ -54,14 +54,13 @@ void lmb_dump_all(void)
> #endif /* DEBUG */
> }
>
> -static unsigned long __init lmb_addrs_overlap(u64 base1,
> - u64 size1, u64 base2, u64 size2)
> +static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
> + u64 size2)
> {
> return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
> }
>
> -static long __init lmb_addrs_adjacent(u64 base1, u64 size1,
> - u64 base2, u64 size2)
> +static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
> {
> if (base2 == base1 + size1)
> return 1;
> @@ -71,7 +70,7 @@ static long __init lmb_addrs_adjacent(u6
> return 0;
> }
>
> -static long __init lmb_regions_adjacent(struct lmb_region *rgn,
> +static long lmb_regions_adjacent(struct lmb_region *rgn,
> unsigned long r1, unsigned long r2)
> {
> u64 base1 = rgn->region[r1].base;
> @@ -82,7 +81,7 @@ static long __init lmb_regions_adjacent(
> return lmb_addrs_adjacent(base1, size1, base2, size2);
> }
>
> -static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r)
> +static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
> {
> unsigned long i;
>
> @@ -94,7 +93,7 @@ static void __init lmb_remove_region(str
> }
>
> /* Assumption: base addr of region 1 < base addr of region 2 */
> -static void __init lmb_coalesce_regions(struct lmb_region *rgn,
> +static void lmb_coalesce_regions(struct lmb_region *rgn,
> unsigned long r1, unsigned long r2)
> {
> rgn->region[r1].size += rgn->region[r2].size;
> @@ -129,7 +128,7 @@ void __init lmb_analyze(void)
> }
>
> /* This routine called with relocation disabled. */
> -static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
> +static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
> {
> unsigned long coalesced = 0;
> long adjacent, i;
> @@ -195,7 +194,7 @@ static long __init lmb_add_region(struct
> }
>
> /* This routine may be called with relocation disabled. */
> -long __init lmb_add(u64 base, u64 size)
> +long lmb_add(u64 base, u64 size)
> {
> struct lmb_region *_rgn = &(lmb.memory);
>
> @@ -207,6 +206,55 @@ long __init lmb_add(u64 base, u64 size)
>
> }
>
> +long lmb_remove(u64 base, u64 size)
> +{
> + struct lmb_region *rgn = &(lmb.memory);
> + u64 rgnbegin, rgnend;
> + u64 end = base + size;
> + int i;
> +
> + rgnbegin = rgnend = 0; /* supress gcc warnings */
> +
> + /* Find the region where (base, size) belongs to */
> + for (i=0; i < rgn->cnt; i++) {
> + rgnbegin = rgn->region[i].base;
> + rgnend = rgnbegin + rgn->region[i].size;
> +
> + if ((rgnbegin <= base) && (end <= rgnend))
> + break;
> + }
> +
> + /* Didn't find the region */
> + if (i == rgn->cnt)
> + return -1;
> +
> + /* Check to see if we are removing entire region */
> + if ((rgnbegin == base) && (rgnend == end)) {
> + lmb_remove_region(rgn, i);
> + return 0;
> + }
> +
> + /* Check to see if region is matching at the front */
> + if (rgnbegin == base) {
> + rgn->region[i].base = end;
> + rgn->region[i].size -= size;
> + return 0;
> + }
> +
> + /* Check to see if the region is matching at the end */
> + if (rgnend == end) {
> + rgn->region[i].size -= size;
> + return 0;
> + }
> +
> + /*
> + * We need to split the entry - adjust the current one to the
> + * beginging of the hole and add the region after hole.
> + */
> + rgn->region[i].size = base - rgn->region[i].base;
> + return lmb_add_region(rgn, end, rgnend - end);
> +}
> +
> long __init lmb_reserve(u64 base, u64 size)
> {
> struct lmb_region *_rgn = &(lmb.reserved);
> Index: linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c
> ===================================================================
> --- linux-2.6.25-rc3.orig/arch/powerpc/platforms/pseries/hotplug-memory.c 2008-03-05 10:44:51.000000000 -0800
> +++ linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c 2008-03-05 10:45:06.000000000 -0800
> @@ -10,6 +10,7 @@
> */
>
> #include <linux/of.h>
> +#include <linux/lmb.h>
> #include <asm/firmware.h>
> #include <asm/machdep.h>
> #include <asm/pSeries_reconfig.h>
> @@ -58,6 +59,11 @@ static int pseries_remove_memory(struct
> return ret;
>
> /*
> + * Update memory regions for memory remove
> + */
> + lmb_remove(start_pfn << PAGE_SHIFT, regs[3]);
> +
> + /*
> * Remove htab bolted mappings for this section of memory
> */
> start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
> @@ -65,6 +71,41 @@ static int pseries_remove_memory(struct
> return ret;
> }
>
> +static int pseries_add_memory(struct device_node *np)
> +{
> + const char *type;
> + const unsigned int *my_index;
> + const unsigned int *regs;
> + u64 start_pfn;
> + int ret = -EINVAL;
> +
> + /*
> + * Check to see if we are actually adding memory
> + */
> + type = of_get_property(np, "device_type", NULL);
> + if (type == NULL || strcmp(type, "memory") != 0)
> + return 0;
> +
> + /*
> + * Find the memory index and size of the removing section
> + */
> + my_index = of_get_property(np, "ibm,my-drc-index", NULL);
> + if (!my_index)
> + return ret;
> +
> + regs = of_get_property(np, "reg", NULL);
> + if (!regs)
> + return ret;
> +
> + start_pfn = section_nr_to_pfn(*my_index & 0xffff);
> +
> + /*
> + * Update memory region to represent the memory add
> + */
> + lmb_add(start_pfn << PAGE_SHIFT, regs[3]);
> + return 0;
> +}
> +
> static int pseries_memory_notifier(struct notifier_block *nb,
> unsigned long action, void *node)
> {
> @@ -72,6 +113,8 @@ static int pseries_memory_notifier(struc
>
> switch (action) {
> case PSERIES_RECONFIG_ADD:
> + if (pseries_add_memory(node))
> + err = NOTIFY_BAD;
> break;
> case PSERIES_RECONFIG_REMOVE:
> if (pseries_remove_memory(node))
> Index: linux-2.6.25-rc3/include/linux/lmb.h
> ===================================================================
> --- linux-2.6.25-rc3.orig/include/linux/lmb.h 2008-03-05 10:44:29.000000000 -0800
> +++ linux-2.6.25-rc3/include/linux/lmb.h 2008-03-05 10:44:56.000000000 -0800
> @@ -40,7 +40,8 @@ extern struct lmb lmb;
>
> extern void __init lmb_init(void);
> extern void __init lmb_analyze(void);
> -extern long __init lmb_add(u64 base, u64 size);
> +extern long lmb_add(u64 base, u64 size);
> +extern long lmb_remove(u64 base, u64 size);
> extern long __init lmb_reserve(u64 base, u64 size);
> extern u64 __init lmb_alloc(u64 size, u64 align);
> extern u64 __init lmb_alloc_base(u64 size,
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
2008-03-28 0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
` (3 preceding siblings ...)
2008-03-28 0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
@ 2008-03-28 0:39 ` Badari Pulavarty
2008-05-13 0:17 ` Geoff Levand
4 siblings, 1 reply; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-28 0:39 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
Provide walk_memory_resource() for ppc64. PPC maintains
logical memory region mappings in lmb.memory structures. Walk
through these structures and do the callbacks for the
contiguous chunks.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
arch/powerpc/mm/mem.c | 30 +++++++++++++++++++++++-------
include/linux/lmb.h | 1 +
lib/lmb.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 57 insertions(+), 7 deletions(-)
Index: linux-2.6.25-rc3/lib/lmb.c
===================================================================
--- linux-2.6.25-rc3.orig/lib/lmb.c 2008-03-05 10:28:55.000000000 -0800
+++ linux-2.6.25-rc3/lib/lmb.c 2008-03-05 10:34:01.000000000 -0800
@@ -416,3 +416,36 @@ int __init lmb_is_reserved(u64 addr)
}
return 0;
}
+
+/*
+ * Given a <base, len>, find which memory regions belong to this range.
+ * Adjust the request and return a contiguous chunk.
+ */
+int lmb_find(struct lmb_property *res)
+{
+ int i;
+ u64 rstart, rend;
+
+ rstart = res->base;
+ rend = rstart + res->size - 1;
+
+ for (i = 0; i < lmb.memory.cnt; i++) {
+ u64 start = lmb.memory.region[i].base;
+ u64 end = start + lmb.memory.region[i].size - 1;
+
+ if (start > rend)
+ return -1;
+
+ if ((end >= rstart) && (start < rend)) {
+ /* adjust the request */
+ if (rstart < start)
+ rstart = start;
+ if (rend > end)
+ rend = end;
+ res->base = rstart;
+ res->size = rend - rstart + 1;
+ return 0;
+ }
+ }
+ return -1;
+}
Index: linux-2.6.25-rc3/arch/powerpc/mm/mem.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c 2008-03-05 10:14:28.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c 2008-03-05 10:32:16.000000000 -0800
@@ -148,19 +148,35 @@ out:
/*
* walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. On PPC64, since this range comes from /sysfs, the range
- * is guaranteed to be valid, non-overlapping and can not contain any
- * holes. By the time we get here (memory add or remove), /proc/device-tree
- * is updated and correct. Only reason we need to check against device-tree
- * would be if we allow user-land to specify a memory range through a
- * system call/ioctl etc. instead of doing offline/online through /sysfs.
+ * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
+ * Instead it maintains it in lmb.memory structures. Walk through the
+ * memory regions, find holes and callback for contiguous regions.
*/
int
walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
int (*func)(unsigned long, unsigned long, void *))
{
- return (*func)(start_pfn, nr_pages, arg);
+ struct lmb_property res;
+ unsigned long pfn, len;
+ u64 end;
+ int ret = -1;
+
+ res.base = (u64) start_pfn << PAGE_SHIFT;
+ res.size = (u64) nr_pages << PAGE_SHIFT;
+
+ end = res.base + res.size - 1;
+ while ((res.base < end) && (lmb_find(&res) >= 0)) {
+ pfn = (unsigned long)(res.base >> PAGE_SHIFT);
+ len = (unsigned long)(res.size >> PAGE_SHIFT);
+ ret = (*func)(pfn, len, arg);
+ if (ret)
+ break;
+ res.base += (res.size + 1);
+ res.size = (end - res.base + 1);
+ }
+ return ret;
}
+EXPORT_SYMBOL_GPL(walk_memory_resource);
#endif /* CONFIG_MEMORY_HOTPLUG */
Index: linux-2.6.25-rc3/include/linux/lmb.h
===================================================================
--- linux-2.6.25-rc3.orig/include/linux/lmb.h 2008-03-05 10:30:06.000000000 -0800
+++ linux-2.6.25-rc3/include/linux/lmb.h 2008-03-05 10:33:12.000000000 -0800
@@ -52,6 +52,7 @@ extern u64 __init lmb_phys_mem_size(void
extern u64 __init lmb_end_of_DRAM(void);
extern void __init lmb_enforce_memory_limit(u64 memory_limit);
extern int __init lmb_is_reserved(u64 addr);
+extern int lmb_find(struct lmb_property *res);
extern void lmb_dump_all(void);
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
2008-03-28 0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
@ 2008-05-13 0:17 ` Geoff Levand
2008-05-13 15:09 ` Badari Pulavarty
0 siblings, 1 reply; 23+ messages in thread
From: Geoff Levand @ 2008-05-13 0:17 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto
Hi,
I've had some trouble with this change.
Badari Pulavarty wrote:
> Provide walk_memory_resource() for ppc64. PPC maintains
> logic memory region mapping in lmb.memory structures. Walk
> through these structures and do the callbacks for the
> contiguous chunks.
...
> --- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c 2008-03-05 10:14:28.000000000 -0800
> +++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c 2008-03-05 10:32:16.000000000 -0800
> @@ -148,19 +148,35 @@ out:
>
> /*
> * walk_memory_resource() needs to make sure there is no holes in a given
> - * memory range. On PPC64, since this range comes from /sysfs, the range
> - * is guaranteed to be valid, non-overlapping and can not contain any
> - * holes. By the time we get here (memory add or remove), /proc/device-tree
> - * is updated and correct. Only reason we need to check against device-tree
> - * would be if we allow user-land to specify a memory range through a
> - * system call/ioctl etc. instead of doing offline/online through /sysfs.
> + * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
> + * Instead it maintains it in lmb.memory structures. Walk through the
> + * memory regions, find holes and callback for contiguous regions.
> */
> int
> walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
> int (*func)(unsigned long, unsigned long, void *))
> {
> - return (*func)(start_pfn, nr_pages, arg);
> + struct lmb_property res;
> + unsigned long pfn, len;
> + u64 end;
> + int ret = -1;
> +
> + res.base = (u64) start_pfn << PAGE_SHIFT;
> + res.size = (u64) nr_pages << PAGE_SHIFT;
> +
> + end = res.base + res.size - 1;
> + while ((res.base < end) && (lmb_find(&res) >= 0)) {
^^^^^^^^^^^^^^
In the PS3 platform code (arch/powerpc/platforms/ps3/mm.c) the hotplug
memory is added like this:
...
result = add_memory(0, start_addr, map.r1.size);
...
result = online_pages(start_pfn, nr_pages);
...
In its work, online_pages() eventually calls walk_memory_resource(),
which has been changed as above to do a test on lmb_find(). I found
that this lmb_find() test always fails for PS3 since add_memory()
does not call lmb_add().
Is it the responsibility of the platform code to call lmb_add(), or
should that be done by add_memory()?
-Geoff
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
2008-05-13 0:17 ` Geoff Levand
@ 2008-05-13 15:09 ` Badari Pulavarty
0 siblings, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-05-13 15:09 UTC (permalink / raw)
To: Geoff Levand; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto
On Mon, 2008-05-12 at 17:17 -0700, Geoff Levand wrote:
> Hi,
>
> I've had some trouble with this change.
>
> Badari Pulavarty wrote:
> > Provide walk_memory_resource() for ppc64. PPC maintains
> > logic memory region mapping in lmb.memory structures. Walk
> > through these structures and do the callbacks for the
> > contiguous chunks.
>
> ...
>
> > --- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c 2008-03-05 10:14:28.000000000 -0800
> > +++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c 2008-03-05 10:32:16.000000000 -0800
> > @@ -148,19 +148,35 @@ out:
> >
> > /*
> > * walk_memory_resource() needs to make sure there is no holes in a given
> > - * memory range. On PPC64, since this range comes from /sysfs, the range
> > - * is guaranteed to be valid, non-overlapping and can not contain any
> > - * holes. By the time we get here (memory add or remove), /proc/device-tree
> > - * is updated and correct. Only reason we need to check against device-tree
> > - * would be if we allow user-land to specify a memory range through a
> > - * system call/ioctl etc. instead of doing offline/online through /sysfs.
> > + * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
> > + * Instead it maintains it in lmb.memory structures. Walk through the
> > + * memory regions, find holes and callback for contiguous regions.
> > */
> > int
> > walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
> > int (*func)(unsigned long, unsigned long, void *))
> > {
> > - return (*func)(start_pfn, nr_pages, arg);
> > + struct lmb_property res;
> > + unsigned long pfn, len;
> > + u64 end;
> > + int ret = -1;
> > +
> > + res.base = (u64) start_pfn << PAGE_SHIFT;
> > + res.size = (u64) nr_pages << PAGE_SHIFT;
> > +
> > + end = res.base + res.size - 1;
> > + while ((res.base < end) && (lmb_find(&res) >= 0)) {
> ^^^^^^^^^^^^^^
>
> In the PS3 platform code (arch/pwerpc/platfroms/ps3/mm.c) the hotplug
> memory is added like this:
>
> ...
> result = add_memory(0, start_addr, map.r1.size);
> ...
> result = online_pages(start_pfn, nr_pages);
> ...
>
> In its work, online_pages() eventually calls walk_memory_resource(),
> which has been changed as above to do a test on lmb_find(). I found
> that this lmb_find() test always fails for PS3 since add_memory()
> does not call lmb_add().
>
> Is it the responsibility of the platform code to call lmb_add(), or
> should that be done by add_memory()?
Since the "lmb" code is architecture-specific, I would prefer that
architecture-specific code be responsible for manipulating "lmb"s rather
than generic code.
In case of ppc64, I added lmb_add() call in /proc/device-tree
manipulation code. Are there any arch specific calls for PS3
when add/remove memory happens ? If there are no other calls,
you can do lmb_add() in ps3_mm_add_memory(). For remove, we
need to find a better place.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* [PATCH 1/5] generic __remove_pages() support
2008-03-06 18:54 [PATCH 0/5] 2.6.25-rc3-mm1 hotplug memory remove updates Badari Pulavarty
@ 2008-03-06 18:55 ` Badari Pulavarty
2008-03-06 19:08 ` Randy Dunlap
` (2 more replies)
0 siblings, 3 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-06 18:55 UTC (permalink / raw)
To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
Generic helper function to remove section mappings and sysfs entries
for the section of the memory we are removing. offline_pages() correctly
adjusted zone and marked the pages reserved.
Issue: If mem_map, usemap allocation could come from different places -
kmalloc, vmalloc, alloc_pages or bootmem. There is no easy way
to find and free up bootmem allocations.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
include/linux/memory_hotplug.h | 4 +++
mm/memory_hotplug.c | 44 +++++++++++++++++++++++++++++++++++++++++
mm/sparse.c | 43 +++++++++++++++++++++++++++++++++++++---
3 files changed, 88 insertions(+), 3 deletions(-)
Index: linux-2.6.25-rc2/mm/memory_hotplug.c
===================================================================
--- linux-2.6.25-rc2.orig/mm/memory_hotplug.c 2008-02-27 12:58:17.000000000 -0800
+++ linux-2.6.25-rc2/mm/memory_hotplug.c 2008-02-27 16:06:50.000000000 -0800
@@ -102,6 +102,21 @@ static int __add_section(struct zone *zo
return register_new_memory(__pfn_to_section(phys_start_pfn));
}
+static int __remove_section(struct zone *zone, struct mem_section *ms)
+{
+ int ret = -EINVAL;
+
+ if (!valid_section(ms))
+ return ret;
+
+ ret = unregister_memory_section(ms);
+ if (ret)
+ return ret;
+
+ sparse_remove_one_section(zone, ms);
+ return 0;
+}
+
/*
* Reasonably generic function for adding memory. It is
* expected that archs that support memory hotplug will
@@ -135,6 +150,35 @@ int __add_pages(struct zone *zone, unsig
}
EXPORT_SYMBOL_GPL(__add_pages);
+int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long i, ret = 0;
+ int sections_to_remove;
+ unsigned long flags;
+ struct pglist_data *pgdat = zone->zone_pgdat;
+
+ /*
+ * We can only remove entire sections
+ */
+ BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
+ BUG_ON(nr_pages % PAGES_PER_SECTION);
+
+ release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
+
+ sections_to_remove = nr_pages / PAGES_PER_SECTION;
+ for (i = 0; i < sections_to_remove; i++) {
+ unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ pgdat_resize_lock(pgdat, &flags);
+ ret = __remove_section(zone, __pfn_to_section(pfn));
+ pgdat_resize_unlock(pgdat, &flags);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__remove_pages);
+
static void grow_zone_span(struct zone *zone,
unsigned long start_pfn, unsigned long end_pfn)
{
Index: linux-2.6.25-rc2/mm/sparse.c
===================================================================
--- linux-2.6.25-rc2.orig/mm/sparse.c 2008-02-15 12:57:20.000000000 -0800
+++ linux-2.6.25-rc2/mm/sparse.c 2008-02-27 13:02:51.000000000 -0800
@@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
}
/*
- * We need this if we ever free the mem_maps. While not implemented yet,
- * this function is included for parity with its sibling.
+ * Decode mem_map from the coded memmap
*/
-static __attribute((unused))
+static
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
+ /* mask off the extra low bits of information */
+ coded_mem_map &= SECTION_MAP_MASK;
return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
@@ -363,6 +364,26 @@ static void __kfree_section_memmap(struc
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+{
+ if (!usemap)
+ return;
+
+ /*
+ * Check to see if allocation came from hot-plug-add
+ */
+ if (PageSlab(virt_to_page(usemap))) {
+ kfree(usemap);
+ if (memmap)
+ __kfree_section_memmap(memmap, PAGES_PER_SECTION);
+ return;
+ }
+
+ /*
+ * Allocations came from bootmem - how do I free up ?
+ */
+}
+
/*
* returns the number of sections whose mem_maps were properly
* set. If this is <=0, then that means that the passed-in
@@ -415,4 +436,20 @@ out:
}
return ret;
}
+
+void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
+{
+ struct page *memmap = NULL;
+ unsigned long *usemap = NULL;
+
+ if (ms->section_mem_map) {
+ usemap = ms->pageblock_flags;
+ memmap = sparse_decode_mem_map(ms->section_mem_map,
+ __section_nr(ms));
+ ms->section_mem_map = 0;
+ ms->pageblock_flags = NULL;
+ }
+
+ free_section_usemap(memmap, usemap);
+}
#endif
Index: linux-2.6.25-rc2/include/linux/memory_hotplug.h
===================================================================
--- linux-2.6.25-rc2.orig/include/linux/memory_hotplug.h 2008-02-27 12:58:17.000000000 -0800
+++ linux-2.6.25-rc2/include/linux/memory_hotplug.h 2008-02-27 13:00:04.000000000 -0800
@@ -8,6 +8,7 @@
struct page;
struct zone;
struct pglist_data;
+struct mem_section;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -64,6 +65,8 @@ extern int offline_pages(unsigned long,
/* reasonably generic interface to expand the physical pages in a zone */
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
+extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
/*
* Walk thorugh all memory which is registered as resource.
@@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64
extern int remove_memory(u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages);
+extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
#endif /* __LINUX_MEMORY_HOTPLUG_H */
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 18:55 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
@ 2008-03-06 19:08 ` Randy Dunlap
2008-03-06 19:29 ` Badari Pulavarty
2008-03-06 23:37 ` Badari Pulavarty
2008-03-06 20:54 ` Dave Hansen
2008-03-21 15:25 ` Yasunori Goto
2 siblings, 2 replies; 23+ messages in thread
From: Randy Dunlap @ 2008-03-06 19:08 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
On Thu, 06 Mar 2008 10:55:34 -0800 Badari Pulavarty wrote:
> Generic helper function to remove section mappings and sysfs entries
> for the section of the memory we are removing. offline_pages() correctly
> adjusted zone and marked the pages reserved.
Such generic (exported, non-static) interfaces could use some
(kernel-)docs, please.
> Issue: If mem_map, usemap allocation could come from different places -
> kmalloc, vmalloc, alloc_pages or bootmem. There is no easy way
> to find and free up bootmem allocations.
>
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
>
> ---
> include/linux/memory_hotplug.h | 4 +++
> mm/memory_hotplug.c | 44 +++++++++++++++++++++++++++++++++++++++++
> mm/sparse.c | 43 +++++++++++++++++++++++++++++++++++++---
> 3 files changed, 88 insertions(+), 3 deletions(-)
> Index: linux-2.6.25-rc2/include/linux/memory_hotplug.h
> ===================================================================
> --- linux-2.6.25-rc2.orig/include/linux/memory_hotplug.h 2008-02-27 12:58:17.000000000 -0800
> +++ linux-2.6.25-rc2/include/linux/memory_hotplug.h 2008-02-27 13:00:04.000000000 -0800
> @@ -64,6 +65,8 @@ extern int offline_pages(unsigned long,
> /* reasonably generic interface to expand the physical pages in a zone */
> extern int __add_pages(struct zone *zone, unsigned long start_pfn,
> unsigned long nr_pages);
> +extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
> + unsigned long nr_pages);
>
> /*
> * Walk thorugh all memory which is registered as resource.
through :)
---
~Randy
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 19:08 ` Randy Dunlap
@ 2008-03-06 19:29 ` Badari Pulavarty
2008-03-06 23:37 ` Badari Pulavarty
1 sibling, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-06 19:29 UTC (permalink / raw)
To: Randy Dunlap; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
On Thu, 2008-03-06 at 11:08 -0800, Randy Dunlap wrote:
> On Thu, 06 Mar 2008 10:55:34 -0800 Badari Pulavarty wrote:
>
> > Generic helper function to remove section mappings and sysfs entries
> > for the section of the memory we are removing. offline_pages() correctly
> > adjusted zone and marked the pages reserved.
>
> Such generic (exported, non-static) interfaces could use some
> (kernel-)docs, please.
Sure. Will do.
> > Issue: If mem_map, usemap allocation could come from different places -
> > kmalloc, vmalloc, alloc_pages or bootmem. There is no easy way
> > to find and free up bootmem allocations.
> >
> > Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> >
> > ---
> > include/linux/memory_hotplug.h | 4 +++
> > mm/memory_hotplug.c | 44 +++++++++++++++++++++++++++++++++++++++++
> > mm/sparse.c | 43 +++++++++++++++++++++++++++++++++++++---
> > 3 files changed, 88 insertions(+), 3 deletions(-)
>
> > Index: linux-2.6.25-rc2/include/linux/memory_hotplug.h
> > ===================================================================
> > --- linux-2.6.25-rc2.orig/include/linux/memory_hotplug.h 2008-02-27 12:58:17.000000000 -0800
> > +++ linux-2.6.25-rc2/include/linux/memory_hotplug.h 2008-02-27 13:00:04.000000000 -0800
> > @@ -64,6 +65,8 @@ extern int offline_pages(unsigned long,
> > /* reasonably generic interface to expand the physical pages in a zone */
> > extern int __add_pages(struct zone *zone, unsigned long start_pfn,
> > unsigned long nr_pages);
> > +extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
> > + unsigned long nr_pages);
> >
> > /*
> > * Walk thorugh all memory which is registered as resource.
>
> through :)
I will fix that.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 19:08 ` Randy Dunlap
2008-03-06 19:29 ` Badari Pulavarty
@ 2008-03-06 23:37 ` Badari Pulavarty
1 sibling, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-06 23:37 UTC (permalink / raw)
To: Randy Dunlap; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
Here is the latest version, addressing Randy and Dave's comments.
Thanks,
Badari
Generic helper function to remove section mappings and sysfs entries
for the section of the memory we are removing. offline_pages() correctly
adjusted zone and marked the pages reserved.
Issue: If mem_map, usemap allocation could come from different places -
kmalloc, vmalloc, alloc_pages or bootmem. There is no easy way
to find and free up bootmem allocations.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
include/linux/memory_hotplug.h | 6 +++-
mm/memory_hotplug.c | 55 +++++++++++++++++++++++++++++++++++++++++
mm/sparse.c | 45 +++++++++++++++++++++++++++++++--
3 files changed, 102 insertions(+), 4 deletions(-)
Index: linux-2.6.25-rc3.save/mm/memory_hotplug.c
===================================================================
--- linux-2.6.25-rc3.save.orig/mm/memory_hotplug.c 2008-03-05 10:44:30.000000000 -0800
+++ linux-2.6.25-rc3.save/mm/memory_hotplug.c 2008-03-06 15:08:45.000000000 -0800
@@ -102,6 +102,21 @@ static int __add_section(struct zone *zo
return register_new_memory(__pfn_to_section(phys_start_pfn));
}
+static int __remove_section(struct zone *zone, struct mem_section *ms)
+{
+ int ret = -EINVAL;
+
+ if (!valid_section(ms))
+ return ret;
+
+ ret = unregister_memory_section(ms);
+ if (ret)
+ return ret;
+
+ sparse_remove_one_section(zone, ms);
+ return 0;
+}
+
/*
* Reasonably generic function for adding memory. It is
* expected that archs that support memory hotplug will
@@ -135,6 +150,46 @@ int __add_pages(struct zone *zone, unsig
}
EXPORT_SYMBOL_GPL(__add_pages);
+/**
+ * __remove_pages() - remove sections of pages from a zone
+ * @zone: zone from which pages need to be removed
+ * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ *
+ * Generic helper function to remove section mappings and sysfs entries
+ * for the section of the memory we are removing. Caller needs to make
+ * sure that pages are marked reserved and zones are adjust properly by
+ * calling offline_pages().
+ */
+int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+ unsigned long nr_pages)
+{
+ unsigned long i, ret = 0;
+ int sections_to_remove;
+ unsigned long flags;
+ struct pglist_data *pgdat = zone->zone_pgdat;
+
+ /*
+ * We can only remove entire sections
+ */
+ BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
+ BUG_ON(nr_pages % PAGES_PER_SECTION);
+
+ release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
+
+ sections_to_remove = nr_pages / PAGES_PER_SECTION;
+ for (i = 0; i < sections_to_remove; i++) {
+ unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+ pgdat_resize_lock(pgdat, &flags);
+ ret = __remove_section(zone, __pfn_to_section(pfn));
+ pgdat_resize_unlock(pgdat, &flags);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__remove_pages);
+
static void grow_zone_span(struct zone *zone,
unsigned long start_pfn, unsigned long end_pfn)
{
Index: linux-2.6.25-rc3.save/mm/sparse.c
===================================================================
--- linux-2.6.25-rc3.save.orig/mm/sparse.c 2008-03-05 10:44:30.000000000 -0800
+++ linux-2.6.25-rc3.save/mm/sparse.c 2008-03-06 15:15:18.000000000 -0800
@@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
}
/*
- * We need this if we ever free the mem_maps. While not implemented yet,
- * this function is included for parity with its sibling.
+ * Decode mem_map from the coded memmap
*/
-static __attribute((unused))
+static
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
+ /* mask off the extra low bits of information */
+ coded_mem_map &= SECTION_MAP_MASK;
return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
}
@@ -363,6 +364,28 @@ static void __kfree_section_memmap(struc
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+{
+ if (!usemap)
+ return;
+
+ /*
+ * Check to see if allocation came from hot-plug-add
+ */
+ if (PageSlab(virt_to_page(usemap))) {
+ kfree(usemap);
+ if (memmap)
+ __kfree_section_memmap(memmap, PAGES_PER_SECTION);
+ return;
+ }
+
+ /*
+ * TODO: Allocations came from bootmem - how do I free up ?
+ */
+ printk(KERN_WARNING "Not freeing up allocations from bootmem "
+ "- leaking memory\n");
+}
+
/*
* returns the number of sections whose mem_maps were properly
* set. If this is <=0, then that means that the passed-in
@@ -415,4 +438,20 @@ out:
}
return ret;
}
+
+void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
+{
+ struct page *memmap = NULL;
+ unsigned long *usemap = NULL;
+
+ if (ms->section_mem_map) {
+ usemap = ms->pageblock_flags;
+ memmap = sparse_decode_mem_map(ms->section_mem_map,
+ __section_nr(ms));
+ ms->section_mem_map = 0;
+ ms->pageblock_flags = NULL;
+ }
+
+ free_section_usemap(memmap, usemap);
+}
#endif
Index: linux-2.6.25-rc3.save/include/linux/memory_hotplug.h
===================================================================
--- linux-2.6.25-rc3.save.orig/include/linux/memory_hotplug.h 2008-03-05 10:44:30.000000000 -0800
+++ linux-2.6.25-rc3.save/include/linux/memory_hotplug.h 2008-03-06 15:02:13.000000000 -0800
@@ -8,6 +8,7 @@
struct page;
struct zone;
struct pglist_data;
+struct mem_section;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -64,9 +65,11 @@ extern int offline_pages(unsigned long,
/* reasonably generic interface to expand the physical pages in a zone */
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
+extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
/*
- * Walk thorugh all memory which is registered as resource.
+ * Walk through all memory which is registered as resource.
* arg is (start_pfn, nr_pages, private_arg_pointer)
*/
extern int walk_memory_resource(unsigned long start_pfn,
@@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64
extern int remove_memory(u64 start, u64 size);
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages);
+extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
#endif /* __LINUX_MEMORY_HOTPLUG_H */
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 18:55 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
2008-03-06 19:08 ` Randy Dunlap
@ 2008-03-06 20:54 ` Dave Hansen
2008-03-06 21:42 ` Badari Pulavarty
2008-03-21 15:25 ` Yasunori Goto
2 siblings, 1 reply; 23+ messages in thread
From: Dave Hansen @ 2008-03-06 20:54 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
On Thu, 2008-03-06 at 10:55 -0800, Badari Pulavarty wrote:
> + if (memmap)
> + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
> + return;
> + }
> +
> + /*
> + * Allocations came from bootmem - how do I free up ?
> + */
> +
Shouldn't we figure this one out before merging?
I think we at least need a printk() there.
-- Dave
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 20:54 ` Dave Hansen
@ 2008-03-06 21:42 ` Badari Pulavarty
2008-03-07 1:35 ` Yasunori Goto
0 siblings, 1 reply; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-06 21:42 UTC (permalink / raw)
To: Dave Hansen; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto, Andrew Morton
On Thu, 2008-03-06 at 12:54 -0800, Dave Hansen wrote:
> On Thu, 2008-03-06 at 10:55 -0800, Badari Pulavarty wrote:
> > + if (memmap)
> > + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
> > + return;
> > + }
> > +
> > + /*
> > + * Allocations came from bootmem - how do I free up ?
> > + */
> > +
>
> Shouldn't we figure this one out before merging?
>
> I think we at least need a printk() there.
I can add a printk(). I am hoping Yasunori Goto has something to
handle this, before we really merge into mainline.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 21:42 ` Badari Pulavarty
@ 2008-03-07 1:35 ` Yasunori Goto
2008-03-07 16:36 ` Badari Pulavarty
2008-03-07 16:44 ` Badari Pulavarty
0 siblings, 2 replies; 23+ messages in thread
From: Yasunori Goto @ 2008-03-07 1:35 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Dave Hansen, lkml, linuxppc-dev, paulus, Andrew Morton
Hi Badari-san.
> On Thu, 2008-03-06 at 12:54 -0800, Dave Hansen wrote:
> > On Thu, 2008-03-06 at 10:55 -0800, Badari Pulavarty wrote:
> > > + if (memmap)
> > > + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
> > > + return;
> > > + }
> > > +
> > > + /*
> > > + * Allocations came from bootmem - how do I free up ?
> > > + */
> > > +
> >
> > Shouldn't we figure this one out before merging?
> >
> > I think we at least need a printk() there.
>
> I can add a printk(). I am hoping Yasunori Goto has something to
> handle this, before we really merge into mainline.
Ah, yes.
I'm making patches for around here. I'm sorry for your waiting.
BTW, do you hurry for merging your patch?
To be honest, I would like to solve not only here
but also some other issues.
But, if you hurry, I'll concentrate to solve only this.
Bye.
--
Yasunori Goto
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-07 1:35 ` Yasunori Goto
@ 2008-03-07 16:36 ` Badari Pulavarty
2008-03-07 16:44 ` Badari Pulavarty
1 sibling, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-07 16:36 UTC (permalink / raw)
To: Yasunori Goto; +Cc: Dave Hansen, lkml, linuxppc-dev, paulus, Andrew Morton
Yasunori Goto wrote:
> Hi Badari-san.
>
>
>> On Thu, 2008-03-06 at 12:54 -0800, Dave Hansen wrote:
>>
>>> On Thu, 2008-03-06 at 10:55 -0800, Badari Pulavarty wrote:
>>>
>>>> + if (memmap)
>>>> + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
>>>> + return;
>>>> + }
>>>> +
>>>> + /*
>>>> + * Allocations came from bootmem - how do I free up ?
>>>> + */
>>>> +
>>>>
>>> Shouldn't we figure this one out before merging?
>>>
>>> I think we at least need a printk() there.
>>>
>> I can add a printk(). I am hoping Yasunori Goto has something to
>> handle this, before we really merge into mainline.
>>
>
> Ah, yes.
> I'm making patches for around here. I'm sorry for your waiting.
>
> BTW, do you hurry for merging your patch?
> To be honest, I would like to solve not only here
> but also some other issues.
> But, if you hurry, I'll concentrate to solve only this.
>
> Bye.
>
>
I am hoping to get all of this merged into 2.6.26. Till then I would
like this to
be tested in -mm. I am not in a hurry, but I would like to make sure
some one is
working on the issue. Please let me know, if you have something to test
- I will
be happy to help out.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-07 1:35 ` Yasunori Goto
2008-03-07 16:36 ` Badari Pulavarty
@ 2008-03-07 16:44 ` Badari Pulavarty
1 sibling, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-07 16:44 UTC (permalink / raw)
To: Yasunori Goto; +Cc: Dave Hansen, lkml, linuxppc-dev, paulus, Andrew Morton
Yasunori Goto wrote:
> Hi Badari-san.
>
>
>> On Thu, 2008-03-06 at 12:54 -0800, Dave Hansen wrote:
>>
>>> On Thu, 2008-03-06 at 10:55 -0800, Badari Pulavarty wrote:
>>>
>>>> + if (memmap)
>>>> + __kfree_section_memmap(memmap, PAGES_PER_SECTION);
>>>> + return;
>>>> + }
>>>> +
>>>> + /*
>>>> + * Allocations came from bootmem - how do I free up ?
>>>> + */
>>>> +
>>>>
>>> Shouldn't we figure this one out before merging?
>>>
>>> I think we at least need a printk() there.
>>>
>> I can add a printk(). I am hoping Yasunori Goto has something to
>> handle this, before we really merge into mainline.
>>
>
> Ah, yes.
> I'm making patches for around here. I'm sorry for your waiting.
>
> BTW, do you hurry for merging your patch?
> To be honest, I would like to solve not only here
> but also some other issues.
> But, if you hurry, I'll concentrate to solve only this.
>
> Bye.
>
I am hoping to merge this into 2.6.26. I am not in a hurry, but would like
to make sure someone is working on the issue. If you have something
to test, feel free to pass it to me - I will be happy to test.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-06 18:55 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
2008-03-06 19:08 ` Randy Dunlap
2008-03-06 20:54 ` Dave Hansen
@ 2008-03-21 15:25 ` Yasunori Goto
2008-03-21 16:55 ` Badari Pulavarty
2 siblings, 1 reply; 23+ messages in thread
From: Yasunori Goto @ 2008-03-21 15:25 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Andrew Morton
Badari-san.
> Index: linux-2.6.25-rc2/mm/memory_hotplug.c
> ===================================================================
> --- linux-2.6.25-rc2.orig/mm/memory_hotplug.c 2008-02-27 12:58:17.000000000 -0800
> +++ linux-2.6.25-rc2/mm/memory_hotplug.c 2008-02-27 16:06:50.000000000 -0800
> @@ -102,6 +102,21 @@ static int __add_section(struct zone *zo
> return register_new_memory(__pfn_to_section(phys_start_pfn));
> }
>
> +static int __remove_section(struct zone *zone, struct mem_section *ms)
> +{
> + int ret = -EINVAL;
> +
> + if (!valid_section(ms))
> + return ret;
> +
> + ret = unregister_memory_section(ms);
> + if (ret)
> + return ret;
> +
> + sparse_remove_one_section(zone, ms);
> + return 0;
> +}
> +
> /*
> * Reasonably generic function for adding memory. It is
> * expected that archs that support memory hotplug will
> @@ -135,6 +150,35 @@ int __add_pages(struct zone *zone, unsig
> }
> EXPORT_SYMBOL_GPL(__add_pages);
>
> +int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> + unsigned long nr_pages)
> +{
> + unsigned long i, ret = 0;
> + int sections_to_remove;
> + unsigned long flags;
> + struct pglist_data *pgdat = zone->zone_pgdat;
> +
> + /*
> + * We can only remove entire sections
> + */
> + BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
> + BUG_ON(nr_pages % PAGES_PER_SECTION);
> +
> + release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
> +
> + sections_to_remove = nr_pages / PAGES_PER_SECTION;
> + for (i = 0; i < sections_to_remove; i++) {
> + unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
> + pgdat_resize_lock(pgdat, &flags);
> + ret = __remove_section(zone, __pfn_to_section(pfn));
> + pgdat_resize_unlock(pgdat, &flags);
> + if (ret)
> + break;
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(__remove_pages);
Here may be a bug.
__remove_section() is called with pgdat_resize_lock() which is
spin_lock_irqsave().
__remove_section()
|
+--> unregister_memory_section()
|
+--> remove_memory_block()
|
+--> unregister_memory()
|
+--> sysdev_unregister()
sysdev_unregister() calls mutex_lock().
It might sleep with irq disable, right?
I found BUG()'s messages by this.
Bye.
--
Yasunori Goto
^ permalink raw reply [flat|nested] 23+ messages in thread
* Re: [PATCH 1/5] generic __remove_pages() support
2008-03-21 15:25 ` Yasunori Goto
@ 2008-03-21 16:55 ` Badari Pulavarty
0 siblings, 0 replies; 23+ messages in thread
From: Badari Pulavarty @ 2008-03-21 16:55 UTC (permalink / raw)
To: Yasunori Goto; +Cc: lkml, linuxppc-dev, paulus, Andrew Morton
On Sat, 2008-03-22 at 00:25 +0900, Yasunori Goto wrote:
> Badari-san.
>
> > Index: linux-2.6.25-rc2/mm/memory_hotplug.c
> > ===================================================================
> > --- linux-2.6.25-rc2.orig/mm/memory_hotplug.c 2008-02-27 12:58:17.000000000 -0800
> > +++ linux-2.6.25-rc2/mm/memory_hotplug.c 2008-02-27 16:06:50.000000000 -0800
> > @@ -102,6 +102,21 @@ static int __add_section(struct zone *zo
> > return register_new_memory(__pfn_to_section(phys_start_pfn));
> > }
> >
> > +static int __remove_section(struct zone *zone, struct mem_section *ms)
> > +{
> > + int ret = -EINVAL;
> > +
> > + if (!valid_section(ms))
> > + return ret;
> > +
> > + ret = unregister_memory_section(ms);
> > + if (ret)
> > + return ret;
> > +
> > + sparse_remove_one_section(zone, ms);
> > + return 0;
> > +}
> > +
> > /*
> > * Reasonably generic function for adding memory. It is
> > * expected that archs that support memory hotplug will
> > @@ -135,6 +150,35 @@ int __add_pages(struct zone *zone, unsig
> > }
> > EXPORT_SYMBOL_GPL(__add_pages);
> >
> > +int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> > + unsigned long nr_pages)
> > +{
> > + unsigned long i, ret = 0;
> > + int sections_to_remove;
> > + unsigned long flags;
> > + struct pglist_data *pgdat = zone->zone_pgdat;
> > +
> > + /*
> > + * We can only remove entire sections
> > + */
> > + BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
> > + BUG_ON(nr_pages % PAGES_PER_SECTION);
> > +
> > + release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
> > +
> > + sections_to_remove = nr_pages / PAGES_PER_SECTION;
> > + for (i = 0; i < sections_to_remove; i++) {
> > + unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
> > + pgdat_resize_lock(pgdat, &flags);
> > + ret = __remove_section(zone, __pfn_to_section(pfn));
> > + pgdat_resize_unlock(pgdat, &flags);
> > + if (ret)
> > + break;
> > + }
> > + return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(__remove_pages);
>
> Here may be a bug.
> __remove_section() is called with pgdat_resize_lock() which is
> spin_lock_irqsave().
>
> __remove_section()
> |
> +--> unregister_memory_section()
> |
> +--> remove_memory_block()
> |
> +--> unregister_memory()
> |
> +--> sysdev_unregister()
>
> sysdev_unregister() calls mutex_lock().
> It might sleep with irq disable, right?
> I found BUG()'s messages by this.
Hmm. I think you are right. Let me take a look
and see if we can move sysdev_unregister() outside
the lock.
Thanks for finding it.
Thanks,
Badari
^ permalink raw reply [flat|nested] 23+ messages in thread