linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates
@ 2008-03-28  0:33 Badari Pulavarty
  2008-03-28  0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
                   ` (4 more replies)
  0 siblings, 5 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:33 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

Hi Andrew & Paul,

Here are the updates for hotplug memory remove. Most changes are
PPC specific, but I would like these to be included in next -mm
for easy testing and review, before including in Paul's tree.

eHEA driver folks are verifying if the exported interface
walk_memory_resource() is good enough for their needs. And also,
we are verifying the code on x86_64. Once that is done, we may
be able to cleanup some of the code (make remove_memory() arch
generic).

[PATCH 1/5] generic __remove_pages() support
[PATCH 2/5] [PPC] htab_remove_mapping() error handling
[PATCH 3/5] [PPC] hotplug memory notifications for ppc
[PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
[PATCH 5/5] [PPC] provide walk_memory_resource() for ppc

Testing: All the patches are tested on p-series LPAR configuration,
writing to /sysfs & /proc, through DLPAR tools and through HMC.
Testing on x86_64 needs more work.

Thanks,
Badari



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/5] generic __remove_pages() support
  2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
@ 2008-03-28  0:35 ` Badari Pulavarty
  2008-03-28  2:26   ` Yasunori Goto
  2008-03-28  0:37 ` [PATCH 2/5] [PPC] htab_remove_mapping() error handling Badari Pulavarty
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:35 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

Generic helper function to remove section mappings and sysfs entries
for the section of the memory we are removing.  offline_pages() has already
adjusted the zone correctly and marked the pages reserved.

TODO: Yasunori Goto is working on patches to free up allocations from bootmem.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>

---
 include/linux/memory_hotplug.h |    6 +++-
 mm/memory_hotplug.c            |   55 +++++++++++++++++++++++++++++++++++++++++
 mm/sparse.c                    |   45 +++++++++++++++++++++++++++++++--
 3 files changed, 102 insertions(+), 4 deletions(-)

Index: linux-2.6.25-rc5/mm/memory_hotplug.c
===================================================================
--- linux-2.6.25-rc5.orig/mm/memory_hotplug.c	2008-03-21 07:00:37.000000000 -0800
+++ linux-2.6.25-rc5/mm/memory_hotplug.c	2008-03-25 15:03:58.000000000 -0800
@@ -102,6 +102,25 @@ static int __add_section(struct zone *zo
 	return register_new_memory(__pfn_to_section(phys_start_pfn));
 }
 
+static int __remove_section(struct zone *zone, struct mem_section *ms)
+{
+	unsigned long flags;
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int ret = -EINVAL;
+
+	if (!valid_section(ms))
+		return ret;
+
+	ret = unregister_memory_section(ms);
+	if (ret)
+		return ret;
+
+	pgdat_resize_lock(pgdat, &flags);
+	sparse_remove_one_section(zone, ms);
+	pgdat_resize_unlock(pgdat, &flags);
+	return 0;
+}
+
 /*
  * Reasonably generic function for adding memory.  It is
  * expected that archs that support memory hotplug will
@@ -135,6 +154,42 @@ int __add_pages(struct zone *zone, unsig
 }
 EXPORT_SYMBOL_GPL(__add_pages);
 
+/**
+ * __remove_pages() - remove sections of pages from a zone
+ * @zone: zone from which pages need to be removed
+ * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
+ * @nr_pages: number of pages to remove (must be multiple of section size)
+ *
+ * Generic helper function to remove section mappings and sysfs entries
+ * for the section of the memory we are removing. Caller needs to make
+ * sure that pages are marked reserved and zones are adjusted properly by
+ * calling offline_pages().
+ */
+int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
+		 unsigned long nr_pages)
+{
+	unsigned long i, ret = 0;
+	int sections_to_remove;
+
+	/*
+	 * We can only remove entire sections
+	 */
+	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
+	BUG_ON(nr_pages % PAGES_PER_SECTION);
+
+	release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
+
+	sections_to_remove = nr_pages / PAGES_PER_SECTION;
+	for (i = 0; i < sections_to_remove; i++) {
+		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
+		ret = __remove_section(zone, __pfn_to_section(pfn));
+		if (ret)
+			break;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__remove_pages);
+
 static void grow_zone_span(struct zone *zone,
 		unsigned long start_pfn, unsigned long end_pfn)
 {
Index: linux-2.6.25-rc5/mm/sparse.c
===================================================================
--- linux-2.6.25-rc5.orig/mm/sparse.c	2008-03-21 07:00:37.000000000 -0800
+++ linux-2.6.25-rc5/mm/sparse.c	2008-03-25 13:59:51.000000000 -0800
@@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
 }
 
 /*
- * We need this if we ever free the mem_maps.  While not implemented yet,
- * this function is included for parity with its sibling.
+ * Decode mem_map from the coded memmap
  */
-static __attribute((unused))
+static
 struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
 {
+	/* mask off the extra low bits of information */
+	coded_mem_map &= SECTION_MAP_MASK;
 	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
 }
 
@@ -363,6 +364,28 @@ static void __kfree_section_memmap(struc
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
+static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+{
+	if (!usemap)
+		return;
+
+	/*
+	 * Check to see if allocation came from hot-plug-add
+	 */
+	if (PageSlab(virt_to_page(usemap))) {
+		kfree(usemap);
+		if (memmap)
+			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
+		return;
+	}
+
+	/*
+	 * TODO: Allocations came from bootmem - how do I free up ?
+	 */
+	printk(KERN_WARNING "Not freeing up allocations from bootmem "
+			"- leaking memory\n");
+}
+
 /*
  * returns the number of sections whose mem_maps were properly
  * set.  If this is <=0, then that means that the passed-in
@@ -415,4 +438,20 @@ out:
 	}
 	return ret;
 }
+
+void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
+{
+	struct page *memmap = NULL;
+	unsigned long *usemap = NULL;
+
+	if (ms->section_mem_map) {
+		usemap = ms->pageblock_flags;
+		memmap = sparse_decode_mem_map(ms->section_mem_map,
+						__section_nr(ms));
+		ms->section_mem_map = 0;
+		ms->pageblock_flags = NULL;
+	}
+
+	free_section_usemap(memmap, usemap);
+}
 #endif
Index: linux-2.6.25-rc5/include/linux/memory_hotplug.h
===================================================================
--- linux-2.6.25-rc5.orig/include/linux/memory_hotplug.h	2008-03-21 07:00:36.000000000 -0800
+++ linux-2.6.25-rc5/include/linux/memory_hotplug.h	2008-03-25 13:59:51.000000000 -0800
@@ -8,6 +8,7 @@
 struct page;
 struct zone;
 struct pglist_data;
+struct mem_section;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
@@ -64,9 +65,11 @@ extern int offline_pages(unsigned long, 
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
+extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
+	unsigned long nr_pages);
 
 /*
- * Walk thorugh all memory which is registered as resource.
+ * Walk through all memory which is registered as resource.
  * arg is (start_pfn, nr_pages, private_arg_pointer)
  */
 extern int walk_memory_resource(unsigned long start_pfn,
@@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64 
 extern int remove_memory(u64 start, u64 size);
 extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 								int nr_pages);
+extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
 
 #endif /* __LINUX_MEMORY_HOTPLUG_H */



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/5] [PPC] htab_remove_mapping() error handling
  2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
  2008-03-28  0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
@ 2008-03-28  0:37 ` Badari Pulavarty
  2008-03-28  0:38 ` [PATCH 3/5] [PPC] hotplug memory notifications for ppc Badari Pulavarty
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:37 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

If the sub-arch doesn't support hpte_removebolted(), gracefully
return failure rather than success.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
 arch/powerpc/mm/hash_utils_64.c |   14 +++++++++-----
 include/asm-powerpc/sparsemem.h |    2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

Index: linux-2.6.25-rc3/arch/powerpc/mm/hash_utils_64.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/mm/hash_utils_64.c	2008-03-05 10:14:28.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/mm/hash_utils_64.c	2008-03-05 10:18:55.000000000 -0800
@@ -192,7 +192,7 @@ int htab_bolt_mapping(unsigned long vsta
 	return ret < 0 ? ret : 0;
 }
 
-static void htab_remove_mapping(unsigned long vstart, unsigned long vend,
+static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 		      int psize, int ssize)
 {
 	unsigned long vaddr;
@@ -202,12 +202,15 @@ static void htab_remove_mapping(unsigned
 	step = 1 << shift;
 
 	if (!ppc_md.hpte_removebolted) {
-		printk("Sub-arch doesn't implement hpte_removebolted\n");
-		return;
+		printk(KERN_WARNING "Sub-arch doesn't implement "
+				"hpte_removebolted\n");
+		return -EINVAL;
 	}
 
 	for (vaddr = vstart; vaddr < vend; vaddr += step)
 		ppc_md.hpte_removebolted(vaddr, psize, ssize);
+
+	return 0;
 }
 
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
@@ -449,9 +452,10 @@ void create_section_mapping(unsigned lon
 			mmu_linear_psize, mmu_kernel_ssize));
 }
 
-void remove_section_mapping(unsigned long start, unsigned long end)
+int remove_section_mapping(unsigned long start, unsigned long end)
 {
-	htab_remove_mapping(start, end, mmu_linear_psize, mmu_kernel_ssize);
+	return htab_remove_mapping(start, end, mmu_linear_psize,
+			mmu_kernel_ssize);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
Index: linux-2.6.25-rc3/include/asm-powerpc/sparsemem.h
===================================================================
--- linux-2.6.25-rc3.orig/include/asm-powerpc/sparsemem.h	2008-03-05 10:14:31.000000000 -0800
+++ linux-2.6.25-rc3/include/asm-powerpc/sparsemem.h	2008-03-05 10:19:09.000000000 -0800
@@ -15,7 +15,7 @@
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 extern void create_section_mapping(unsigned long start, unsigned long end);
-extern void remove_section_mapping(unsigned long start, unsigned long end);
+extern int remove_section_mapping(unsigned long start, unsigned long end);
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
 #else



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/5] [PPC] hotplug memory notifications for ppc
  2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
  2008-03-28  0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
  2008-03-28  0:37 ` [PATCH 2/5] [PPC] htab_remove_mapping() error handling Badari Pulavarty
@ 2008-03-28  0:38 ` Badari Pulavarty
  2008-03-28  0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
  2008-03-28  0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
  4 siblings, 0 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:38 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

Hotplug memory notifier for ppc64. This gets invoked by writing
the device-node that needs to be removed to /proc/ppc64/ofdt.
We need to adjust the sections and remove sysfs entries by
calling __remove_pages(). Then call arch specific code to
get rid of htab mappings for the section of memory.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Reviewed-by: Michael Ellerman <michael@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/Makefile         |    1 
 arch/powerpc/platforms/pseries/hotplug-memory.c |   98 ++++++++++++++++++++++++
 2 files changed, 99 insertions(+)

Index: linux-2.6.25-rc2/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.25-rc2/arch/powerpc/platforms/pseries/hotplug-memory.c	2008-02-29 09:25:14.000000000 -0800
@@ -0,0 +1,98 @@
+/*
+ * pseries Memory Hotplug infrastructure.
+ *
+ * Copyright (C) 2008 Badari Pulavarty, IBM Corporation
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/of.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/pSeries_reconfig.h>
+
+static int pseries_remove_memory(struct device_node *np)
+{
+	const char *type;
+	const unsigned int *my_index;
+	const unsigned int *regs;
+	u64 start_pfn, start;
+	struct zone *zone;
+	int ret = -EINVAL;
+
+	/*
+	 * Check to see if we are actually removing memory
+	 */
+	type = of_get_property(np, "device_type", NULL);
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/*
+	 * Find the memory index and size of the removing section
+	 */
+	my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+	if (!my_index)
+		return ret;
+
+	regs = of_get_property(np, "reg", NULL);
+	if (!regs)
+		return ret;
+
+	start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+	zone = page_zone(pfn_to_page(start_pfn));
+
+	/*
+	 * Remove section mappings and sysfs entries for the
+	 * section of the memory we are removing.
+	 *
+	 * NOTE: Ideally, this should be done in generic code like
+	 * remove_memory(). But remove_memory() gets called by writing
+	 * to sysfs "state" file and we can't remove sysfs entries
+	 * while writing to it. So we have to defer it to here.
+	 */
+	ret = __remove_pages(zone, start_pfn, regs[3] >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	/*
+	 * Remove htab bolted mappings for this section of memory
+	 */
+ 	start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
+ 	ret = remove_section_mapping(start, start + regs[3]);
+	return ret;
+}
+
+static int pseries_memory_notifier(struct notifier_block *nb,
+				unsigned long action, void *node)
+{
+	int err = NOTIFY_OK;
+
+	switch (action) {
+	case PSERIES_RECONFIG_ADD:
+		break;
+	case PSERIES_RECONFIG_REMOVE:
+		if (pseries_remove_memory(node))
+			err = NOTIFY_BAD;
+		break;
+	default:
+		err = NOTIFY_DONE;
+		break;
+	}
+	return err;
+}
+
+static struct notifier_block pseries_mem_nb = {
+	.notifier_call = pseries_memory_notifier,
+};
+
+static int __init pseries_memory_hotplug_init(void)
+{
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		pSeries_reconfig_notifier_register(&pseries_mem_nb);
+
+	return 0;
+}
+machine_device_initcall(pseries, pseries_memory_hotplug_init);
Index: linux-2.6.25-rc2/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- linux-2.6.25-rc2.orig/arch/powerpc/platforms/pseries/Makefile	2008-02-28 08:15:53.000000000 -0800
+++ linux-2.6.25-rc2/arch/powerpc/platforms/pseries/Makefile	2008-02-28 08:17:57.000000000 -0800
@@ -14,6 +14,7 @@ obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
 obj-$(CONFIG_PCI_MSI)	+= msi.o
 
 obj-$(CONFIG_HOTPLUG_CPU)	+= hotplug-cpu.o
+obj-$(CONFIG_MEMORY_HOTPLUG)	+= hotplug-memory.o
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
  2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
                   ` (2 preceding siblings ...)
  2008-03-28  0:38 ` [PATCH 3/5] [PPC] hotplug memory notifications for ppc Badari Pulavarty
@ 2008-03-28  0:39 ` Badari Pulavarty
  2008-03-28  2:40   ` Kumar Gala
  2008-05-15  6:49   ` Benjamin Herrenschmidt
  2008-03-28  0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
  4 siblings, 2 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:39 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

ppc kernel maintains information about logical memory blocks in
lmb.memory structure at the boot time. It's not updated for
hotplug memory add/remove. hotplug memory notifier for memory
add/remove now updates lmb.memory.

This information is useful for eHEA driver to find out the memory 
layout and holes.

NOTE: No special locking is needed for lmb_add() and lmb_remove().
Calls to these are serialized by caller. (pSeries_reconfig_chain).

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |   43 +++++++++++++++
 include/linux/lmb.h                             |    3 -
 lib/lmb.c                                       |   66 ++++++++++++++++++++----
 3 files changed, 102 insertions(+), 10 deletions(-)

Index: linux-2.6.25-rc3/lib/lmb.c
===================================================================
--- linux-2.6.25-rc3.orig/lib/lmb.c	2008-03-05 10:44:29.000000000 -0800
+++ linux-2.6.25-rc3/lib/lmb.c	2008-03-05 10:44:56.000000000 -0800
@@ -54,14 +54,13 @@ void lmb_dump_all(void)
 #endif /* DEBUG */
 }
 
-static unsigned long __init lmb_addrs_overlap(u64 base1,
-		u64 size1, u64 base2, u64 size2)
+static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
+		u64 size2)
 {
 	return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
 }
 
-static long __init lmb_addrs_adjacent(u64 base1, u64 size1,
-		u64 base2, u64 size2)
+static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
 {
 	if (base2 == base1 + size1)
 		return 1;
@@ -71,7 +70,7 @@ static long __init lmb_addrs_adjacent(u6
 	return 0;
 }
 
-static long __init lmb_regions_adjacent(struct lmb_region *rgn,
+static long lmb_regions_adjacent(struct lmb_region *rgn,
 		unsigned long r1, unsigned long r2)
 {
 	u64 base1 = rgn->region[r1].base;
@@ -82,7 +81,7 @@ static long __init lmb_regions_adjacent(
 	return lmb_addrs_adjacent(base1, size1, base2, size2);
 }
 
-static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r)
+static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
 {
 	unsigned long i;
 
@@ -94,7 +93,7 @@ static void __init lmb_remove_region(str
 }
 
 /* Assumption: base addr of region 1 < base addr of region 2 */
-static void __init lmb_coalesce_regions(struct lmb_region *rgn,
+static void lmb_coalesce_regions(struct lmb_region *rgn,
 		unsigned long r1, unsigned long r2)
 {
 	rgn->region[r1].size += rgn->region[r2].size;
@@ -129,7 +128,7 @@ void __init lmb_analyze(void)
 }
 
 /* This routine called with relocation disabled. */
-static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
+static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
 {
 	unsigned long coalesced = 0;
 	long adjacent, i;
@@ -195,7 +194,7 @@ static long __init lmb_add_region(struct
 }
 
 /* This routine may be called with relocation disabled. */
-long __init lmb_add(u64 base, u64 size)
+long lmb_add(u64 base, u64 size)
 {
 	struct lmb_region *_rgn = &(lmb.memory);
 
@@ -207,6 +206,55 @@ long __init lmb_add(u64 base, u64 size)
 
 }
 
+long lmb_remove(u64 base, u64 size)
+{
+	struct lmb_region *rgn = &(lmb.memory);
+	u64 rgnbegin, rgnend;
+	u64 end = base + size;
+	int i;
+
+	rgnbegin = rgnend = 0; /* suppress gcc warnings */
+
+	/* Find the region where (base, size) belongs to */
+	for (i=0; i < rgn->cnt; i++) {
+		rgnbegin = rgn->region[i].base;
+		rgnend = rgnbegin + rgn->region[i].size;
+
+		if ((rgnbegin <= base) && (end <= rgnend))
+			break;
+	}
+
+	/* Didn't find the region */
+	if (i == rgn->cnt)
+		return -1;
+
+	/* Check to see if we are removing entire region */
+	if ((rgnbegin == base) && (rgnend == end)) {
+		lmb_remove_region(rgn, i);
+		return 0;
+	}
+
+	/* Check to see if region is matching at the front */
+	if (rgnbegin == base) {
+		rgn->region[i].base = end;
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/* Check to see if the region is matching at the end */
+	if (rgnend == end) {
+		rgn->region[i].size -= size;
+		return 0;
+	}
+
+	/*
+	 * We need to split the entry -  adjust the current one to the
+	 * beginning of the hole and add the region after hole.
+	 */
+	rgn->region[i].size = base - rgn->region[i].base;
+	return lmb_add_region(rgn, end, rgnend - end);
+}
+
 long __init lmb_reserve(u64 base, u64 size)
 {
 	struct lmb_region *_rgn = &(lmb.reserved);
Index: linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2008-03-05 10:44:51.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c	2008-03-05 10:45:06.000000000 -0800
@@ -10,6 +10,7 @@
  */
 
 #include <linux/of.h>
+#include <linux/lmb.h>
 #include <asm/firmware.h>
 #include <asm/machdep.h>
 #include <asm/pSeries_reconfig.h>
@@ -58,6 +59,11 @@ static int pseries_remove_memory(struct 
 		return ret;
 
 	/*
+	 * Update memory regions for memory remove
+	 */
+	lmb_remove(start_pfn << PAGE_SHIFT, regs[3]);
+
+	/*
 	 * Remove htab bolted mappings for this section of memory
 	 */
  	start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
@@ -65,6 +71,41 @@ static int pseries_remove_memory(struct 
 	return ret;
 }
 
+static int pseries_add_memory(struct device_node *np)
+{
+	const char *type;
+	const unsigned int *my_index;
+	const unsigned int *regs;
+	u64 start_pfn;
+	int ret = -EINVAL;
+
+	/*
+	 * Check to see if we are actually adding memory
+	 */
+	type = of_get_property(np, "device_type", NULL);
+	if (type == NULL || strcmp(type, "memory") != 0)
+		return 0;
+
+	/*
+	 * Find the memory index and size of the removing section
+	 */
+	my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+	if (!my_index)
+		return ret;
+
+	regs = of_get_property(np, "reg", NULL);
+	if (!regs)
+		return ret;
+
+	start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+
+	/*
+	 * Update memory region to represent the memory add
+	 */
+	lmb_add(start_pfn << PAGE_SHIFT, regs[3]);
+	return 0;
+}
+
 static int pseries_memory_notifier(struct notifier_block *nb,
 				unsigned long action, void *node)
 {
@@ -72,6 +113,8 @@ static int pseries_memory_notifier(struc
 
 	switch (action) {
 	case PSERIES_RECONFIG_ADD:
+		if (pseries_add_memory(node))
+			err = NOTIFY_BAD;
 		break;
 	case PSERIES_RECONFIG_REMOVE:
 		if (pseries_remove_memory(node))
Index: linux-2.6.25-rc3/include/linux/lmb.h
===================================================================
--- linux-2.6.25-rc3.orig/include/linux/lmb.h	2008-03-05 10:44:29.000000000 -0800
+++ linux-2.6.25-rc3/include/linux/lmb.h	2008-03-05 10:44:56.000000000 -0800
@@ -40,7 +40,8 @@ extern struct lmb lmb;
 
 extern void __init lmb_init(void);
 extern void __init lmb_analyze(void);
-extern long __init lmb_add(u64 base, u64 size);
+extern long lmb_add(u64 base, u64 size);
+extern long lmb_remove(u64 base, u64 size);
 extern long __init lmb_reserve(u64 base, u64 size);
 extern u64 __init lmb_alloc(u64 size, u64 align);
 extern u64 __init lmb_alloc_base(u64 size,



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
  2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
                   ` (3 preceding siblings ...)
  2008-03-28  0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
@ 2008-03-28  0:39 ` Badari Pulavarty
  2008-05-13  0:17   ` Geoff Levand
  4 siblings, 1 reply; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28  0:39 UTC (permalink / raw)
  To: lkml; +Cc: linuxppc-dev, paulus, Yasunori Goto, Andrew Morton

Provide walk_memory_resource() for ppc64. PPC maintains 
logical memory region mapping in lmb.memory structures. Walk
through these structures and do the callbacks for the
contiguous chunks.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
---
 arch/powerpc/mm/mem.c |   30 +++++++++++++++++++++++-------
 include/linux/lmb.h   |    1 +
 lib/lmb.c             |   33 +++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 7 deletions(-)

Index: linux-2.6.25-rc3/lib/lmb.c
===================================================================
--- linux-2.6.25-rc3.orig/lib/lmb.c	2008-03-05 10:28:55.000000000 -0800
+++ linux-2.6.25-rc3/lib/lmb.c	2008-03-05 10:34:01.000000000 -0800
@@ -416,3 +416,36 @@ int __init lmb_is_reserved(u64 addr)
 	}
 	return 0;
 }
+
+/*
+ * Given a <base, len>, find which memory regions belong to this range.
+ * Adjust the request and return a contiguous chunk.
+ */
+int lmb_find(struct lmb_property *res)
+{
+	int i;
+	u64 rstart, rend;
+
+	rstart = res->base;
+	rend = rstart + res->size - 1;
+
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		u64 start = lmb.memory.region[i].base;
+		u64 end = start + lmb.memory.region[i].size - 1;
+
+		if (start > rend)
+			return -1;
+
+		if ((end >= rstart) && (start < rend)) {
+			/* adjust the request */
+			if (rstart < start)
+				rstart = start;
+			if (rend > end)
+				rend = end;
+			res->base = rstart;
+			res->size = rend - rstart + 1;
+			return 0;
+		}
+	}
+	return -1;
+}
Index: linux-2.6.25-rc3/arch/powerpc/mm/mem.c
===================================================================
--- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c	2008-03-05 10:14:28.000000000 -0800
+++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c	2008-03-05 10:32:16.000000000 -0800
@@ -148,19 +148,35 @@ out:
 
 /*
  * walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. On PPC64, since this range comes from /sysfs, the range
- * is guaranteed to be valid, non-overlapping and can not contain any
- * holes. By the time we get here (memory add or remove), /proc/device-tree
- * is updated and correct. Only reason we need to check against device-tree
- * would be if we allow user-land to specify a memory range through a
- * system call/ioctl etc. instead of doing offline/online through /sysfs.
+ * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
+ * Instead it maintains it in lmb.memory structures. Walk through the
+ * memory regions, find holes and callback for contiguous regions.
  */
 int
 walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
 			int (*func)(unsigned long, unsigned long, void *))
 {
-	return  (*func)(start_pfn, nr_pages, arg);
+	struct lmb_property res;
+	unsigned long pfn, len;
+	u64 end;
+	int ret = -1;
+
+	res.base = (u64) start_pfn << PAGE_SHIFT;
+	res.size = (u64) nr_pages << PAGE_SHIFT;
+
+	end = res.base + res.size - 1;
+	while ((res.base < end) && (lmb_find(&res) >= 0)) {
+		pfn = (unsigned long)(res.base >> PAGE_SHIFT);
+		len = (unsigned long)(res.size >> PAGE_SHIFT);
+		ret = (*func)(pfn, len, arg);
+		if (ret)
+			break;
+		res.base += (res.size + 1);
+		res.size = (end - res.base + 1);
+	}
+	return ret;
 }
+EXPORT_SYMBOL_GPL(walk_memory_resource);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
Index: linux-2.6.25-rc3/include/linux/lmb.h
===================================================================
--- linux-2.6.25-rc3.orig/include/linux/lmb.h	2008-03-05 10:30:06.000000000 -0800
+++ linux-2.6.25-rc3/include/linux/lmb.h	2008-03-05 10:33:12.000000000 -0800
@@ -52,6 +52,7 @@ extern u64 __init lmb_phys_mem_size(void
 extern u64 __init lmb_end_of_DRAM(void);
 extern void __init lmb_enforce_memory_limit(u64 memory_limit);
 extern int __init lmb_is_reserved(u64 addr);
+extern int lmb_find(struct lmb_property *res);
 
 extern void lmb_dump_all(void);
 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/5] generic __remove_pages() support
  2008-03-28  0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
@ 2008-03-28  2:26   ` Yasunori Goto
  0 siblings, 0 replies; 12+ messages in thread
From: Yasunori Goto @ 2008-03-28  2:26 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Andrew Morton

Ok. Thanks.

Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>


> Generic helper function to remove section mappings and sysfs entries
> for the section of the memory we are removing.  offline_pages() correctly 
> adjusted zone and marked the pages reserved.
> 
> TODO: Yasunori Goto is working on patches to freeup allocations from bootmem.
> 
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> 
> ---
>  include/linux/memory_hotplug.h |    6 +++-
>  mm/memory_hotplug.c            |   55 +++++++++++++++++++++++++++++++++++++++++
>  mm/sparse.c                    |   45 +++++++++++++++++++++++++++++++--
>  3 files changed, 102 insertions(+), 4 deletions(-)
> 
> Index: linux-2.6.25-rc5/mm/memory_hotplug.c
> ===================================================================
> --- linux-2.6.25-rc5.orig/mm/memory_hotplug.c	2008-03-21 07:00:37.000000000 -0800
> +++ linux-2.6.25-rc5/mm/memory_hotplug.c	2008-03-25 15:03:58.000000000 -0800
> @@ -102,6 +102,25 @@ static int __add_section(struct zone *zo
>  	return register_new_memory(__pfn_to_section(phys_start_pfn));
>  }
>  
> +static int __remove_section(struct zone *zone, struct mem_section *ms)
> +{
> +	unsigned long flags;
> +	struct pglist_data *pgdat = zone->zone_pgdat;
> +	int ret = -EINVAL;
> +
> +	if (!valid_section(ms))
> +		return ret;
> +
> +	ret = unregister_memory_section(ms);
> +	if (ret)
> +		return ret;
> +
> +	pgdat_resize_lock(pgdat, &flags);
> +	sparse_remove_one_section(zone, ms);
> +	pgdat_resize_unlock(pgdat, &flags);
> +	return 0;
> +}
> +
>  /*
>   * Reasonably generic function for adding memory.  It is
>   * expected that archs that support memory hotplug will
> @@ -135,6 +154,42 @@ int __add_pages(struct zone *zone, unsig
>  }
>  EXPORT_SYMBOL_GPL(__add_pages);
>  
> +/**
> + * __remove_pages() - remove sections of pages from a zone
> + * @zone: zone from which pages need to be removed
> + * @phys_start_pfn: starting pageframe (must be aligned to start of a section)
> + * @nr_pages: number of pages to remove (must be multiple of section size)
> + *
> + * Generic helper function to remove section mappings and sysfs entries
> + * for the section of the memory we are removing. Caller needs to make
> + * sure that pages are marked reserved and zones are adjust properly by
> + * calling offline_pages().
> + */
> +int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
> +		 unsigned long nr_pages)
> +{
> +	unsigned long i, ret = 0;
> +	int sections_to_remove;
> +
> +	/*
> +	 * We can only remove entire sections
> +	 */
> +	BUG_ON(phys_start_pfn & ~PAGE_SECTION_MASK);
> +	BUG_ON(nr_pages % PAGES_PER_SECTION);
> +
> +	release_mem_region(phys_start_pfn << PAGE_SHIFT, nr_pages * PAGE_SIZE);
> +
> +	sections_to_remove = nr_pages / PAGES_PER_SECTION;
> +	for (i = 0; i < sections_to_remove; i++) {
> +		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
> +		ret = __remove_section(zone, __pfn_to_section(pfn));
> +		if (ret)
> +			break;
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__remove_pages);
> +
>  static void grow_zone_span(struct zone *zone,
>  		unsigned long start_pfn, unsigned long end_pfn)
>  {
> Index: linux-2.6.25-rc5/mm/sparse.c
> ===================================================================
> --- linux-2.6.25-rc5.orig/mm/sparse.c	2008-03-21 07:00:37.000000000 -0800
> +++ linux-2.6.25-rc5/mm/sparse.c	2008-03-25 13:59:51.000000000 -0800
> @@ -198,12 +198,13 @@ static unsigned long sparse_encode_mem_m
>  }
>  
>  /*
> - * We need this if we ever free the mem_maps.  While not implemented yet,
> - * this function is included for parity with its sibling.
> + * Decode mem_map from the coded memmap
>   */
> -static __attribute((unused))
> +static
>  struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
>  {
> +	/* mask off the extra low bits of information */
> +	coded_mem_map &= SECTION_MAP_MASK;
>  	return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
>  }
>  
> @@ -363,6 +364,28 @@ static void __kfree_section_memmap(struc
>  }
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>  
> +static void free_section_usemap(struct page *memmap, unsigned long *usemap)
> +{
> +	if (!usemap)
> +		return;
> +
> +	/*
> +	 * Check to see if allocation came from hot-plug-add
> +	 */
> +	if (PageSlab(virt_to_page(usemap))) {
> +		kfree(usemap);
> +		if (memmap)
> +			__kfree_section_memmap(memmap, PAGES_PER_SECTION);
> +		return;
> +	}
> +
> +	/*
> +	 * TODO: Allocations came from bootmem - how do I free up ?
> +	 */
> +	printk(KERN_WARNING "Not freeing up allocations from bootmem "
> +			"- leaking memory\n");
> +}
> +
>  /*
>   * returns the number of sections whose mem_maps were properly
>   * set.  If this is <=0, then that means that the passed-in
> @@ -415,4 +438,20 @@ out:
>  	}
>  	return ret;
>  }
> +
> +void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
> +{
> +	struct page *memmap = NULL;
> +	unsigned long *usemap = NULL;
> +
> +	if (ms->section_mem_map) {
> +		usemap = ms->pageblock_flags;
> +		memmap = sparse_decode_mem_map(ms->section_mem_map,
> +						__section_nr(ms));
> +		ms->section_mem_map = 0;
> +		ms->pageblock_flags = NULL;
> +	}
> +
> +	free_section_usemap(memmap, usemap);
> +}
>  #endif
> Index: linux-2.6.25-rc5/include/linux/memory_hotplug.h
> ===================================================================
> --- linux-2.6.25-rc5.orig/include/linux/memory_hotplug.h	2008-03-21 07:00:36.000000000 -0800
> +++ linux-2.6.25-rc5/include/linux/memory_hotplug.h	2008-03-25 13:59:51.000000000 -0800
> @@ -8,6 +8,7 @@
>  struct page;
>  struct zone;
>  struct pglist_data;
> +struct mem_section;
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  /*
> @@ -64,9 +65,11 @@ extern int offline_pages(unsigned long, 
>  /* reasonably generic interface to expand the physical pages in a zone  */
>  extern int __add_pages(struct zone *zone, unsigned long start_pfn,
>  	unsigned long nr_pages);
> +extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
> +	unsigned long nr_pages);
>  
>  /*
> - * Walk thorugh all memory which is registered as resource.
> + * Walk through all memory which is registered as resource.
>   * arg is (start_pfn, nr_pages, private_arg_pointer)
>   */
>  extern int walk_memory_resource(unsigned long start_pfn,
> @@ -188,5 +191,6 @@ extern int arch_add_memory(int nid, u64 
>  extern int remove_memory(u64 start, u64 size);
>  extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
>  								int nr_pages);
> +extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
>  
>  #endif /* __LINUX_MEMORY_HOTPLUG_H */
> 
> 

-- 
Yasunori Goto 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
  2008-03-28  0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
@ 2008-03-28  2:40   ` Kumar Gala
  2008-03-28 16:52     ` Badari Pulavarty
  2008-05-15  6:49   ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 12+ messages in thread
From: Kumar Gala @ 2008-03-28  2:40 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto


On Mar 27, 2008, at 7:39 PM, Badari Pulavarty wrote:
> ppc kernel maintains information about logical memory blocks in
> lmb.memory structure at boot time. It's not updated for
> hotplug memory add/remove. hotplug memory notifier for memory
> add/remove now updates lmb.memory.
>
> This information is useful for eHEA driver to find out the memory
> layout and holes.
>
> NOTE: No special locking is needed for lmb_add() and lmb_remove().
> Calls to these are serialized by caller. (pSeries_reconfig_chain).
>
> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> ---
> arch/powerpc/platforms/pseries/hotplug-memory.c |   43 +++++++++++++++
> include/linux/lmb.h                             |    3 -
> lib/lmb.c                                       |   66 ++++++++++++++++++++----
> 3 files changed, 102 insertions(+), 10 deletions(-)

How is lmb_remove different than lmb_alloc?

- k

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
  2008-03-28  2:40   ` Kumar Gala
@ 2008-03-28 16:52     ` Badari Pulavarty
  0 siblings, 0 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-03-28 16:52 UTC (permalink / raw)
  To: Kumar Gala; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto

Kumar Gala wrote:
>
> On Mar 27, 2008, at 7:39 PM, Badari Pulavarty wrote:
>> ppc kernel maintains information about logical memory blocks in
>> lmb.memory structure at boot time. It's not updated for
>> hotplug memory add/remove. hotplug memory notifier for memory
>> add/remove now updates lmb.memory.
>>
>> This information is useful for eHEA driver to find out the memory
>> layout and holes.
>>
>> NOTE: No special locking is needed for lmb_add() and lmb_remove().
>> Calls to these are serialized by caller. (pSeries_reconfig_chain).
>>
>> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
>> ---
>> arch/powerpc/platforms/pseries/hotplug-memory.c |   43 +++++++++++++++
>> include/linux/lmb.h                             |    3 -
>> lib/lmb.c                                       |   66 ++++++++++++++++++++----
>> 3 files changed, 102 insertions(+), 10 deletions(-)
>
> How is lmb_remove different than lmb_alloc?
>
> - k
lmb_remove() can be used to punch a hole into the existing memory block.
lmb_alloc() tries to allocate for a given alignment; I don't think it can
adjust the current entries. Isn't it?

Thanks,
Badari


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
  2008-03-28  0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
@ 2008-05-13  0:17   ` Geoff Levand
  2008-05-13 15:09     ` Badari Pulavarty
  0 siblings, 1 reply; 12+ messages in thread
From: Geoff Levand @ 2008-05-13  0:17 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto

Hi,

I've had some trouble with this change.

Badari Pulavarty wrote:
> Provide walk_memory_resource() for ppc64. PPC maintains 
> logical memory region mapping in lmb.memory structures. Walk
> through these structures and do the callbacks for the
> contiguous chunks.

...

> --- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c	2008-03-05 10:14:28.000000000 -0800
> +++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c	2008-03-05 10:32:16.000000000 -0800
> @@ -148,19 +148,35 @@ out:
>  
>  /*
>   * walk_memory_resource() needs to make sure there is no holes in a given
> - * memory range. On PPC64, since this range comes from /sysfs, the range
> - * is guaranteed to be valid, non-overlapping and can not contain any
> - * holes. By the time we get here (memory add or remove), /proc/device-tree
> - * is updated and correct. Only reason we need to check against device-tree
> - * would be if we allow user-land to specify a memory range through a
> - * system call/ioctl etc. instead of doing offline/online through /sysfs.
> + * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
> + * Instead it maintains it in lmb.memory structures. Walk through the
> + * memory regions, find holes and callback for contiguous regions.
>   */
>  int
>  walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
>  			int (*func)(unsigned long, unsigned long, void *))
>  {
> -	return  (*func)(start_pfn, nr_pages, arg);
> +	struct lmb_property res;
> +	unsigned long pfn, len;
> +	u64 end;
> +	int ret = -1;
> +
> +	res.base = (u64) start_pfn << PAGE_SHIFT;
> +	res.size = (u64) nr_pages << PAGE_SHIFT;
> +
> +	end = res.base + res.size - 1;
> +	while ((res.base < end) && (lmb_find(&res) >= 0)) {
                                    ^^^^^^^^^^^^^^

In the PS3 platform code (arch/powerpc/platforms/ps3/mm.c) the hotplug
memory is added like this:

	...
	result = add_memory(0, start_addr, map.r1.size);
	...
	result = online_pages(start_pfn, nr_pages);
	...

In its work, online_pages() eventually calls walk_memory_resource(),
which has been changed as above to do a test on lmb_find(). I found
that this lmb_find() test always fails for PS3 since add_memory()
does not call lmb_add().

Is it the responsibility of the platform code to call lmb_add(), or
should that be done by add_memory()?

-Geoff




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc
  2008-05-13  0:17   ` Geoff Levand
@ 2008-05-13 15:09     ` Badari Pulavarty
  0 siblings, 0 replies; 12+ messages in thread
From: Badari Pulavarty @ 2008-05-13 15:09 UTC (permalink / raw)
  To: Geoff Levand; +Cc: lkml, linuxppc-dev, paulus, Yasunori Goto


On Mon, 2008-05-12 at 17:17 -0700, Geoff Levand wrote:
> Hi,
> 
> I've had some trouble with this change.
> 
> Badari Pulavarty wrote:
> > Provide walk_memory_resource() for ppc64. PPC maintains 
> > logical memory region mapping in lmb.memory structures. Walk
> > through these structures and do the callbacks for the
> > contiguous chunks.
> 
> ...
> 
> > --- linux-2.6.25-rc3.orig/arch/powerpc/mm/mem.c	2008-03-05 10:14:28.000000000 -0800
> > +++ linux-2.6.25-rc3/arch/powerpc/mm/mem.c	2008-03-05 10:32:16.000000000 -0800
> > @@ -148,19 +148,35 @@ out:
> >  
> >  /*
> >   * walk_memory_resource() needs to make sure there is no holes in a given
> > - * memory range. On PPC64, since this range comes from /sysfs, the range
> > - * is guaranteed to be valid, non-overlapping and can not contain any
> > - * holes. By the time we get here (memory add or remove), /proc/device-tree
> > - * is updated and correct. Only reason we need to check against device-tree
> > - * would be if we allow user-land to specify a memory range through a
> > - * system call/ioctl etc. instead of doing offline/online through /sysfs.
> > + * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
> > + * Instead it maintains it in lmb.memory structures. Walk through the
> > + * memory regions, find holes and callback for contiguous regions.
> >   */
> >  int
> >  walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
> >  			int (*func)(unsigned long, unsigned long, void *))
> >  {
> > -	return  (*func)(start_pfn, nr_pages, arg);
> > +	struct lmb_property res;
> > +	unsigned long pfn, len;
> > +	u64 end;
> > +	int ret = -1;
> > +
> > +	res.base = (u64) start_pfn << PAGE_SHIFT;
> > +	res.size = (u64) nr_pages << PAGE_SHIFT;
> > +
> > +	end = res.base + res.size - 1;
> > +	while ((res.base < end) && (lmb_find(&res) >= 0)) {
>                                     ^^^^^^^^^^^^^^
> 
> In the PS3 platform code (arch/pwerpc/platfroms/ps3/mm.c) the hotplug
> memory is added like this:
> 
> 	...
> 	result = add_memory(0, start_addr, map.r1.size);
> 	...
> 	result = online_pages(start_pfn, nr_pages);
> 	...
> 
> In its work, online_pages() eventually calls walk_memory_resource(),
> which has been changed as above to do a test on lmb_find(). I found
> that this lmb_find() test always fails for PS3 since add_memory()
> does not call lmb_add().
> 
> Is it the responsibility of the platform code to call lmb_add(), or
> should that be done by add_memory()?

Since "lmb" code is specific to architecture, I would prefer that
arch specific code is responsible for manipulating "lmb"s instead of
generic code.

In case of ppc64, I added lmb_add() call in /proc/device-tree
manipulation code. Are there any arch specific calls for PS3
when add/remove memory happens ? If there are no other calls,
you can do lmb_add() in ps3_mm_add_memory(). For remove, we
need to find a better place.

Thanks,
Badari


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove
  2008-03-28  0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
  2008-03-28  2:40   ` Kumar Gala
@ 2008-05-15  6:49   ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 12+ messages in thread
From: Benjamin Herrenschmidt @ 2008-05-15  6:49 UTC (permalink / raw)
  To: Badari Pulavarty; +Cc: lkml, linuxppc-dev, Andrew Morton, paulus, Yasunori Goto


On Thu, 2008-03-27 at 16:39 -0800, Badari Pulavarty wrote:
> ppc kernel maintains information about logical memory blocks in
> lmb.memory structure at boot time. It's not updated for
> hotplug memory add/remove. hotplug memory notifier for memory
> add/remove now updates lmb.memory.
> 
> This information is useful for eHEA driver to find out the memory 
> layout and holes.
> 
> NOTE: No special locking is needed for lmb_add() and lmb_remove().
> Calls to these are serialized by caller. (pSeries_reconfig_chain).

My worry here is the lack of locking...

> Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
> ---
>  arch/powerpc/platforms/pseries/hotplug-memory.c |   43 +++++++++++++++
>  include/linux/lmb.h                             |    3 -
>  lib/lmb.c                                       |   66 ++++++++++++++++++++----
>  3 files changed, 102 insertions(+), 10 deletions(-)
> 
> Index: linux-2.6.25-rc3/lib/lmb.c
> ===================================================================
> --- linux-2.6.25-rc3.orig/lib/lmb.c	2008-03-05 10:44:29.000000000 -0800
> +++ linux-2.6.25-rc3/lib/lmb.c	2008-03-05 10:44:56.000000000 -0800
> @@ -54,14 +54,13 @@ void lmb_dump_all(void)
>  #endif /* DEBUG */
>  }
>  
> -static unsigned long __init lmb_addrs_overlap(u64 base1,
> -		u64 size1, u64 base2, u64 size2)
> +static unsigned long lmb_addrs_overlap(u64 base1, u64 size1, u64 base2,
> +		u64 size2)
>  {
>  	return ((base1 < (base2+size2)) && (base2 < (base1+size1)));
>  }
>  
> -static long __init lmb_addrs_adjacent(u64 base1, u64 size1,
> -		u64 base2, u64 size2)
> +static long lmb_addrs_adjacent(u64 base1, u64 size1, u64 base2, u64 size2)
>  {
>  	if (base2 == base1 + size1)
>  		return 1;
> @@ -71,7 +70,7 @@ static long __init lmb_addrs_adjacent(u6
>  	return 0;
>  }
>  
> -static long __init lmb_regions_adjacent(struct lmb_region *rgn,
> +static long lmb_regions_adjacent(struct lmb_region *rgn,
>  		unsigned long r1, unsigned long r2)
>  {
>  	u64 base1 = rgn->region[r1].base;
> @@ -82,7 +81,7 @@ static long __init lmb_regions_adjacent(
>  	return lmb_addrs_adjacent(base1, size1, base2, size2);
>  }
>  
> -static void __init lmb_remove_region(struct lmb_region *rgn, unsigned long r)
> +static void lmb_remove_region(struct lmb_region *rgn, unsigned long r)
>  {
>  	unsigned long i;
>  
> @@ -94,7 +93,7 @@ static void __init lmb_remove_region(str
>  }
>  
>  /* Assumption: base addr of region 1 < base addr of region 2 */
> -static void __init lmb_coalesce_regions(struct lmb_region *rgn,
> +static void lmb_coalesce_regions(struct lmb_region *rgn,
>  		unsigned long r1, unsigned long r2)
>  {
>  	rgn->region[r1].size += rgn->region[r2].size;
> @@ -129,7 +128,7 @@ void __init lmb_analyze(void)
>  }
>  
>  /* This routine called with relocation disabled. */
> -static long __init lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
> +static long lmb_add_region(struct lmb_region *rgn, u64 base, u64 size)
>  {
>  	unsigned long coalesced = 0;
>  	long adjacent, i;
> @@ -195,7 +194,7 @@ static long __init lmb_add_region(struct
>  }
>  
>  /* This routine may be called with relocation disabled. */
> -long __init lmb_add(u64 base, u64 size)
> +long lmb_add(u64 base, u64 size)
>  {
>  	struct lmb_region *_rgn = &(lmb.memory);
>  
> @@ -207,6 +206,55 @@ long __init lmb_add(u64 base, u64 size)
>  
>  }
>  
> +long lmb_remove(u64 base, u64 size)
> +{
> +	struct lmb_region *rgn = &(lmb.memory);
> +	u64 rgnbegin, rgnend;
> +	u64 end = base + size;
> +	int i;
> +
> +	rgnbegin = rgnend = 0; /* supress gcc warnings */
> +
> +	/* Find the region where (base, size) belongs to */
> +	for (i=0; i < rgn->cnt; i++) {
> +		rgnbegin = rgn->region[i].base;
> +		rgnend = rgnbegin + rgn->region[i].size;
> +
> +		if ((rgnbegin <= base) && (end <= rgnend))
> +			break;
> +	}
> +
> +	/* Didn't find the region */
> +	if (i == rgn->cnt)
> +		return -1;
> +
> +	/* Check to see if we are removing entire region */
> +	if ((rgnbegin == base) && (rgnend == end)) {
> +		lmb_remove_region(rgn, i);
> +		return 0;
> +	}
> +
> +	/* Check to see if region is matching at the front */
> +	if (rgnbegin == base) {
> +		rgn->region[i].base = end;
> +		rgn->region[i].size -= size;
> +		return 0;
> +	}
> +
> +	/* Check to see if the region is matching at the end */
> +	if (rgnend == end) {
> +		rgn->region[i].size -= size;
> +		return 0;
> +	}
> +
> +	/*
> +	 * We need to split the entry -  adjust the current one to the
> +	 * beginging of the hole and add the region after hole.
> +	 */
> +	rgn->region[i].size = base - rgn->region[i].base;
> +	return lmb_add_region(rgn, end, rgnend - end);
> +}
> +
>  long __init lmb_reserve(u64 base, u64 size)
>  {
>  	struct lmb_region *_rgn = &(lmb.reserved);
> Index: linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c
> ===================================================================
> --- linux-2.6.25-rc3.orig/arch/powerpc/platforms/pseries/hotplug-memory.c	2008-03-05 10:44:51.000000000 -0800
> +++ linux-2.6.25-rc3/arch/powerpc/platforms/pseries/hotplug-memory.c	2008-03-05 10:45:06.000000000 -0800
> @@ -10,6 +10,7 @@
>   */
>  
>  #include <linux/of.h>
> +#include <linux/lmb.h>
>  #include <asm/firmware.h>
>  #include <asm/machdep.h>
>  #include <asm/pSeries_reconfig.h>
> @@ -58,6 +59,11 @@ static int pseries_remove_memory(struct 
>  		return ret;
>  
>  	/*
> +	 * Update memory regions for memory remove
> +	 */
> +	lmb_remove(start_pfn << PAGE_SHIFT, regs[3]);
> +
> +	/*
>  	 * Remove htab bolted mappings for this section of memory
>  	 */
>   	start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
> @@ -65,6 +71,41 @@ static int pseries_remove_memory(struct 
>  	return ret;
>  }
>  
> +static int pseries_add_memory(struct device_node *np)
> +{
> +	const char *type;
> +	const unsigned int *my_index;
> +	const unsigned int *regs;
> +	u64 start_pfn;
> +	int ret = -EINVAL;
> +
> +	/*
> +	 * Check to see if we are actually adding memory
> +	 */
> +	type = of_get_property(np, "device_type", NULL);
> +	if (type == NULL || strcmp(type, "memory") != 0)
> +		return 0;
> +
> +	/*
> +	 * Find the memory index and size of the removing section
> +	 */
> +	my_index = of_get_property(np, "ibm,my-drc-index", NULL);
> +	if (!my_index)
> +		return ret;
> +
> +	regs = of_get_property(np, "reg", NULL);
> +	if (!regs)
> +		return ret;
> +
> +	start_pfn = section_nr_to_pfn(*my_index & 0xffff);
> +
> +	/*
> +	 * Update memory region to represent the memory add
> +	 */
> +	lmb_add(start_pfn << PAGE_SHIFT, regs[3]);
> +	return 0;
> +}
> +
>  static int pseries_memory_notifier(struct notifier_block *nb,
>  				unsigned long action, void *node)
>  {
> @@ -72,6 +113,8 @@ static int pseries_memory_notifier(struc
>  
>  	switch (action) {
>  	case PSERIES_RECONFIG_ADD:
> +		if (pseries_add_memory(node))
> +			err = NOTIFY_BAD;
>  		break;
>  	case PSERIES_RECONFIG_REMOVE:
>  		if (pseries_remove_memory(node))
> Index: linux-2.6.25-rc3/include/linux/lmb.h
> ===================================================================
> --- linux-2.6.25-rc3.orig/include/linux/lmb.h	2008-03-05 10:44:29.000000000 -0800
> +++ linux-2.6.25-rc3/include/linux/lmb.h	2008-03-05 10:44:56.000000000 -0800
> @@ -40,7 +40,8 @@ extern struct lmb lmb;
>  
>  extern void __init lmb_init(void);
>  extern void __init lmb_analyze(void);
> -extern long __init lmb_add(u64 base, u64 size);
> +extern long lmb_add(u64 base, u64 size);
> +extern long lmb_remove(u64 base, u64 size);
>  extern long __init lmb_reserve(u64 base, u64 size);
>  extern u64 __init lmb_alloc(u64 size, u64 align);
>  extern u64 __init lmb_alloc_base(u64 size,
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2008-05-15  6:51 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-28  0:33 [PATCH 0/5] 2.6.25-rc5-mm1 hotplug memory remove updates Badari Pulavarty
2008-03-28  0:35 ` [PATCH 1/5] generic __remove_pages() support Badari Pulavarty
2008-03-28  2:26   ` Yasunori Goto
2008-03-28  0:37 ` [PATCH 2/5] [PPC] htab_remove_mapping() error handling Badari Pulavarty
2008-03-28  0:38 ` [PATCH 3/5] [PPC] hotplug memory notifications for ppc Badari Pulavarty
2008-03-28  0:39 ` [PATCH 4/5] [PPC] update lmb for hotplug memory add/remove Badari Pulavarty
2008-03-28  2:40   ` Kumar Gala
2008-03-28 16:52     ` Badari Pulavarty
2008-05-15  6:49   ` Benjamin Herrenschmidt
2008-03-28  0:39 ` [PATCH 5/5] [PPC] provide walk_memory_resource() for ppc Badari Pulavarty
2008-05-13  0:17   ` Geoff Levand
2008-05-13 15:09     ` Badari Pulavarty

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).