* [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a separate config
@ 2013-07-02  5:45 ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:45 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

We want to use CMA for allocating the hash page table and real mode area on
PPC64. Hence, move the DMA contiguous related changes into a separate config
option so that ppc64 can enable CMA without requiring DMA contiguous.

Acked-by: Michal Nazarewicz <mina86@mina86.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
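
For illustration only (not part of this patch): after the split, DMA-mapping
call sites are keyed off CONFIG_DMA_CMA, while code that only needs the
movable-page allocator (such as the ppc64 users added later in this series)
can depend on CONFIG_CMA alone. A minimal, hypothetical sketch of that
distinction:

/*
 * Illustrative sketch only, not part of the patch: after the split,
 * DMA buffer code is gated on CONFIG_DMA_CMA, while generic users of
 * the movable-page allocator only need CONFIG_CMA.
 */
#include <linux/kconfig.h>
#include <linux/printk.h>

static void report_cma_config(void)
{
	if (IS_ENABLED(CONFIG_DMA_CMA))
		pr_info("CMA backs dma_alloc_from_contiguous()\n");
	else if (IS_ENABLED(CONFIG_CMA))
		pr_info("CMA available to non-DMA users only\n");
	else
		pr_info("no CMA support configured\n");
}
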
---
 arch/arm/configs/omap2plus_defconfig  |  2 +-
 arch/arm/configs/tegra_defconfig      |  2 +-
 arch/arm/include/asm/dma-contiguous.h |  2 +-
 arch/arm/mm/dma-mapping.c             |  6 +++---
 drivers/base/Kconfig                  | 20 ++++----------------
 drivers/base/Makefile                 |  2 +-
 include/linux/dma-contiguous.h        |  2 +-
 mm/Kconfig                            | 24 ++++++++++++++++++++++++
 8 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index abbe319..098268f 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -71,7 +71,7 @@ CONFIG_MAC80211=m
 CONFIG_MAC80211_RC_PID=y
 CONFIG_MAC80211_RC_DEFAULT_PID=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CMA=y
+CONFIG_DMA_CMA=y
 CONFIG_CONNECTOR=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index f7ba3161..34ae8f2 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -79,7 +79,7 @@ CONFIG_RFKILL_GPIO=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_CMA=y
+CONFIG_DMA_CMA=y
 CONFIG_MTD=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_M25P80=y
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index 3ed37b4..e072bb2 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -2,7 +2,7 @@
 #define ASMARM_DMA_CONTIGUOUS_H
 
 #ifdef __KERNEL__
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 #include <linux/types.h>
 #include <asm-generic/dma-contiguous.h>
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef3e0f3..1fb40dc 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
 	if (!pages)
 		goto no_pages;
 
-	if (IS_ENABLED(CONFIG_CMA))
+	if (IS_ENABLED(CONFIG_DMA_CMA))
 		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
 					      atomic_pool_init);
 	else
@@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		addr = __alloc_simple_buffer(dev, size, gfp, &page);
 	else if (!(gfp & __GFP_WAIT))
 		addr = __alloc_from_pool(size, &page);
-	else if (!IS_ENABLED(CONFIG_CMA))
+	else if (!IS_ENABLED(CONFIG_DMA_CMA))
 		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
 	else
 		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
@@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		__dma_free_buffer(page, size);
 	} else if (__free_from_pool(cpu_addr, size)) {
 		return;
-	} else if (!IS_ENABLED(CONFIG_CMA)) {
+	} else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
 		__dma_free_remap(cpu_addr, size);
 		__dma_free_buffer(page, size);
 	} else {
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 07abd9d..10cd80a 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -202,11 +202,9 @@ config DMA_SHARED_BUFFER
 	  APIs extension; the file's descriptor can then be passed on to other
 	  driver.
 
-config CMA
-	bool "Contiguous Memory Allocator"
-	depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK
-	select MIGRATION
-	select MEMORY_ISOLATION
+config DMA_CMA
+	bool "DMA Contiguous Memory Allocator"
+	depends on HAVE_DMA_CONTIGUOUS && CMA
 	help
 	  This enables the Contiguous Memory Allocator which allows drivers
 	  to allocate big physically-contiguous blocks of memory for use with
@@ -215,17 +213,7 @@ config CMA
 	  For more information see <include/linux/dma-contiguous.h>.
 	  If unsure, say "n".
 
-if CMA
-
-config CMA_DEBUG
-	bool "CMA debug messages (DEVELOPMENT)"
-	depends on DEBUG_KERNEL
-	help
-	  Turns on debug messages in CMA.  This produces KERN_DEBUG
-	  messages for every CMA call as well as various messages while
-	  processing calls such as dma_alloc_from_contiguous().
-	  This option does not affect warning and error messages.
-
+if  DMA_CMA
 comment "Default contiguous memory area size:"
 
 config CMA_SIZE_MBYTES
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 4e22ce3..5d93bb5 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -6,7 +6,7 @@ obj-y			:= core.o bus.o dd.o syscore.o \
 			   attribute_container.o transport_class.o \
 			   topology.o
 obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
-obj-$(CONFIG_CMA) += dma-contiguous.o
+obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 01b5c84..00141d3 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -57,7 +57,7 @@ struct cma;
 struct page;
 struct device;
 
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 
 /*
  * There is always at least global CMA area and a few optional device
diff --git a/mm/Kconfig b/mm/Kconfig
index e742d06..26a5f81 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -477,3 +477,27 @@ config FRONTSWAP
 	  and swap data is stored as normal on the matching swap device.
 
 	  If unsure, say Y to enable frontswap.
+
+config CMA
+	bool "Contiguous Memory Allocator"
+	depends on HAVE_MEMBLOCK
+	select MIGRATION
+	select MEMORY_ISOLATION
+	help
+	  This enables the Contiguous Memory Allocator which allows other
+	  subsystems to allocate big physically-contiguous blocks of memory.
+	  CMA reserves a region of memory and allows only movable pages to
+	  be allocated from it. This way, the kernel can use the memory for
+	  pagecache and when a subsystem requests for contiguous area, the
+	  allocated pages are migrated away to serve the contiguous request.
+
+	  If unsure, say "n".
+
+config CMA_DEBUG
+	bool "CMA debug messages (DEVELOPMENT)"
+	depends on DEBUG_KERNEL && CMA
+	help
+	  Turns on debug messages in CMA.  This produces KERN_DEBUG
+	  messages for every CMA call as well as various messages while
+	  processing calls such as dma_alloc_from_contiguous().
+	  This option does not affect warning and error messages.
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02  5:45 ` Aneesh Kumar K.V
  (?)
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:45 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

The Powerpc architecture uses a hash-based page table mechanism for mapping virtual
addresses to physical addresses. The architecture requires this hash page table to
be physically contiguous. With KVM on Powerpc we currently use an early reservation
mechanism for allocating the guest hash page table. This implies that we need to
reserve a big memory region to ensure we can create a large number of guests
simultaneously with KVM on Power. Another disadvantage is that the reserved memory
is not available to the rest of the subsystems, which limits the total available
memory in the host.

This patch series switches the guest hash page table allocation to use the
contiguous memory allocator.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
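
As a reading aid only (not part of the patch), the allocation order this change
introduces in kvmppc_alloc_hpt() can be condensed as follows; the full hunk is
in the diff below, and error handling plus the shrinking-order retry loop are
omitted here:

/*
 * Condensed excerpt of the allocation order this patch introduces in
 * kvmppc_alloc_hpt() (full hunk in the diff below); error handling and
 * the successively-smaller-order retry loop are omitted.
 */
unsigned long hpt;
struct page *page = NULL;
long order = KVM_DEFAULT_HPT_ORDER;

/* 1. try the regular page allocator first ... */
hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT | __GFP_NOWARN,
		       order - PAGE_SHIFT);
/* 2. ... and only fall back to the CMA region reserved at boot */
if (!hpt) {
	page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
	if (page)
		hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
}
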
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 -
 arch/powerpc/include/asm/kvm_host.h      |   2 +-
 arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
 arch/powerpc/kernel/setup_64.c           |   2 +
 arch/powerpc/kvm/Kconfig                 |   1 +
 arch/powerpc/kvm/Makefile                |   1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |  37 +++--
 arch/powerpc/kvm/book3s_hv_builtin.c     |  91 +++++++++----
 arch/powerpc/kvm/book3s_hv_cma.c         | 227 +++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_cma.h         |  22 +++
 10 files changed, 341 insertions(+), 51 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_cma.c
 create mode 100644 arch/powerpc/kvm/book3s_hv_cma.h

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9c1ff33..f8355a9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cd..0097dab 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -259,7 +259,7 @@ struct kvm_arch {
 	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
-	struct kvmppc_linear_info *hpt_li;
+	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe..b5ef7a3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -139,8 +139,8 @@ extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
 extern struct kvmppc_linear_info *kvm_alloc_rma(void);
 extern void kvm_release_rma(struct kvmppc_linear_info *ri);
-extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
-extern void kvm_release_hpt(struct kvmppc_linear_info *li);
+extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
+extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 struct openpic;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
+extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
@@ -284,6 +285,9 @@ extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
+static inline void __init kvm_cma_reserve(void)
+{}
+
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e379d3f..ee28d1f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	kvm_cma_reserve();
+
 	/*
 	 * Reserve any gigantic pages requested on the command line.
 	 * memblock needs to have been initialized by the time this is
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f8..ffaef2c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
 	select MMU_NOTIFIER
+	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
 	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f..6640393 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -80,6 +80,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
+	book3s_hv_cma.o \
 	$(kvm-book3s_64-builtin-xics-objs-y)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 5880dfb..354f4bb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -52,8 +52,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
 	struct revmap_entry *rev;
-	struct kvmppc_linear_info *li;
-	long order = kvm_hpt_order;
+	struct page *page = NULL;
+	long order = KVM_DEFAULT_HPT_ORDER;
 
 	if (htab_orderp) {
 		order = *htab_orderp;
@@ -61,26 +61,22 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 			order = PPC_MIN_HPT_ORDER;
 	}
 
+	kvm->arch.hpt_cma_alloc = 0;
 	/*
-	 * If the user wants a different size from default,
 	 * try first to allocate it from the kernel page allocator.
+	 * We keep the CMA reserved for failed allocation.
 	 */
-	hpt = 0;
-	if (order != kvm_hpt_order) {
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, order - PAGE_SHIFT);
-		if (!hpt)
-			--order;
-	}
+	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
+			       __GFP_NOWARN, order - PAGE_SHIFT);
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		li = kvm_alloc_hpt();
-		if (li) {
-			hpt = (ulong)li->base_virt;
-			kvm->arch.hpt_li = li;
-			order = kvm_hpt_order;
-		}
+		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+		if (page) {
+			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+			kvm->arch.hpt_cma_alloc = 1;
+		} else
+			--order;
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
@@ -118,8 +114,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	return 0;
 
  out_freehpt:
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
 	else
 		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
@@ -165,8 +161,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
 	vfree(kvm->arch.revmap);
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+				1 << (kvm->arch.hpt_order - PAGE_SHIFT));
 	else
 		free_pages(kvm->arch.hpt_virt,
 			   kvm->arch.hpt_order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5..4b865c5 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,20 +13,30 @@
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+#include "book3s_hv_cma.h"
+
 #define KVM_LINEAR_RMA		0
 #define KVM_LINEAR_HPT		1
 
 static void __init kvm_linear_init_one(ulong size, int count, int type);
 static struct kvmppc_linear_info *kvm_alloc_linear(int type);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
-
-int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
-EXPORT_SYMBOL_GPL(kvm_hpt_order);
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
 
 /*************** RMA *************/
 
@@ -101,36 +111,29 @@ void kvm_release_rma(struct kvmppc_linear_info *ri)
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
- */
-
-static unsigned long kvm_hpt_count;
-
-static int __init early_parse_hpt_count(char *p)
+static int __init early_parse_kvm_cma_resv(char *p)
 {
+	pr_debug("%s(%s)\n", __func__, p);
 	if (!p)
-		return 1;
-
-	kvm_hpt_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
+		return -EINVAL;
+	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
 }
-early_param("kvm_hpt_count", early_parse_hpt_count);
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct kvmppc_linear_info *kvm_alloc_hpt(void)
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	unsigned long align_pages = HPT_ALIGN_PAGES;
+
+	/* Old CPUs require HPT aligned on a multiple of its size */
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		align_pages = nr_pages;
+	return kvm_alloc_cma(nr_pages, align_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
-void kvm_release_hpt(struct kvmppc_linear_info *li)
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-	kvm_release_linear(li);
+	kvm_release_cma(page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
@@ -211,9 +214,6 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
  */
 void __init kvm_linear_init(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
 	/* RMA */
 	/* Only do this on PPC970 in HV mode */
 	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
@@ -231,3 +231,40 @@ void __init kvm_linear_init(void)
 
 	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
 }
+
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init kvm_cma_reserve(void)
+{
+	unsigned long align_size;
+	struct memblock_region *reg;
+	phys_addr_t selected_size = 0;
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		selected_size += memblock_region_memory_end_pfn(reg) -
+				 memblock_region_memory_base_pfn(reg);
+
+	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		/*
+		 * Old CPUs require HPT aligned on a multiple of its size. So for them
+		 * make the alignment as max size we could request.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			align_size = __rounddown_pow_of_two(selected_size);
+		else
+			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+		kvm_cma_declare_contiguous(selected_size, align_size);
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 0000000..e04b269
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,227 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+struct kvm_cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ *			          for kvm hash pagetable
+ * @size:  Size of the reserved memory.
+ * @alignment:  Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+	long base_pfn;
+	phys_addr_t addr;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+	if (!size)
+		return -EINVAL;
+	/*
+	 * Sanitise input arguments.
+	 * We should be pageblock aligned for CMA.
+	 */
+	alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+	size = ALIGN(size, alignment);
+	/*
+	 * Reserve memory
+	 * Use __memblock_alloc_base() since
+	 * memblock_alloc_base() panic()s.
+	 */
+	addr = __memblock_alloc_base(size, alignment, 0);
+	if (!addr) {
+		base_pfn = -ENOMEM;
+		goto err;
+	} else
+		base_pfn = PFN_DOWN(addr);
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = base_pfn;
+	cma->count    = size >> PAGE_SHIFT;
+	pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+	int ret;
+	struct page *page = NULL;
+	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long mask, pfn, pageno, start = 0;
+
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+		 (void *)cma, nr_pages, align_pages);
+
+	if (!nr_pages)
+		return NULL;
+
+	VM_BUG_ON(!is_power_of_2(align_pages));
+	mask = align_pages - 1;
+
+	mutex_lock(&kvm_cma_mutex);
+	for (;;) {
+		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
+						    start, nr_pages, mask);
+		if (pageno >= cma->count)
+			break;
+
+		pfn = cma->base_pfn + pageno;
+		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+		if (ret == 0) {
+			bitmap_set(cma->bitmap, pageno, nr_pages);
+			page = pfn_to_page(pfn);
+			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+			break;
+		} else if (ret != -EBUSY) {
+			break;
+		}
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+	mutex_unlock(&kvm_cma_mutex);
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+	unsigned long pfn;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+
+	mutex_lock(&kvm_cma_mutex);
+	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, nr_pages);
+	free_contig_range(pfn, nr_pages);
+	mutex_unlock(&kvm_cma_mutex);
+
+	return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+					unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+	int bitmap_size, ret;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s()\n", __func__);
+	if (!cma->count)
+		return 0;
+
+	bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	kfree(cma->bitmap);
+	return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 0000000..788bc3b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,22 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+				  unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+				       phys_addr_t alignment) __init;
+#endif
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02  5:45 ` Aneesh Kumar K.V
  (?)
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:45 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Older versions of the Power architecture use the Real Mode Offset register and the Real Mode
Limit Selector for mapping the guest Real Mode Area. The guest RMA must be physically
contiguous since the range is used when address translation is not enabled.

This patch switches the RMA allocation code to use the contiguous memory allocator. It
also removes the linear allocator, which is no longer used.

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
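
As a reading aid only (not part of the patch), the way the CMA-backed RMA is
handed out can be condensed from the kvm_rma_fault() hunk below: each fault
resolves to a page inside the single physically contiguous reservation, indexed
by the fault offset. The sketch assumes the unchanged context lines (the
get_page() call and vmf->page assignment) that the diff does not show:

/*
 * Condensed sketch based on the kvm_rma_fault() hunk below; the region
 * size is now the global kvm_rma_pages instead of a per-area npages.
 * The tail of the function (get_page()/vmf->page) is assumed from the
 * unchanged context and is not shown in the diff.
 */
static int kvm_rma_fault_sketch(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvm_rma_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvm_rma_pages)
		return VM_FAULT_SIGBUS;

	/* pick the page straight out of the contiguous reservation */
	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}
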
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
 arch/powerpc/include/asm/kvm_host.h      |  12 +--
 arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
 arch/powerpc/kernel/setup_64.c           |   2 -
 arch/powerpc/kvm/book3s_hv.c             |  27 +++--
 arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
 6 files changed, 65 insertions(+), 152 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f8355a9..76ff0b5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0097dab..3328353 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvmppc_linear_info {
-	void		*base_virt;
-	unsigned long	 base_pfn;
-	unsigned long	 npages;
-	struct list_head list;
-	atomic_t	 use_count;
-	int		 type;
+struct kvm_rma_info {
+	atomic_t use_count;
+	unsigned long base_pfn;
 };
 
 /* XICS components, defined in book3s_xics.c */
@@ -246,7 +242,7 @@ struct kvm_arch {
 	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long rmor;
-	struct kvmppc_linear_info *rma;
+	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b5ef7a3..5a26bfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
-extern struct kvmppc_linear_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvmppc_linear_info *ri);
+extern struct kvm_rma_info *kvm_alloc_rma(void);
+extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
@@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 }
 
 extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
-extern void kvm_linear_init(void);
 
 #else
 static inline void __init kvm_cma_reserve(void)
@@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void)
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
-static inline void kvm_linear_init(void)
-{}
-
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	return 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ee28d1f..8a022f5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
 	/* Initialize the MMU context management stuff */
 	mmu_context_init();
 
-	kvm_linear_init();
-
 	/* Interrupt code needs to be 64K-aligned */
 	if ((unsigned long)_stext & 0xffff)
 		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f592..55c8519 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
 	struct page *page;
+	struct kvm_rma_info *ri = vma->vm_file->private_data;
 
-	if (vmf->pgoff >= ri->npages)
+	if (vmf->pgoff >= kvm_rma_pages)
 		return VM_FAULT_SIGBUS;
 
 	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int kvm_rma_release(struct inode *inode, struct file *filp)
 {
-	struct kvmppc_linear_info *ri = filp->private_data;
+	struct kvm_rma_info *ri = filp->private_data;
 
 	kvm_release_rma(ri);
 	return 0;
@@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
 
 long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 {
-	struct kvmppc_linear_info *ri;
 	long fd;
+	struct kvm_rma_info *ri;
+	/*
+	 * Only do this on PPC970 in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EINVAL;
+
+	if (!kvm_rma_pages)
+		return -EINVAL;
 
 	ri = kvm_alloc_rma();
 	if (!ri)
@@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	if (fd < 0)
 		kvm_release_rma(ri);
 
-	ret->rma_size = ri->npages << PAGE_SHIFT;
+	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
 	return fd;
 }
 
@@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_linear_info *ri = NULL;
+	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
@@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	} else {
 		/* Set up to use an RMO region */
-		rma_size = ri->npages;
+		rma_size = kvm_rma_pages;
 		if (rma_size > memslot->npages)
 			rma_size = memslot->npages;
 		rma_size <<= PAGE_SHIFT;
@@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 			/* POWER7 */
 			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
 			lpcr |= rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
 		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
 		/* Initialize phys addrs of pages in RMO */
-		npages = ri->npages;
+		npages = kvm_rma_pages;
 		porder = __ilog2(npages);
 		physp = memslot->arch.slot_phys;
 		if (physp) {
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4b865c5..8cd0dae 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,13 +21,6 @@
 #include <asm/kvm_book3s.h>
 
 #include "book3s_hv_cma.h"
-
-#define KVM_LINEAR_RMA		0
-#define KVM_LINEAR_HPT		1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-
-/*************** RMA *************/
-
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
 * and other larger sizes.  Since we are unlikely to be able to allocate that
  * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA. The RMA size
+ * should be a power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
@@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
 
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma(void)
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
@@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
-
-	if (!count)
-		return;
-
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
-		}
-	}
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
-	struct kvmppc_linear_info *ri, *ret;
-
-	ret = NULL;
-	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
-	spin_unlock(&linear_lock);
-	return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
-
-	}
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
- */
-void __init kvm_linear_init(void)
-{
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
-	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
-}
-
 /**
  * kvm_cma_reserve() - reserve area for kvm hash pagetable
  *
@@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void)
 			align_size = __rounddown_pow_of_two(selected_size);
 		else
 			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
 		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
 }
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
  2013-07-02  5:45 ` Aneesh Kumar K.V
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:45 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Both RMA and hash page table requests will be a multiple of 256K. We can therefore use
a 256K chunk size and track free/used chunks in the bitmap at that granularity, which
should help reduce the bitmap size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
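All the bitmap bookkeeping below is a straight rescale from pages to 256K chunks.
A minimal sketch of the conversions used throughout (the page-size figures are only
illustrative; ppc64 commonly runs with 64K or 4K pages):

	/* sketch: KVM_CMA_CHUNK_ORDER = 18, so one bitmap bit covers 256K */
	unsigned long shift    = KVM_CMA_CHUNK_ORDER - PAGE_SHIFT; /* 2 with 64K pages, 6 with 4K */
	unsigned long nr_chunk = nr_pages >> shift;                 /* request size in chunks */
	unsigned long bit      = (pfn - cma->base_pfn) >> shift;    /* bitmap index for a pfn */
	unsigned long first    = cma->base_pfn + (pageno << shift); /* first pfn of a free chunk */
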
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  3 +++
 arch/powerpc/kvm/book3s_hv_cma.c    | 35 ++++++++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_hv_cma.h    |  5 +++++
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 354f4bb..7eb5dda 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
+#include "book3s_hv_cma.h"
+
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
@@ -71,6 +73,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
+		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
 		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
 		if (page) {
 			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
index e04b269..d9d3d85 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -24,6 +24,8 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#include "book3s_hv_cma.h"
+
 struct kvm_cma {
 	unsigned long	base_pfn;
 	unsigned long	count;
@@ -96,6 +98,7 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 	int ret;
 	struct page *page = NULL;
 	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long chunk_count, nr_chunk;
 	unsigned long mask, pfn, pageno, start = 0;
 
 
@@ -107,21 +110,27 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 
 	if (!nr_pages)
 		return NULL;
-
+	/*
+	 * align mask with chunk size. The bit tracks pages in chunk size
+	 */
 	VM_BUG_ON(!is_power_of_2(align_pages));
-	mask = align_pages - 1;
+	mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+	chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
 	for (;;) {
-		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
-						    start, nr_pages, mask);
-		if (pageno >= cma->count)
+		pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+						    start, nr_chunk, mask);
+		if (pageno >= chunk_count)
 			break;
 
-		pfn = cma->base_pfn + pageno;
+		pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
 		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
 		if (ret == 0) {
-			bitmap_set(cma->bitmap, pageno, nr_pages);
+			bitmap_set(cma->bitmap, pageno, nr_chunk);
 			page = pfn_to_page(pfn);
 			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
 			break;
@@ -150,9 +159,9 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 {
 	unsigned long pfn;
+	unsigned long nr_chunk;
 	struct kvm_cma *cma = &kvm_cma_area;
 
-
 	if (!cma || !pages)
 		return false;
 
@@ -164,9 +173,12 @@ bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 		return false;
 
 	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
-	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, nr_pages);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
 	free_contig_range(pfn, nr_pages);
 	mutex_unlock(&kvm_cma_mutex);
 
@@ -204,13 +216,14 @@ static int __init kvm_cma_activate_area(unsigned long base_pfn,
 static int __init kvm_cma_init_reserved_areas(void)
 {
 	int bitmap_size, ret;
+	unsigned long chunk_count;
 	struct kvm_cma *cma = &kvm_cma_area;
 
 	pr_debug("%s()\n", __func__);
 	if (!cma->count)
 		return 0;
-
-	bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
 	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 	if (!cma->bitmap)
 		return -ENOMEM;
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
index 788bc3b..655144f 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -14,6 +14,11 @@
 
 #ifndef __POWERPC_KVM_CMA_ALLOC_H__
 #define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and Hash page allocation will be multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER	18
+
 extern struct page *kvm_alloc_cma(unsigned long nr_pages,
 				  unsigned long align_pages);
 extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-- 
1.8.1.2

^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:57 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

The PowerPC architecture uses a hash-based page table mechanism for mapping virtual
addresses to physical addresses. The architecture requires this hash page table to
be physically contiguous. With KVM on PowerPC we currently use an early reservation
mechanism for allocating the guest hash page table, which means we must reserve a
big memory region up front to ensure we can create a large number of guests
simultaneously with KVM on Power. Another disadvantage is that the reserved memory
is not available to the rest of the subsystems, which limits the total available
memory in the host.

This patch series switches the guest hash page table allocation to use the
contiguous memory allocator.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
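The CMA region is sized as a fraction of total memory at boot. A rough sketch of the
arithmetic in kvm_cma_reserve() below, assuming the default 5% ratio and, purely for
illustration, a host with 64GB of RAM:

	/* sketch: selected_size = 5% of memory, aligned per CPU generation */
	phys_addr_t selected_size = (64ULL << 30) * 5 / 100;	/* ~3.2GB of CMA */
	phys_addr_t align_size;
	if (!cpu_has_feature(CPU_FTR_ARCH_206))			/* PPC970: HPT aligned to its size */
		align_size = __rounddown_pow_of_two(selected_size);
	else							/* POWER7: 256K alignment is enough */
		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
	kvm_cma_declare_contiguous(selected_size, align_size);
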
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 -
 arch/powerpc/include/asm/kvm_host.h      |   2 +-
 arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
 arch/powerpc/kernel/setup_64.c           |   2 +
 arch/powerpc/kvm/Kconfig                 |   1 +
 arch/powerpc/kvm/Makefile                |   1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |  37 +++--
 arch/powerpc/kvm/book3s_hv_builtin.c     |  91 +++++++++----
 arch/powerpc/kvm/book3s_hv_cma.c         | 227 +++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_cma.h         |  22 +++
 10 files changed, 341 insertions(+), 51 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_cma.c
 create mode 100644 arch/powerpc/kvm/book3s_hv_cma.h

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9c1ff33..f8355a9 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cd..0097dab 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -259,7 +259,7 @@ struct kvm_arch {
 	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
-	struct kvmppc_linear_info *hpt_li;
+	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe..b5ef7a3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -139,8 +139,8 @@ extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
 extern struct kvmppc_linear_info *kvm_alloc_rma(void);
 extern void kvm_release_rma(struct kvmppc_linear_info *ri);
-extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
-extern void kvm_release_hpt(struct kvmppc_linear_info *li);
+extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
+extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
@@ -261,6 +261,7 @@ void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 struct openpic;
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
+extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
 	paca[cpu].kvm_hstate.xics_phys = addr;
@@ -284,6 +285,9 @@ extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
 extern void kvm_linear_init(void);
 
 #else
+static inline void __init kvm_cma_reserve(void)
+{}
+
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e379d3f..ee28d1f 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -229,6 +229,8 @@ void __init early_setup(unsigned long dt_ptr)
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	kvm_cma_reserve();
+
 	/*
 	 * Reserve any gigantic pages requested on the command line.
 	 * memblock needs to have been initialized by the time this is
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f8..ffaef2c 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -72,6 +72,7 @@ config KVM_BOOK3S_64_HV
 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
 	depends on KVM_BOOK3S_64
 	select MMU_NOTIFIER
+	select CMA
 	---help---
 	  Support running unmodified book3s_64 guest kernels in
 	  virtual machines on POWER7 and PPC970 processors that have
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f..6640393 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -80,6 +80,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_64_vio_hv.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
+	book3s_hv_cma.o \
 	$(kvm-book3s_64-builtin-xics-objs-y)
 
 kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 5880dfb..354f4bb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -52,8 +52,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
 	struct revmap_entry *rev;
-	struct kvmppc_linear_info *li;
-	long order = kvm_hpt_order;
+	struct page *page = NULL;
+	long order = KVM_DEFAULT_HPT_ORDER;
 
 	if (htab_orderp) {
 		order = *htab_orderp;
@@ -61,26 +61,22 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 			order = PPC_MIN_HPT_ORDER;
 	}
 
+	kvm->arch.hpt_cma_alloc = 0;
 	/*
-	 * If the user wants a different size from default,
 	 * try first to allocate it from the kernel page allocator.
+	 * We keep the CMA reserved for failed allocation.
 	 */
-	hpt = 0;
-	if (order != kvm_hpt_order) {
-		hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
-				       __GFP_NOWARN, order - PAGE_SHIFT);
-		if (!hpt)
-			--order;
-	}
+	hpt = __get_free_pages(GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT |
+			       __GFP_NOWARN, order - PAGE_SHIFT);
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
-		li = kvm_alloc_hpt();
-		if (li) {
-			hpt = (ulong)li->base_virt;
-			kvm->arch.hpt_li = li;
-			order = kvm_hpt_order;
-		}
+		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+		if (page) {
+			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+			kvm->arch.hpt_cma_alloc = 1;
+		} else
+			--order;
 	}
 
 	/* Lastly try successively smaller sizes from the page allocator */
@@ -118,8 +114,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 	return 0;
 
  out_freehpt:
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
 	else
 		free_pages(hpt, order - PAGE_SHIFT);
 	return -ENOMEM;
@@ -165,8 +161,9 @@ void kvmppc_free_hpt(struct kvm *kvm)
 {
 	kvmppc_free_lpid(kvm->arch.lpid);
 	vfree(kvm->arch.revmap);
-	if (kvm->arch.hpt_li)
-		kvm_release_hpt(kvm->arch.hpt_li);
+	if (kvm->arch.hpt_cma_alloc)
+		kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
+				1 << (kvm->arch.hpt_order - PAGE_SHIFT));
 	else
 		free_pages(kvm->arch.hpt_virt,
 			   kvm->arch.hpt_order - PAGE_SHIFT);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5..4b865c5 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -13,20 +13,30 @@
 #include <linux/spinlock.h>
 #include <linux/bootmem.h>
 #include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/sizes.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+#include "book3s_hv_cma.h"
+
 #define KVM_LINEAR_RMA		0
 #define KVM_LINEAR_HPT		1
 
 static void __init kvm_linear_init_one(ulong size, int count, int type);
 static struct kvmppc_linear_info *kvm_alloc_linear(int type);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
-
-int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
-EXPORT_SYMBOL_GPL(kvm_hpt_order);
+/*
+ * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
+ * should be power of 2.
+ */
+#define HPT_ALIGN_PAGES		((1 << 18) >> PAGE_SHIFT) /* 256k */
+/*
+ * By default we reserve 5% of memory for hash pagetable allocation.
+ */
+static unsigned long kvm_cma_resv_ratio = 5;
 
 /*************** RMA *************/
 
@@ -101,36 +111,29 @@ void kvm_release_rma(struct kvmppc_linear_info *ri)
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
-/*************** HPT *************/
-
-/*
- * This maintains a list of big linear HPT tables that contain the GVA->HPA
- * memory mappings. If we don't reserve those early on, we might not be able
- * to get a big (usually 16MB) linear memory region from the kernel anymore.
- */
-
-static unsigned long kvm_hpt_count;
-
-static int __init early_parse_hpt_count(char *p)
+static int __init early_parse_kvm_cma_resv(char *p)
 {
+	pr_debug("%s(%s)\n", __func__, p);
 	if (!p)
-		return 1;
-
-	kvm_hpt_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
+		return -EINVAL;
+	return kstrtoul(p, 0, &kvm_cma_resv_ratio);
 }
-early_param("kvm_hpt_count", early_parse_hpt_count);
+early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
 
-struct kvmppc_linear_info *kvm_alloc_hpt(void)
+struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	unsigned long align_pages = HPT_ALIGN_PAGES;
+
+	/* Old CPUs require HPT aligned on a multiple of its size */
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		align_pages = nr_pages;
+	return kvm_alloc_cma(nr_pages, align_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
-void kvm_release_hpt(struct kvmppc_linear_info *li)
+void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 {
-	kvm_release_linear(li);
+	kvm_release_cma(page, nr_pages);
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
@@ -211,9 +214,6 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
  */
 void __init kvm_linear_init(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
-
 	/* RMA */
 	/* Only do this on PPC970 in HV mode */
 	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
@@ -231,3 +231,40 @@ void __init kvm_linear_init(void)
 
 	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
 }
+
+/**
+ * kvm_cma_reserve() - reserve area for kvm hash pagetable
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init kvm_cma_reserve(void)
+{
+	unsigned long align_size;
+	struct memblock_region *reg;
+	phys_addr_t selected_size = 0;
+	/*
+	 * We cannot use memblock_phys_mem_size() here, because
+	 * memblock_analyze() has not been called yet.
+	 */
+	for_each_memblock(memory, reg)
+		selected_size += memblock_region_memory_end_pfn(reg) -
+				 memblock_region_memory_base_pfn(reg);
+
+	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+	if (selected_size) {
+		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+			 (unsigned long)selected_size / SZ_1M);
+		/*
+		 * Old CPUs require HPT aligned on a multiple of its size. So for them
+		 * make the alignment as max size we could request.
+		 */
+		if (!cpu_has_feature(CPU_FTR_ARCH_206))
+			align_size = __rounddown_pow_of_two(selected_size);
+		else
+			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+		kvm_cma_declare_contiguous(selected_size, align_size);
+	}
+}
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
new file mode 100644
index 0000000..e04b269
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -0,0 +1,227 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ */
+#define pr_fmt(fmt) "kvm_cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <linux/memblock.h>
+#include <linux/mutex.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+struct kvm_cma {
+	unsigned long	base_pfn;
+	unsigned long	count;
+	unsigned long	*bitmap;
+};
+
+static DEFINE_MUTEX(kvm_cma_mutex);
+static struct kvm_cma kvm_cma_area;
+
+/**
+ * kvm_cma_declare_contiguous() - reserve area for contiguous memory handling
+ *			          for kvm hash pagetable
+ * @size:  Size of the reserved memory.
+ * @alignment:  Alignment for the contiguous memory area
+ *
+ * This function reserves memory for kvm cma area. It should be
+ * called by arch code while the early allocator (memblock or bootmem)
+ * is still active.
+ */
+long __init kvm_cma_declare_contiguous(phys_addr_t size, phys_addr_t alignment)
+{
+	long base_pfn;
+	phys_addr_t addr;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s(size %lx)\n", __func__, (unsigned long)size);
+
+	if (!size)
+		return -EINVAL;
+	/*
+	 * Sanitise input arguments.
+	 * We should be pageblock aligned for CMA.
+	 */
+	alignment = max(alignment, (phys_addr_t)(PAGE_SIZE << pageblock_order));
+	size = ALIGN(size, alignment);
+	/*
+	 * Reserve memory
+	 * Use __memblock_alloc_base() since
+	 * memblock_alloc_base() panic()s.
+	 */
+	addr = __memblock_alloc_base(size, alignment, 0);
+	if (!addr) {
+		base_pfn = -ENOMEM;
+		goto err;
+	} else
+		base_pfn = PFN_DOWN(addr);
+
+	/*
+	 * Each reserved area must be initialised later, when more kernel
+	 * subsystems (like slab allocator) are available.
+	 */
+	cma->base_pfn = base_pfn;
+	cma->count    = size >> PAGE_SHIFT;
+	pr_info("CMA: reserved %ld MiB\n", (unsigned long)size / SZ_1M);
+	return 0;
+err:
+	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
+	return base_pfn;
+}
+
+/**
+ * kvm_alloc_cma() - allocate pages from contiguous area
+ * @nr_pages: Requested number of pages.
+ * @align_pages: Requested alignment in number of pages
+ *
+ * This function allocates memory buffer for hash pagetable.
+ */
+struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
+{
+	int ret;
+	struct page *page = NULL;
+	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long mask, pfn, pageno, start = 0;
+
+
+	if (!cma || !cma->count)
+		return NULL;
+
+	pr_debug("%s(cma %p, count %lu, align pages %lu)\n", __func__,
+		 (void *)cma, nr_pages, align_pages);
+
+	if (!nr_pages)
+		return NULL;
+
+	VM_BUG_ON(!is_power_of_2(align_pages));
+	mask = align_pages - 1;
+
+	mutex_lock(&kvm_cma_mutex);
+	for (;;) {
+		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
+						    start, nr_pages, mask);
+		if (pageno >= cma->count)
+			break;
+
+		pfn = cma->base_pfn + pageno;
+		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
+		if (ret == 0) {
+			bitmap_set(cma->bitmap, pageno, nr_pages);
+			page = pfn_to_page(pfn);
+			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
+			break;
+		} else if (ret != -EBUSY) {
+			break;
+		}
+		pr_debug("%s(): memory range at %p is busy, retrying\n",
+			 __func__, pfn_to_page(pfn));
+		/* try again with a bit different memory target */
+		start = pageno + mask + 1;
+	}
+	mutex_unlock(&kvm_cma_mutex);
+	pr_debug("%s(): returned %p\n", __func__, page);
+	return page;
+}
+
+/**
+ * kvm_release_cma() - release allocated pages for hash pagetable
+ * @pages: Allocated pages.
+ * @nr_pages: Number of allocated pages.
+ *
+ * This function releases memory allocated by kvm_alloc_cma().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
+{
+	unsigned long pfn;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+
+	if (!cma || !pages)
+		return false;
+
+	pr_debug("%s(page %p count %lu)\n", __func__, (void *)pages, nr_pages);
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
+		return false;
+
+	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+
+	mutex_lock(&kvm_cma_mutex);
+	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, nr_pages);
+	free_contig_range(pfn, nr_pages);
+	mutex_unlock(&kvm_cma_mutex);
+
+	return true;
+}
+
+static int __init kvm_cma_activate_area(unsigned long base_pfn,
+					unsigned long count)
+{
+	unsigned long pfn = base_pfn;
+	unsigned i = count >> pageblock_order;
+	struct zone *zone;
+
+	WARN_ON_ONCE(!pfn_valid(pfn));
+	zone = page_zone(pfn_to_page(pfn));
+	do {
+		unsigned j;
+		base_pfn = pfn;
+		for (j = pageblock_nr_pages; j; --j, pfn++) {
+			WARN_ON_ONCE(!pfn_valid(pfn));
+			/*
+			 * alloc_contig_range requires the pfn range
+			 * specified to be in the same zone. Make this
+			 * simple by forcing the entire CMA resv range
+			 * to be in the same zone.
+			 */
+			if (page_zone(pfn_to_page(pfn)) != zone)
+				return -EINVAL;
+		}
+		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
+	} while (--i);
+	return 0;
+}
+
+static int __init kvm_cma_init_reserved_areas(void)
+{
+	int bitmap_size, ret;
+	struct kvm_cma *cma = &kvm_cma_area;
+
+	pr_debug("%s()\n", __func__);
+	if (!cma->count)
+		return 0;
+
+	bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!cma->bitmap)
+		return -ENOMEM;
+
+	ret = kvm_cma_activate_area(cma->base_pfn, cma->count);
+	if (ret)
+		goto error;
+	return 0;
+
+error:
+	kfree(cma->bitmap);
+	return ret;
+}
+core_initcall(kvm_cma_init_reserved_areas);
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
new file mode 100644
index 0000000..788bc3b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -0,0 +1,22 @@
+/*
+ * Contiguous Memory Allocator for ppc KVM hash pagetable  based on CMA
+ * for DMA mapping framework
+ *
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version of the license.
+ *
+ */
+
+#ifndef __POWERPC_KVM_CMA_ALLOC_H__
+#define __POWERPC_KVM_CMA_ALLOC_H__
+extern struct page *kvm_alloc_cma(unsigned long nr_pages,
+				  unsigned long align_pages);
+extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
+extern long kvm_cma_declare_contiguous(phys_addr_t size,
+				       phys_addr_t alignment) __init;
+#endif
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:57 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Older versions of the Power architecture use the Real Mode Offset register and the Real
Mode Limit Selector for mapping the guest Real Mode Area. The guest RMA should be
physically contiguous since we use the range when address translation is not enabled.

This patch switches the RMA allocation code to use the contiguous memory allocator. The
patch also removes the linear allocator, which is not used any more.
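
As a rough illustration (a standalone userspace sketch with assumed values, not part
of the patch), this is the kind of size filtering the reworked early_parse_rma_size()
below performs through lpcr_rmls(): an unsupported "kvm_rma_size=" value is rejected
at parse time, and a supported one is converted to a page count (kvm_rma_pages).
Here rma_size_supported() is a hypothetical stand-in for that check, and PAGE_SHIFT
is assumed to be 12 (4K pages):

#include <stdio.h>

#define PAGE_SHIFT	12	/* assumed 4K pages */

/*
 * Partial, illustrative list of supported RMA sizes; the real check is the
 * lpcr_rmls() switch statement, which also accepts larger sizes.
 */
static int rma_size_supported(unsigned long rma_size)
{
	switch (rma_size) {
	case  64UL << 20:
	case 128UL << 20:
	case 256UL << 20:
		return 1;
	default:
		return 0;
	}
}

int main(void)
{
	unsigned long sizes[] = { 64UL << 20, 100UL << 20, 256UL << 20 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		if (!rma_size_supported(sizes[i]))
			printf("RMA size of 0x%lx not supported\n", sizes[i]);
		else
			printf("RMA size %lu MB -> kvm_rma_pages = %lu\n",
			       sizes[i] >> 20, sizes[i] >> PAGE_SHIFT);
	}
	return 0;
}

With CMA the RMA itself is no longer preallocated at boot; it is carved out of the
reserved CMA region only when a guest asks for one via kvm_alloc_rma() in the diff
below.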

Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
 arch/powerpc/include/asm/kvm_host.h      |  12 +--
 arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
 arch/powerpc/kernel/setup_64.c           |   2 -
 arch/powerpc/kvm/book3s_hv.c             |  27 +++--
 arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
 6 files changed, 65 insertions(+), 152 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f8355a9..76ff0b5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
+extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0097dab..3328353 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
 	struct page *pages[0];
 };
 
-struct kvmppc_linear_info {
-	void		*base_virt;
-	unsigned long	 base_pfn;
-	unsigned long	 npages;
-	struct list_head list;
-	atomic_t	 use_count;
-	int		 type;
+struct kvm_rma_info {
+	atomic_t use_count;
+	unsigned long base_pfn;
 };
 
 /* XICS components, defined in book3s_xics.c */
@@ -246,7 +242,7 @@ struct kvm_arch {
 	int tlbie_lock;
 	unsigned long lpcr;
 	unsigned long rmor;
-	struct kvmppc_linear_info *rma;
+	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
 	int using_mmu_notifiers;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index b5ef7a3..5a26bfc 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba, unsigned long tce);
 extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
 				struct kvm_allocate_rma *rma);
-extern struct kvmppc_linear_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvmppc_linear_info *ri);
+extern struct kvm_rma_info *kvm_alloc_rma(void);
+extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
@@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 }
 
 extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
-extern void kvm_linear_init(void);
 
 #else
 static inline void __init kvm_cma_reserve(void)
@@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void)
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {}
 
-static inline void kvm_linear_init(void)
-{}
-
 static inline u32 kvmppc_get_xics_latch(void)
 {
 	return 0;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ee28d1f..8a022f5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
 	/* Initialize the MMU context management stuff */
 	mmu_context_init();
 
-	kvm_linear_init();
-
 	/* Interrupt code needs to be 64K-aligned */
 	if ((unsigned long)_stext & 0xffff)
 		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 550f592..55c8519 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
 	struct page *page;
+	struct kvm_rma_info *ri = vma->vm_file->private_data;
 
-	if (vmf->pgoff >= ri->npages)
+	if (vmf->pgoff >= kvm_rma_pages)
 		return VM_FAULT_SIGBUS;
 
 	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
@@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int kvm_rma_release(struct inode *inode, struct file *filp)
 {
-	struct kvmppc_linear_info *ri = filp->private_data;
+	struct kvm_rma_info *ri = filp->private_data;
 
 	kvm_release_rma(ri);
 	return 0;
@@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
 
 long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 {
-	struct kvmppc_linear_info *ri;
 	long fd;
+	struct kvm_rma_info *ri;
+	/*
+	 * Only do this on PPC970 in HV mode
+	 */
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_201))
+		return -EINVAL;
+
+	if (!kvm_rma_pages)
+		return -EINVAL;
 
 	ri = kvm_alloc_rma();
 	if (!ri)
@@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
 	if (fd < 0)
 		kvm_release_rma(ri);
 
-	ret->rma_size = ri->npages << PAGE_SHIFT;
+	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
 	return fd;
 }
 
@@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_linear_info *ri = NULL;
+	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
@@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
 	} else {
 		/* Set up to use an RMO region */
-		rma_size = ri->npages;
+		rma_size = kvm_rma_pages;
 		if (rma_size > memslot->npages)
 			rma_size = memslot->npages;
 		rma_size <<= PAGE_SHIFT;
@@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 			/* POWER7 */
 			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
 			lpcr |= rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
+			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
 		}
 		kvm->arch.lpcr = lpcr;
 		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
 			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
 
 		/* Initialize phys addrs of pages in RMO */
-		npages = ri->npages;
+		npages = kvm_rma_pages;
 		porder = __ilog2(npages);
 		physp = memslot->arch.slot_phys;
 		if (physp) {
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 4b865c5..8cd0dae 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,13 +21,6 @@
 #include <asm/kvm_book3s.h>
 
 #include "book3s_hv_cma.h"
-
-#define KVM_LINEAR_RMA		0
-#define KVM_LINEAR_HPT		1
-
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
-static void kvm_release_linear(struct kvmppc_linear_info *ri);
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-
-/*************** RMA *************/
-
 /*
- * This maintains a list of RMAs (real mode areas) for KVM guests to use.
+ * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
  * Each RMA has to be physically contiguous and of a size that the
  * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
  * and other larger sizes.  Since we are unlikely to be able to allocate that
  * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot for KVM to use.
+ * we preallocate a set of RMAs in early boot using CMA.
+ * The RMA size should be a power of 2.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
-static unsigned long kvm_rma_count;
+unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
+EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
    Assumes POWER7 or PPC970. */
@@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
 
 static int __init early_parse_rma_size(char *p)
 {
-	if (!p)
-		return 1;
+	unsigned long kvm_rma_size;
 
+	pr_debug("%s(%s)\n", __func__, p);
+	if (!p)
+		return -EINVAL;
 	kvm_rma_size = memparse(p, &p);
-
+	/*
+	 * Check that the requested size is one supported in hardware
+	 */
+	if (lpcr_rmls(kvm_rma_size) < 0) {
+		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+		return -EINVAL;
+	}
+	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
 	return 0;
 }
 early_param("kvm_rma_size", early_parse_rma_size);
 
-static int __init early_parse_rma_count(char *p)
+struct kvm_rma_info *kvm_alloc_rma()
 {
-	if (!p)
-		return 1;
-
-	kvm_rma_count = simple_strtoul(p, NULL, 0);
-
-	return 0;
-}
-early_param("kvm_rma_count", early_parse_rma_count);
-
-struct kvmppc_linear_info *kvm_alloc_rma(void)
-{
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	struct page *page;
+	struct kvm_rma_info *ri;
+
+	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
+	if (!ri)
+		return NULL;
+	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
+	if (!page)
+		goto err_out;
+	atomic_set(&ri->use_count, 1);
+	ri->base_pfn = page_to_pfn(page);
+	return ri;
+err_out:
+	kfree(ri);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
-void kvm_release_rma(struct kvmppc_linear_info *ri)
+void kvm_release_rma(struct kvm_rma_info *ri)
 {
-	kvm_release_linear(ri);
+	if (atomic_dec_and_test(&ri->use_count)) {
+		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
+		kfree(ri);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_release_rma);
 
@@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
-
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
-
-static void __init kvm_linear_init_one(ulong size, int count, int type)
-{
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
-
-	if (!count)
-		return;
-
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
-		}
-	}
-}
-
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
-{
-	struct kvmppc_linear_info *ri, *ret;
-
-	ret = NULL;
-	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
-	spin_unlock(&linear_lock);
-	return ret;
-}
-
-static void kvm_release_linear(struct kvmppc_linear_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
-
-	}
-}
-
-/*
- * Called at boot time while the bootmem allocator is active,
- * to allocate contiguous physical memory for the hash page
- * tables for guests.
- */
-void __init kvm_linear_init(void)
-{
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return;
-
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
-
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return;
-	}
-
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
-}
-
 /**
  * kvm_cma_reserve() - reserve area for kvm hash pagetable
  *
@@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void)
 			align_size = __rounddown_pow_of_two(selected_size);
 		else
 			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
+
+		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
 		kvm_cma_declare_contiguous(selected_size, align_size);
 	}
 }
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* [PATCH -V3 4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
@ 2013-07-02  5:45   ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02  5:57 UTC (permalink / raw)
  To: benh, paulus, agraf, m.szyprowski, mina86
  Cc: linux-mm, linuxppc-dev, kvm-ppc, kvm, Aneesh Kumar K.V

From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>

Both RMA and hash page table requests will be a multiple of 256K. We can use
a chunk size of 256K and track free/used 256K chunks in the bitmap. This
should help to reduce the bitmap size.
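
As a rough illustration (a standalone userspace sketch with assumed values, not kernel
code) of the chunk arithmetic this relies on: with KVM_CMA_CHUNK_ORDER of 18 and 4K
pages, one bitmap bit covers 64 pages, so the bitmap shrinks by a factor of 64:

#include <stdio.h>

#define PAGE_SHIFT		12	/* assumed 4K pages */
#define KVM_CMA_CHUNK_ORDER	18	/* 256K chunks, as defined in this patch */

int main(void)
{
	unsigned long shift     = KVM_CMA_CHUNK_ORDER - PAGE_SHIFT;	/* 6 */
	unsigned long cma_pages = (2UL << 30) >> PAGE_SHIFT;	/* e.g. 2GB area, 5% of a 40GB host */
	unsigned long hpt_pages = (1UL << 24) >> PAGE_SHIFT;	/* one 16MB HPT request */

	printf("bitmap bits at page granularity : %lu\n", cma_pages);
	printf("bitmap bits at chunk granularity: %lu\n", cma_pages >> shift);
	printf("chunks needed for a 16MB HPT    : %lu\n", hpt_pages >> shift);
	return 0;
}

In the patch below, kvm_alloc_cma() and kvm_release_cma() shift page counts right by
(KVM_CMA_CHUNK_ORDER - PAGE_SHIFT) before touching the bitmap, and the BUILD_BUG_ON
guards against a configuration where a page is larger than a chunk.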

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  3 +++
 arch/powerpc/kvm/book3s_hv_cma.c    | 35 ++++++++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_hv_cma.h    |  5 +++++
 3 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 354f4bb..7eb5dda 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,6 +37,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
+#include "book3s_hv_cma.h"
+
 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
 #define MAX_LPID_970	63
 
@@ -71,6 +73,7 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 
 	/* Next try to allocate from the preallocated pool */
 	if (!hpt) {
+		VM_BUG_ON(order < KVM_CMA_CHUNK_ORDER);
 		page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
 		if (page) {
 			hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
diff --git a/arch/powerpc/kvm/book3s_hv_cma.c b/arch/powerpc/kvm/book3s_hv_cma.c
index e04b269..d9d3d85 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.c
+++ b/arch/powerpc/kvm/book3s_hv_cma.c
@@ -24,6 +24,8 @@
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#include "book3s_hv_cma.h"
+
 struct kvm_cma {
 	unsigned long	base_pfn;
 	unsigned long	count;
@@ -96,6 +98,7 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 	int ret;
 	struct page *page = NULL;
 	struct kvm_cma *cma = &kvm_cma_area;
+	unsigned long chunk_count, nr_chunk;
 	unsigned long mask, pfn, pageno, start = 0;
 
 
@@ -107,21 +110,27 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 
 	if (!nr_pages)
 		return NULL;
-
+	/*
+	 * Align the mask with the chunk size; each bitmap bit tracks one chunk of pages.
+	 */
 	VM_BUG_ON(!is_power_of_2(align_pages));
-	mask = align_pages - 1;
+	mask = (align_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT)) - 1;
+	BUILD_BUG_ON(PAGE_SHIFT > KVM_CMA_CHUNK_ORDER);
+
+	chunk_count = cma->count >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	nr_chunk = nr_pages >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
 	for (;;) {
-		pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count,
-						    start, nr_pages, mask);
-		if (pageno >= cma->count)
+		pageno = bitmap_find_next_zero_area(cma->bitmap, chunk_count,
+						    start, nr_chunk, mask);
+		if (pageno >= chunk_count)
 			break;
 
-		pfn = cma->base_pfn + pageno;
+		pfn = cma->base_pfn + (pageno << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT));
 		ret = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_CMA);
 		if (ret == 0) {
-			bitmap_set(cma->bitmap, pageno, nr_pages);
+			bitmap_set(cma->bitmap, pageno, nr_chunk);
 			page = pfn_to_page(pfn);
 			memset(pfn_to_kaddr(pfn), 0, nr_pages << PAGE_SHIFT);
 			break;
@@ -150,9 +159,9 @@ struct page *kvm_alloc_cma(unsigned long nr_pages, unsigned long align_pages)
 bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 {
 	unsigned long pfn;
+	unsigned long nr_chunk;
 	struct kvm_cma *cma = &kvm_cma_area;
 
-
 	if (!cma || !pages)
 		return false;
 
@@ -164,9 +173,12 @@ bool kvm_release_cma(struct page *pages, unsigned long nr_pages)
 		return false;
 
 	VM_BUG_ON(pfn + nr_pages > cma->base_pfn + cma->count);
+	nr_chunk = nr_pages >>  (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	mutex_lock(&kvm_cma_mutex);
-	bitmap_clear(cma->bitmap, pfn - cma->base_pfn, nr_pages);
+	bitmap_clear(cma->bitmap,
+		     (pfn - cma->base_pfn) >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT),
+		     nr_chunk);
 	free_contig_range(pfn, nr_pages);
 	mutex_unlock(&kvm_cma_mutex);
 
@@ -204,13 +216,14 @@ static int __init kvm_cma_activate_area(unsigned long base_pfn,
 static int __init kvm_cma_init_reserved_areas(void)
 {
 	int bitmap_size, ret;
+	unsigned long chunk_count;
 	struct kvm_cma *cma = &kvm_cma_area;
 
 	pr_debug("%s()\n", __func__);
 	if (!cma->count)
 		return 0;
-
-	bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	chunk_count = cma->count >> (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	bitmap_size = BITS_TO_LONGS(chunk_count) * sizeof(long);
 	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
 	if (!cma->bitmap)
 		return -ENOMEM;
diff --git a/arch/powerpc/kvm/book3s_hv_cma.h b/arch/powerpc/kvm/book3s_hv_cma.h
index 788bc3b..655144f 100644
--- a/arch/powerpc/kvm/book3s_hv_cma.h
+++ b/arch/powerpc/kvm/book3s_hv_cma.h
@@ -14,6 +14,11 @@
 
 #ifndef __POWERPC_KVM_CMA_ALLOC_H__
 #define __POWERPC_KVM_CMA_ALLOC_H__
+/*
+ * Both RMA and hash page allocations will be a multiple of 256K.
+ */
+#define KVM_CMA_CHUNK_ORDER	18
+
 extern struct page *kvm_alloc_cma(unsigned long nr_pages,
 				  unsigned long align_pages);
 extern bool kvm_release_cma(struct page *pages, unsigned long nr_pages);
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 67+ messages in thread

* virtual machine windows freeze on copy data to an samba share.
  2013-07-02  5:45   ` Aneesh Kumar K.V
@ 2013-07-02  6:29   ` Marko Weber | ZBF
  -1 siblings, 0 replies; 67+ messages in thread
From: Marko Weber | ZBF @ 2013-07-02  6:29 UTC (permalink / raw)
  To: Kvm


hello,

my virtual windows machine freezes when i copy data to an smb share.
it was reproducible for me several times.
in systemlog i find this:

Jul  2 08:22:00 databunka kernel: general protection fault: 0000 [#1] 
SMP
Jul  2 08:22:00 databunka kernel: Modules linked in: vhost_net macvtap 
macvlan ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat_ipv4 
nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack it87 
hwmon_vid ipt_REJECT xt_CHECKSUM iptable_mangle xt_tcpudp iptable_filter 
ip_tables x_tables bridge stp llc tun acpi_cpufreq mperf kvm_amd kvm 
ata_generic nvidia(PO) r8169 pata_acpi k10temp pata_atiixp mii i2c_piix4 
processor button
Jul  2 08:22:00 databunka kernel: CPU 2
Jul  2 08:22:00 databunka kernel: Pid: 3694, comm: vhost-3693 Tainted: P 
           O 3.9.8databunka_3.9.8 #1 Gigabyte Technology Co., Ltd. 
GA-MA785GT-UD3H/GA-MA785GT-UD3H
Jul  2 08:22:00 databunka kernel: RIP: 0010:[<ffffffff81090ba4>]  
[<ffffffff81090ba4>] put_page+0x9/0x2e
Jul  2 08:22:00 databunka kernel: RSP: 0018:ffff88037cc37bf8  EFLAGS: 
00010212
Jul  2 08:22:00 databunka kernel: RAX: ffff88038dca0cc0 RBX: 
0003f00fee030006 RCX: ffff8803f91ec01c
Jul  2 08:22:00 databunka kernel: RDX: 0000000000000140 RSI: 
0000000000000246 RDI: 0003f00fee030006
Jul  2 08:22:00 databunka kernel: RBP: ffff88037cc37c08 R08: 
ffff880389344518 R09: 0000000000001000
Jul  2 08:22:00 databunka kernel: R10: ffff88038dfe27f8 R11: 
0000008000000000 R12: 0000000000000012
Jul  2 08:22:00 databunka kernel: R13: 000000000000000c R14: 
ffff8803eb4edb80 R15: ffff880389340001
Jul  2 08:22:00 databunka kernel: FS:  00007f0edef09700(0000) 
GS:ffff8803ffd00000(0000) knlGS:0000000000000000
Jul  2 08:22:00 databunka kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 
000000008005003b
Jul  2 08:22:00 databunka kernel: CR2: 000007fefeeb7081 CR3: 
000000038dfe2000 CR4: 00000000000007a0
Jul  2 08:22:00 databunka kernel: DR0: 0000000000000000 DR1: 
0000000000000000 DR2: 0000000000000000
Jul  2 08:22:00 databunka kernel: DR3: 0000000000000000 DR6: 
00000000ffff0ff0 DR7: 0000000000000400
Jul  2 08:22:00 databunka kernel: Process vhost-3693 (pid: 3694, 
threadinfo ffff88037cc36000, task ffff88038a40a340)
Jul  2 08:22:00 databunka kernel: Stack:
Jul  2 08:22:00 databunka kernel: ffff88038dca0cc0 ffff8803eb4edb80 
ffff88037cc37c28 ffffffff8144377b
Jul  2 08:22:00 databunka kernel: ffff8803eb4edb80 000000000000efbe 
ffff88037cc37c48 ffffffff8144380b
Jul  2 08:22:00 databunka kernel: 0000000000000000 ffff8803eb4edb80 
ffff88037cc37c68 ffffffff814438a4
Jul  2 08:22:00 databunka kernel: Call Trace:
Jul  2 08:22:00 databunka kernel: [<ffffffff8144377b>] 
skb_release_data+0x80/0xfa
Jul  2 08:22:00 databunka kernel: [<ffffffff8144380b>] 
__kfree_skb+0x16/0x7d
Jul  2 08:22:00 databunka kernel: [<ffffffff814438a4>] 
kfree_skb+0x32/0x36
Jul  2 08:22:00 databunka kernel: [<ffffffffa000e83c>] 
tun_get_user+0x277/0x622 [tun]
Jul  2 08:22:00 databunka kernel: [<ffffffff8101f0dc>] ? 
default_spin_lock_flags+0x9/0xd
Jul  2 08:22:00 databunka kernel: [<ffffffffa000ec36>] 
tun_sendmsg+0x4f/0x70 [tun]
Jul  2 08:22:00 databunka kernel: [<ffffffff814cccc1>] ? 
_cond_resched+0x9/0x1d
Jul  2 08:22:00 databunka kernel: [<ffffffffa0989940>] 
handle_tx+0x3a4/0x4ae [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffffa0989a6c>] 
handle_tx_kick+0x10/0x12 [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffffa0987751>] 
vhost_worker+0xf6/0x15b [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffff814cd9a9>] ? 
_raw_spin_unlock_irqrestore+0x15/0x18
Jul  2 08:22:00 databunka kernel: [<ffffffffa098765b>] ? 
kref_sub.constprop.12+0x1d/0x1d [vhost_net]
Jul  2 08:22:00 databunka kernel: [<ffffffff81041605>] kthread+0x88/0x90
Jul  2 08:22:00 databunka kernel: [<ffffffff81040000>] ? 
parse_args+0x206/0x256
Jul  2 08:22:00 databunka kernel: [<ffffffff8104157d>] ? 
__kthread_parkme+0x60/0x60
Jul  2 08:22:00 databunka kernel: [<ffffffff814ce4fc>] 
ret_from_fork+0x7c/0xb0
Jul  2 08:22:00 databunka kernel: [<ffffffff8104157d>] ? 
__kthread_parkme+0x60/0x60
Jul  2 08:22:00 databunka kernel: Code: 83 ec 10 48 8d 55 fc c7 45 fc 00 
00 00 00 e8 07 ff ff ff 48 63 45 fc 65 48 01 04 25 d0 da 00 00 c9 c3 55 
48 89 e5 53 48 89 fb 50 <48> f7 03 00 c0 00 00 74 07 e8 46 fc ff ff eb 
11 e8 ca f7 ff ff
Jul  2 08:22:00 databunka kernel: RSP <ffff88037cc37bf8>
Jul  2 08:22:00 databunka kernel: ---[ end trace 38c4bb5d1100b013 ]---


dunno if i am right with posting this. do i have to post this to the libvirt
chan?

thanks

,marko

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a seperate config
  2013-07-02  5:45 ` Aneesh Kumar K.V
  (?)
@ 2013-07-02  8:20   ` Marek Szyprowski
  -1 siblings, 0 replies; 67+ messages in thread
From: Marek Szyprowski @ 2013-07-02  8:20 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, agraf, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Hello,

On 7/2/2013 7:45 AM, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
> We want to use CMA for allocating hash page table and real mode area for
> PPC64. Hence move DMA contiguous related changes into a seperate config
> so that ppc64 can enable CMA without requiring DMA contiguous.
>
> Acked-by: Michal Nazarewicz <mina86@mina86.com>
> Acked-by: Paul Mackerras <paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

OK. It looks like there is not that much that can be easily shared between the
dma-mapping cma provider and the ppc/kvm cma allocator. I would prefer to merge
patch 1/4 into my dma-mapping tree, because I plan some significant changes in
the cma code, see:
http://thread.gmane.org/gmane.linux.drivers.devicetree/40013/
I think it is better to keep those changes together.

For now I've merged your patch with the defconfig updates removed. AFAIK such
changes require separate handling to avoid pointless merge conflicts. I've
also prepared a topic branch, for-v3.12-cma-dma, available at
git://git.linaro.org/people/mszyprowski/linux-dma-mapping, which you can merge
together with your changes into the ppc kernel trees.

> ---
>   arch/arm/configs/omap2plus_defconfig  |  2 +-
>   arch/arm/configs/tegra_defconfig      |  2 +-
>   arch/arm/include/asm/dma-contiguous.h |  2 +-
>   arch/arm/mm/dma-mapping.c             |  6 +++---
>   drivers/base/Kconfig                  | 20 ++++----------------
>   drivers/base/Makefile                 |  2 +-
>   include/linux/dma-contiguous.h        |  2 +-
>   mm/Kconfig                            | 24 ++++++++++++++++++++++++
>   8 files changed, 36 insertions(+), 24 deletions(-)
>
> diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
> index abbe319..098268f 100644
> --- a/arch/arm/configs/omap2plus_defconfig
> +++ b/arch/arm/configs/omap2plus_defconfig
> @@ -71,7 +71,7 @@ CONFIG_MAC80211=m
>   CONFIG_MAC80211_RC_PID=y
>   CONFIG_MAC80211_RC_DEFAULT_PID=y
>   CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
> -CONFIG_CMA=y
> +CONFIG_DMA_CMA=y
>   CONFIG_CONNECTOR=y
>   CONFIG_DEVTMPFS=y
>   CONFIG_DEVTMPFS_MOUNT=y
> diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
> index f7ba3161..34ae8f2 100644
> --- a/arch/arm/configs/tegra_defconfig
> +++ b/arch/arm/configs/tegra_defconfig
> @@ -79,7 +79,7 @@ CONFIG_RFKILL_GPIO=y
>   CONFIG_DEVTMPFS=y
>   CONFIG_DEVTMPFS_MOUNT=y
>   # CONFIG_FIRMWARE_IN_KERNEL is not set
> -CONFIG_CMA=y
> +CONFIG_DMA_CMA=y
>   CONFIG_MTD=y
>   CONFIG_MTD_CHAR=y
>   CONFIG_MTD_M25P80=y
> diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
> index 3ed37b4..e072bb2 100644
> --- a/arch/arm/include/asm/dma-contiguous.h
> +++ b/arch/arm/include/asm/dma-contiguous.h
> @@ -2,7 +2,7 @@
>   #define ASMARM_DMA_CONTIGUOUS_H
>   
>   #ifdef __KERNEL__
> -#ifdef CONFIG_CMA
> +#ifdef CONFIG_DMA_CMA
>   
>   #include <linux/types.h>
>   #include <asm-generic/dma-contiguous.h>
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index ef3e0f3..1fb40dc 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -358,7 +358,7 @@ static int __init atomic_pool_init(void)
>   	if (!pages)
>   		goto no_pages;
>   
> -	if (IS_ENABLED(CONFIG_CMA))
> +	if (IS_ENABLED(CONFIG_DMA_CMA))
>   		ptr = __alloc_from_contiguous(NULL, pool->size, prot, &page,
>   					      atomic_pool_init);
>   	else
> @@ -670,7 +670,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
>   		addr = __alloc_simple_buffer(dev, size, gfp, &page);
>   	else if (!(gfp & __GFP_WAIT))
>   		addr = __alloc_from_pool(size, &page);
> -	else if (!IS_ENABLED(CONFIG_CMA))
> +	else if (!IS_ENABLED(CONFIG_DMA_CMA))
>   		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller);
>   	else
>   		addr = __alloc_from_contiguous(dev, size, prot, &page, caller);
> @@ -759,7 +759,7 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
>   		__dma_free_buffer(page, size);
>   	} else if (__free_from_pool(cpu_addr, size)) {
>   		return;
> -	} else if (!IS_ENABLED(CONFIG_CMA)) {
> +	} else if (!IS_ENABLED(CONFIG_DMA_CMA)) {
>   		__dma_free_remap(cpu_addr, size);
>   		__dma_free_buffer(page, size);
>   	} else {
> diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
> index 07abd9d..10cd80a 100644
> --- a/drivers/base/Kconfig
> +++ b/drivers/base/Kconfig
> @@ -202,11 +202,9 @@ config DMA_SHARED_BUFFER
>   	  APIs extension; the file's descriptor can then be passed on to other
>   	  driver.
>   
> -config CMA
> -	bool "Contiguous Memory Allocator"
> -	depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK
> -	select MIGRATION
> -	select MEMORY_ISOLATION
> +config DMA_CMA
> +	bool "DMA Contiguous Memory Allocator"
> +	depends on HAVE_DMA_CONTIGUOUS && CMA
>   	help
>   	  This enables the Contiguous Memory Allocator which allows drivers
>   	  to allocate big physically-contiguous blocks of memory for use with
> @@ -215,17 +213,7 @@ config CMA
>   	  For more information see <include/linux/dma-contiguous.h>.
>   	  If unsure, say "n".
>   
> -if CMA
> -
> -config CMA_DEBUG
> -	bool "CMA debug messages (DEVELOPMENT)"
> -	depends on DEBUG_KERNEL
> -	help
> -	  Turns on debug messages in CMA.  This produces KERN_DEBUG
> -	  messages for every CMA call as well as various messages while
> -	  processing calls such as dma_alloc_from_contiguous().
> -	  This option does not affect warning and error messages.
> -
> +if  DMA_CMA
>   comment "Default contiguous memory area size:"
>   
>   config CMA_SIZE_MBYTES
> diff --git a/drivers/base/Makefile b/drivers/base/Makefile
> index 4e22ce3..5d93bb5 100644
> --- a/drivers/base/Makefile
> +++ b/drivers/base/Makefile
> @@ -6,7 +6,7 @@ obj-y			:= core.o bus.o dd.o syscore.o \
>   			   attribute_container.o transport_class.o \
>   			   topology.o
>   obj-$(CONFIG_DEVTMPFS)	+= devtmpfs.o
> -obj-$(CONFIG_CMA) += dma-contiguous.o
> +obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
>   obj-y			+= power/
>   obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
>   obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
> diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
> index 01b5c84..00141d3 100644
> --- a/include/linux/dma-contiguous.h
> +++ b/include/linux/dma-contiguous.h
> @@ -57,7 +57,7 @@ struct cma;
>   struct page;
>   struct device;
>   
> -#ifdef CONFIG_CMA
> +#ifdef CONFIG_DMA_CMA
>   
>   /*
>    * There is always at least global CMA area and a few optional device
> diff --git a/mm/Kconfig b/mm/Kconfig
> index e742d06..26a5f81 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -477,3 +477,27 @@ config FRONTSWAP
>   	  and swap data is stored as normal on the matching swap device.
>   
>   	  If unsure, say Y to enable frontswap.
> +
> +config CMA
> +	bool "Contiguous Memory Allocator"
> +	depends on HAVE_MEMBLOCK
> +	select MIGRATION
> +	select MEMORY_ISOLATION
> +	help
> +	  This enables the Contiguous Memory Allocator which allows other
> +	  subsystems to allocate big physically-contiguous blocks of memory.
> +	  CMA reserves a region of memory and allows only movable pages to
> +	  be allocated from it. This way, the kernel can use the memory for
> +	  pagecache and when a subsystem requests for contiguous area, the
> +	  allocated pages are migrated away to serve the contiguous request.
> +
> +	  If unsure, say "n".
> +
> +config CMA_DEBUG
> +	bool "CMA debug messages (DEVELOPMENT)"
> +	depends on DEBUG_KERNEL && CMA
> +	help
> +	  Turns on debug messages in CMA.  This produces KERN_DEBUG
> +	  messages for every CMA call as well as various messages while
> +	  processing calls such as dma_alloc_from_contiguous().
> +	  This option does not affect warning and error messages.

Best regards
-- 
Marek Szyprowski
Samsung R&D Institute Poland



^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02  5:45   ` Aneesh Kumar K.V
@ 2013-07-02 15:12     ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:12 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>
> Powerpc architecture uses a hash based page table mechanism for mapping virtual
> addresses to physical addresses. The architecture requires this hash page table to
> be physically contiguous. With KVM on Powerpc we currently use an early reservation
> mechanism for allocating the guest hash page table. This implies that we need to
> reserve a big memory region to ensure we can create a large number of guests
> simultaneously with KVM on Power. Another disadvantage is that the reserved memory
> is not available to the rest of the subsystems, and that implies we limit the total
> available memory in the host.
>
> This patch series switches the guest hash page table allocation to use the
> contiguous memory allocator.
>
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>

Is CMA a mandatory option in the kernel? Or can it be optionally 
disabled? If it can be disabled, we should keep the preallocated 
fallback case around for systems that have CMA disabled.
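
(As an aside, and only as an assumption about how this could be resolved rather than
what the series actually does: the HV KVM Kconfig entry could select CMA so the
CMA-backed allocator is always available whenever this code is built. The sketch below
mirrors the existing arch/powerpc/kvm/Kconfig entry, with only the "select CMA" line
added.)

config KVM_BOOK3S_64_HV
	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
	depends on KVM_BOOK3S_64
	select MMU_NOTIFIER
	select CMA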


Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02  5:45   ` Aneesh Kumar K.V
@ 2013-07-02 15:17     ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:17 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>
> Older versions of the Power architecture use the Real Mode Offset register and the Real Mode
> Limit Selector for mapping the guest Real Mode Area. The guest RMA has to be physically
> contiguous since the range is used while address translation is not enabled.
>
> This patch switches the RMA allocation code to the contiguous memory allocator. The patch
> also removes the linear allocator, which is no longer used.
>
> Acked-by: Paul Mackerras<paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
>   arch/powerpc/include/asm/kvm_host.h      |  12 +--
>   arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
>   arch/powerpc/kernel/setup_64.c           |   2 -
>   arch/powerpc/kvm/book3s_hv.c             |  27 +++--
>   arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
>   6 files changed, 65 insertions(+), 152 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f8355a9..76ff0b5 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
>
>   #ifdef CONFIG_KVM_BOOK3S_64_HV
>   #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
> +extern unsigned long kvm_rma_pages;
>   #endif
>
>   #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0097dab..3328353 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
>   	struct page *pages[0];
>   };
>
> -struct kvmppc_linear_info {
> -	void		*base_virt;
> -	unsigned long	 base_pfn;
> -	unsigned long	 npages;
> -	struct list_head list;
> -	atomic_t	 use_count;
> -	int		 type;
> +struct kvm_rma_info {
> +	atomic_t use_count;
> +	unsigned long base_pfn;
>   };
>
>   /* XICS components, defined in book3s_xics.c */
> @@ -246,7 +242,7 @@ struct kvm_arch {
>   	int tlbie_lock;
>   	unsigned long lpcr;
>   	unsigned long rmor;
> -	struct kvmppc_linear_info *rma;
> +	struct kvm_rma_info *rma;
>   	unsigned long vrma_slb_v;
>   	int rma_setup_done;
>   	int using_mmu_notifiers;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b5ef7a3..5a26bfc 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
>   			     unsigned long ioba, unsigned long tce);
>   extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
>   				struct kvm_allocate_rma *rma);
> -extern struct kvmppc_linear_info *kvm_alloc_rma(void);
> -extern void kvm_release_rma(struct kvmppc_linear_info *ri);
> +extern struct kvm_rma_info *kvm_alloc_rma(void);
> +extern void kvm_release_rma(struct kvm_rma_info *ri);
>   extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
>   extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
>   extern int kvmppc_core_init_vm(struct kvm *kvm);
> @@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
>   }
>
>   extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
> -extern void kvm_linear_init(void);
>
>   #else
>   static inline void __init kvm_cma_reserve(void)
> @@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void)
>   static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
>   {}
>
> -static inline void kvm_linear_init(void)
> -{}
> -
>   static inline u32 kvmppc_get_xics_latch(void)
>   {
>   	return 0;
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index ee28d1f..8a022f5 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
>   	/* Initialize the MMU context management stuff */
>   	mmu_context_init();
>
> -	kvm_linear_init();
> -
>   	/* Interrupt code needs to be 64K-aligned */
>   	if ((unsigned long)_stext&  0xffff)
>   		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 550f592..55c8519 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>   {
> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>   	struct page *page;
> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>
> -	if (vmf->pgoff>= ri->npages)
> +	if (vmf->pgoff>= kvm_rma_pages)
>   		return VM_FAULT_SIGBUS;
>
>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>
>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>   {
> -	struct kvmppc_linear_info *ri = filp->private_data;
> +	struct kvm_rma_info *ri = filp->private_data;
>
>   	kvm_release_rma(ri);
>   	return 0;
> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>
>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   {
> -	struct kvmppc_linear_info *ri;
>   	long fd;
> +	struct kvm_rma_info *ri;
> +	/*
> +	 * Only do this on PPC970 in HV mode
> +	 */
> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
> +		return -EINVAL;

Is this really what we want? User space may want to use an RMA on POWER7 
systems, no?
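For illustration only, a hypothetical relaxation of that check would keep the HV-mode
requirement but no longer restrict the ioctl to PPC970:

	/* hypothetical sketch: require HV mode, but allow POWER7 as well */
	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;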

> +
> +	if (!kvm_rma_pages)
> +		return -EINVAL;
>
>   	ri = kvm_alloc_rma();
>   	if (!ri)
> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   	if (fd<  0)
>   		kvm_release_rma(ri);
>
> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>   	return fd;
>   }
>
> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   {
>   	int err = 0;
>   	struct kvm *kvm = vcpu->kvm;
> -	struct kvmppc_linear_info *ri = NULL;
> +	struct kvm_rma_info *ri = NULL;
>   	unsigned long hva;
>   	struct kvm_memory_slot *memslot;
>   	struct vm_area_struct *vma;
> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>
>   	} else {
>   		/* Set up to use an RMO region */
> -		rma_size = ri->npages;
> +		rma_size = kvm_rma_pages;
>   		if (rma_size>  memslot->npages)
>   			rma_size = memslot->npages;
>   		rma_size<<= PAGE_SHIFT;
> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   			/* POWER7 */
>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>   			lpcr |= rmls<<  LPCR_RMLS_SH;
> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>   		}
>   		kvm->arch.lpcr = lpcr;
>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>
>   		/* Initialize phys addrs of pages in RMO */
> -		npages = ri->npages;
> +		npages = kvm_rma_pages;
>   		porder = __ilog2(npages);
>   		physp = memslot->arch.slot_phys;
>   		if (physp) {
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index 4b865c5..8cd0dae 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -21,13 +21,6 @@
>   #include<asm/kvm_book3s.h>
>
>   #include "book3s_hv_cma.h"
> -
> -#define KVM_LINEAR_RMA		0
> -#define KVM_LINEAR_HPT		1
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type);
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>   /*
>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>    * should be power of 2.
> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>    * By default we reserve 5% of memory for hash pagetable allocation.
>    */
>   static unsigned long kvm_cma_resv_ratio = 5;
> -
> -/*************** RMA *************/
> -
>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */
> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
> -static unsigned long kvm_rma_count;
> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>
>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>      Assumes POWER7 or PPC970. */
> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int __init early_parse_rma_size(char *p)
>   {
> -	if (!p)
> -		return 1;
> +	unsigned long kvm_rma_size;
>
> +	pr_debug("%s(%s)\n", __func__, p);
> +	if (!p)
> +		return -EINVAL;
>   	kvm_rma_size = memparse(p,&p);
> -
> +	/*
> +	 * Check that the requested size is one supported in hardware
> +	 */
> +	if (lpcr_rmls(kvm_rma_size)<  0) {
> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> +		return -EINVAL;
> +	}
> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>   	return 0;
>   }
>   early_param("kvm_rma_size", early_parse_rma_size);
>
> -static int __init early_parse_rma_count(char *p)
> +struct kvm_rma_info *kvm_alloc_rma()
>   {
> -	if (!p)
> -		return 1;
> -
> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
> -
> -	return 0;
> -}
> -early_param("kvm_rma_count", early_parse_rma_count);
> -
> -struct kvmppc_linear_info *kvm_alloc_rma(void)
> -{
> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
> +	struct page *page;
> +	struct kvm_rma_info *ri;
> +
> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
> +	if (!ri)
> +		return NULL;
> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
> +	if (!page)
> +		goto err_out;
> +	atomic_set(&ri->use_count, 1);
> +	ri->base_pfn = page_to_pfn(page);
> +	return ri;
> +err_out:
> +	kfree(ri);
> +	return NULL;
>   }
>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>
> -void kvm_release_rma(struct kvmppc_linear_info *ri)
> +void kvm_release_rma(struct kvm_rma_info *ri)
>   {
> -	kvm_release_linear(ri);
> +	if (atomic_dec_and_test(&ri->use_count)) {
> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
> +		kfree(ri);
> +	}
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>
> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>
> -/*************** generic *************/
> -
> -static LIST_HEAD(free_linears);
> -static DEFINE_SPINLOCK(linear_lock);
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type)

Please split the linear removal bits out into a separate patch :).


Alex

> -{
> -	unsigned long i;
> -	unsigned long j, npages;
> -	void *linear;
> -	struct page *pg;
> -	const char *typestr;
> -	struct kvmppc_linear_info *linear_info;
> -
> -	if (!count)
> -		return;
> -
> -	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
> -
> -	npages = size>>  PAGE_SHIFT;
> -	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
> -	for (i = 0; i<  count; ++i) {
> -		linear = alloc_bootmem_align(size, size);
> -		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
> -			 size>>  20);
> -		linear_info[i].base_virt = linear;
> -		linear_info[i].base_pfn = __pa(linear)>>  PAGE_SHIFT;
> -		linear_info[i].npages = npages;
> -		linear_info[i].type = type;
> -		list_add_tail(&linear_info[i].list,&free_linears);
> -		atomic_set(&linear_info[i].use_count, 0);
> -
> -		pg = pfn_to_page(linear_info[i].base_pfn);
> -		for (j = 0; j<  npages; ++j) {
> -			atomic_inc(&pg->_count);
> -			++pg;
> -		}
> -	}
> -}
> -
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type)
> -{
> -	struct kvmppc_linear_info *ri, *ret;
> -
> -	ret = NULL;
> -	spin_lock(&linear_lock);
> -	list_for_each_entry(ri,&free_linears, list) {
> -		if (ri->type != type)
> -			continue;
> -
> -		list_del(&ri->list);
> -		atomic_inc(&ri->use_count);
> -		memset(ri->base_virt, 0, ri->npages<<  PAGE_SHIFT);
> -		ret = ri;
> -		break;
> -	}
> -	spin_unlock(&linear_lock);
> -	return ret;
> -}
> -
> -static void kvm_release_linear(struct kvmppc_linear_info *ri)
> -{
> -	if (atomic_dec_and_test(&ri->use_count)) {
> -		spin_lock(&linear_lock);
> -		list_add_tail(&ri->list,&free_linears);
> -		spin_unlock(&linear_lock);
> -
> -	}
> -}
> -
> -/*
> - * Called at boot time while the bootmem allocator is active,
> - * to allocate contiguous physical memory for the hash page
> - * tables for guests.
> - */
> -void __init kvm_linear_init(void)
> -{
> -	/* RMA */
> -	/* Only do this on PPC970 in HV mode */
> -	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> -	    !cpu_has_feature(CPU_FTR_ARCH_201))
> -		return;
> -
> -	if (!kvm_rma_size || !kvm_rma_count)
> -		return;
> -
> -	/* Check that the requested size is one supported in hardware */
> -	if (lpcr_rmls(kvm_rma_size)<  0) {
> -		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> -		return;
> -	}
> -
> -	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
> -}
> -
>   /**
>    * kvm_cma_reserve() - reserve area for kvm hash pagetable
>    *
> @@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void)
>   			align_size = __rounddown_pow_of_two(selected_size);
>   		else
>   			align_size = HPT_ALIGN_PAGES<<  PAGE_SHIFT;
> +
> +		align_size = max(kvm_rma_pages<<  PAGE_SHIFT, align_size);
>   		kvm_cma_declare_contiguous(selected_size, align_size);
>   	}
>   }

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 15:17     ` Alexander Graf
  0 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:17 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>
> Older versions of the Power architecture use the Real Mode Offset register and the Real Mode
> Limit Selector for mapping the guest Real Mode Area. The guest RMA has to be physically
> contiguous since the range is used while address translation is not enabled.
>
> This patch switches the RMA allocation code to the contiguous memory allocator. The patch
> also removes the linear allocator, which is no longer used.
>
> Acked-by: Paul Mackerras<paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
>   arch/powerpc/include/asm/kvm_host.h      |  12 +--
>   arch/powerpc/include/asm/kvm_ppc.h       |   8 +-
>   arch/powerpc/kernel/setup_64.c           |   2 -
>   arch/powerpc/kvm/book3s_hv.c             |  27 +++--
>   arch/powerpc/kvm/book3s_hv_builtin.c     | 167 ++++++++-----------------------
>   6 files changed, 65 insertions(+), 152 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
> index f8355a9..76ff0b5 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_64.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_64.h
> @@ -37,6 +37,7 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
>
>   #ifdef CONFIG_KVM_BOOK3S_64_HV
>   #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
> +extern unsigned long kvm_rma_pages;
>   #endif
>
>   #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 0097dab..3328353 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -183,13 +183,9 @@ struct kvmppc_spapr_tce_table {
>   	struct page *pages[0];
>   };
>
> -struct kvmppc_linear_info {
> -	void		*base_virt;
> -	unsigned long	 base_pfn;
> -	unsigned long	 npages;
> -	struct list_head list;
> -	atomic_t	 use_count;
> -	int		 type;
> +struct kvm_rma_info {
> +	atomic_t use_count;
> +	unsigned long base_pfn;
>   };
>
>   /* XICS components, defined in book3s_xics.c */
> @@ -246,7 +242,7 @@ struct kvm_arch {
>   	int tlbie_lock;
>   	unsigned long lpcr;
>   	unsigned long rmor;
> -	struct kvmppc_linear_info *rma;
> +	struct kvm_rma_info *rma;
>   	unsigned long vrma_slb_v;
>   	int rma_setup_done;
>   	int using_mmu_notifiers;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index b5ef7a3..5a26bfc 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -137,8 +137,8 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
>   			     unsigned long ioba, unsigned long tce);
>   extern long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
>   				struct kvm_allocate_rma *rma);
> -extern struct kvmppc_linear_info *kvm_alloc_rma(void);
> -extern void kvm_release_rma(struct kvmppc_linear_info *ri);
> +extern struct kvm_rma_info *kvm_alloc_rma(void);
> +extern void kvm_release_rma(struct kvm_rma_info *ri);
>   extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
>   extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
>   extern int kvmppc_core_init_vm(struct kvm *kvm);
> @@ -282,7 +282,6 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
>   }
>
>   extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu);
> -extern void kvm_linear_init(void);
>
>   #else
>   static inline void __init kvm_cma_reserve(void)
> @@ -291,9 +290,6 @@ static inline void __init kvm_cma_reserve(void)
>   static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
>   {}
>
> -static inline void kvm_linear_init(void)
> -{}
> -
>   static inline u32 kvmppc_get_xics_latch(void)
>   {
>   	return 0;
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index ee28d1f..8a022f5 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -611,8 +611,6 @@ void __init setup_arch(char **cmdline_p)
>   	/* Initialize the MMU context management stuff */
>   	mmu_context_init();
>
> -	kvm_linear_init();
> -
>   	/* Interrupt code needs to be 64K-aligned */
>   	if ((unsigned long)_stext&  0xffff)
>   		panic("Kernelbase not 64K-aligned (0x%lx)!\n",
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 550f592..55c8519 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>   {
> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>   	struct page *page;
> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>
> -	if (vmf->pgoff>= ri->npages)
> +	if (vmf->pgoff>= kvm_rma_pages)
>   		return VM_FAULT_SIGBUS;
>
>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>
>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>   {
> -	struct kvmppc_linear_info *ri = filp->private_data;
> +	struct kvm_rma_info *ri = filp->private_data;
>
>   	kvm_release_rma(ri);
>   	return 0;
> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>
>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   {
> -	struct kvmppc_linear_info *ri;
>   	long fd;
> +	struct kvm_rma_info *ri;
> +	/*
> +	 * Only do this on PPC970 in HV mode
> +	 */
> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
> +		return -EINVAL;

Is this really what we want? User space may want to use an RMA on POWER7 
systems, no?

> +
> +	if (!kvm_rma_pages)
> +		return -EINVAL;
>
>   	ri = kvm_alloc_rma();
>   	if (!ri)
> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>   	if (fd<  0)
>   		kvm_release_rma(ri);
>
> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>   	return fd;
>   }
>
> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   {
>   	int err = 0;
>   	struct kvm *kvm = vcpu->kvm;
> -	struct kvmppc_linear_info *ri = NULL;
> +	struct kvm_rma_info *ri = NULL;
>   	unsigned long hva;
>   	struct kvm_memory_slot *memslot;
>   	struct vm_area_struct *vma;
> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>
>   	} else {
>   		/* Set up to use an RMO region */
> -		rma_size = ri->npages;
> +		rma_size = kvm_rma_pages;
>   		if (rma_size>  memslot->npages)
>   			rma_size = memslot->npages;
>   		rma_size<<= PAGE_SHIFT;
> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>   			/* POWER7 */
>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>   			lpcr |= rmls<<  LPCR_RMLS_SH;
> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>   		}
>   		kvm->arch.lpcr = lpcr;
>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>
>   		/* Initialize phys addrs of pages in RMO */
> -		npages = ri->npages;
> +		npages = kvm_rma_pages;
>   		porder = __ilog2(npages);
>   		physp = memslot->arch.slot_phys;
>   		if (physp) {
> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
> index 4b865c5..8cd0dae 100644
> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
> @@ -21,13 +21,6 @@
>   #include<asm/kvm_book3s.h>
>
>   #include "book3s_hv_cma.h"
> -
> -#define KVM_LINEAR_RMA		0
> -#define KVM_LINEAR_HPT		1
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type);
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>   /*
>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>    * should be power of 2.
> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>    * By default we reserve 5% of memory for hash pagetable allocation.
>    */
>   static unsigned long kvm_cma_resv_ratio = 5;
> -
> -/*************** RMA *************/
> -
>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */
> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
> -static unsigned long kvm_rma_count;
> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>
>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>      Assumes POWER7 or PPC970. */
> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>
>   static int __init early_parse_rma_size(char *p)
>   {
> -	if (!p)
> -		return 1;
> +	unsigned long kvm_rma_size;
>
> +	pr_debug("%s(%s)\n", __func__, p);
> +	if (!p)
> +		return -EINVAL;
>   	kvm_rma_size = memparse(p,&p);
> -
> +	/*
> +	 * Check that the requested size is one supported in hardware
> +	 */
> +	if (lpcr_rmls(kvm_rma_size)<  0) {
> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> +		return -EINVAL;
> +	}
> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>   	return 0;
>   }
>   early_param("kvm_rma_size", early_parse_rma_size);
>
> -static int __init early_parse_rma_count(char *p)
> +struct kvm_rma_info *kvm_alloc_rma()
>   {
> -	if (!p)
> -		return 1;
> -
> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
> -
> -	return 0;
> -}
> -early_param("kvm_rma_count", early_parse_rma_count);
> -
> -struct kvmppc_linear_info *kvm_alloc_rma(void)
> -{
> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
> +	struct page *page;
> +	struct kvm_rma_info *ri;
> +
> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
> +	if (!ri)
> +		return NULL;
> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
> +	if (!page)
> +		goto err_out;
> +	atomic_set(&ri->use_count, 1);
> +	ri->base_pfn = page_to_pfn(page);
> +	return ri;
> +err_out:
> +	kfree(ri);
> +	return NULL;
>   }
>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>
> -void kvm_release_rma(struct kvmppc_linear_info *ri)
> +void kvm_release_rma(struct kvm_rma_info *ri)
>   {
> -	kvm_release_linear(ri);
> +	if (atomic_dec_and_test(&ri->use_count)) {
> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
> +		kfree(ri);
> +	}
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>
> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>   }
>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>
> -/*************** generic *************/
> -
> -static LIST_HEAD(free_linears);
> -static DEFINE_SPINLOCK(linear_lock);
> -
> -static void __init kvm_linear_init_one(ulong size, int count, int type)

Please split the linear removal bits out into a separate patch :).


Alex

> -{
> -	unsigned long i;
> -	unsigned long j, npages;
> -	void *linear;
> -	struct page *pg;
> -	const char *typestr;
> -	struct kvmppc_linear_info *linear_info;
> -
> -	if (!count)
> -		return;
> -
> -	typestr = (type = KVM_LINEAR_RMA) ? "RMA" : "HPT";
> -
> -	npages = size>>  PAGE_SHIFT;
> -	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
> -	for (i = 0; i<  count; ++i) {
> -		linear = alloc_bootmem_align(size, size);
> -		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
> -			 size>>  20);
> -		linear_info[i].base_virt = linear;
> -		linear_info[i].base_pfn = __pa(linear)>>  PAGE_SHIFT;
> -		linear_info[i].npages = npages;
> -		linear_info[i].type = type;
> -		list_add_tail(&linear_info[i].list,&free_linears);
> -		atomic_set(&linear_info[i].use_count, 0);
> -
> -		pg = pfn_to_page(linear_info[i].base_pfn);
> -		for (j = 0; j<  npages; ++j) {
> -			atomic_inc(&pg->_count);
> -			++pg;
> -		}
> -	}
> -}
> -
> -static struct kvmppc_linear_info *kvm_alloc_linear(int type)
> -{
> -	struct kvmppc_linear_info *ri, *ret;
> -
> -	ret = NULL;
> -	spin_lock(&linear_lock);
> -	list_for_each_entry(ri,&free_linears, list) {
> -		if (ri->type != type)
> -			continue;
> -
> -		list_del(&ri->list);
> -		atomic_inc(&ri->use_count);
> -		memset(ri->base_virt, 0, ri->npages<<  PAGE_SHIFT);
> -		ret = ri;
> -		break;
> -	}
> -	spin_unlock(&linear_lock);
> -	return ret;
> -}
> -
> -static void kvm_release_linear(struct kvmppc_linear_info *ri)
> -{
> -	if (atomic_dec_and_test(&ri->use_count)) {
> -		spin_lock(&linear_lock);
> -		list_add_tail(&ri->list,&free_linears);
> -		spin_unlock(&linear_lock);
> -
> -	}
> -}
> -
> -/*
> - * Called at boot time while the bootmem allocator is active,
> - * to allocate contiguous physical memory for the hash page
> - * tables for guests.
> - */
> -void __init kvm_linear_init(void)
> -{
> -	/* RMA */
> -	/* Only do this on PPC970 in HV mode */
> -	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
> -	    !cpu_has_feature(CPU_FTR_ARCH_201))
> -		return;
> -
> -	if (!kvm_rma_size || !kvm_rma_count)
> -		return;
> -
> -	/* Check that the requested size is one supported in hardware */
> -	if (lpcr_rmls(kvm_rma_size)<  0) {
> -		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
> -		return;
> -	}
> -
> -	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
> -}
> -
>   /**
>    * kvm_cma_reserve() - reserve area for kvm hash pagetable
>    *
> @@ -265,6 +176,8 @@ void __init kvm_cma_reserve(void)
>   			align_size = __rounddown_pow_of_two(selected_size);
>   		else
>   			align_size = HPT_ALIGN_PAGES<<  PAGE_SHIFT;
> +
> +		align_size = max(kvm_rma_pages<<  PAGE_SHIFT, align_size);
>   		kvm_cma_declare_contiguous(selected_size, align_size);
>   	}
>   }


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02 15:17     ` Alexander Graf
  (?)
  (?)
@ 2013-07-02 15:29       ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 15:29 UTC (permalink / raw)
  To: Alexander Graf
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>
>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>> contigous since we use the range when address translation is not enabled.
>>
>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>> also remove the the linear allocator which not used any more
>>
>> Acked-by: Paul Mackerras<paulus@samba.org>
>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>> ---

.... snip ....

>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 550f592..55c8519 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>   {
>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>   	struct page *page;
>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>
>> -	if (vmf->pgoff>= ri->npages)
>> +	if (vmf->pgoff>= kvm_rma_pages)
>>   		return VM_FAULT_SIGBUS;
>>
>>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>
>>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>>   {
>> -	struct kvmppc_linear_info *ri = filp->private_data;
>> +	struct kvm_rma_info *ri = filp->private_data;
>>
>>   	kvm_release_rma(ri);
>>   	return 0;
>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>
>>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   {
>> -	struct kvmppc_linear_info *ri;
>>   	long fd;
>> +	struct kvm_rma_info *ri;
>> +	/*
>> +	 * Only do this on PPC970 in HV mode
>> +	 */
>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>> +		return -EINVAL;
>
> Is this really what we want? User space may want to use an RMA on POWER7 
> systems, no?

IIUC they will use virtual real mode area (VRMA) and not RMA
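
(For illustration only -- a hypothetical helper, not part of this series,
that captures the distinction: only PPC970 lacks the VRMA, so only there
does the guest need a host-physically-contiguous RMA.)

/* hypothetical sketch, not in the patch */
static bool kvm_needs_real_rma(void)
{
	/*
	 * PPC970 (ISA 2.01) in HV mode has no virtual real mode area,
	 * so it is the only case that needs a CMA-backed RMA; POWER7
	 * and later run guest real mode through the VRMA instead.
	 */
	return cpu_has_feature(CPU_FTR_HVMODE) &&
	       cpu_has_feature(CPU_FTR_ARCH_201);
}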

>
>> +
>> +	if (!kvm_rma_pages)
>> +		return -EINVAL;
>>
>>   	ri = kvm_alloc_rma();
>>   	if (!ri)
>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   	if (fd<  0)
>>   		kvm_release_rma(ri);
>>
>> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
>> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>>   	return fd;
>>   }
>>
>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   {
>>   	int err = 0;
>>   	struct kvm *kvm = vcpu->kvm;
>> -	struct kvmppc_linear_info *ri = NULL;
>> +	struct kvm_rma_info *ri = NULL;
>>   	unsigned long hva;
>>   	struct kvm_memory_slot *memslot;
>>   	struct vm_area_struct *vma;
>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>
>>   	} else {
>>   		/* Set up to use an RMO region */
>> -		rma_size = ri->npages;
>> +		rma_size = kvm_rma_pages;
>>   		if (rma_size>  memslot->npages)
>>   			rma_size = memslot->npages;
>>   		rma_size<<= PAGE_SHIFT;
>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   			/* POWER7 */
>>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>   			lpcr |= rmls<<  LPCR_RMLS_SH;
>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
>> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>>   		}
>>   		kvm->arch.lpcr = lpcr;
>>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>>
>>   		/* Initialize phys addrs of pages in RMO */
>> -		npages = ri->npages;
>> +		npages = kvm_rma_pages;
>>   		porder = __ilog2(npages);
>>   		physp = memslot->arch.slot_phys;
>>   		if (physp) {
>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>> index 4b865c5..8cd0dae 100644
>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>> @@ -21,13 +21,6 @@
>>   #include<asm/kvm_book3s.h>
>>
>>   #include "book3s_hv_cma.h"
>> -
>> -#define KVM_LINEAR_RMA		0
>> -#define KVM_LINEAR_HPT		1
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>   /*
>>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>    * should be power of 2.
>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>    * By default we reserve 5% of memory for hash pagetable allocation.
>>    */
>>   static unsigned long kvm_cma_resv_ratio = 5;
>> -
>> -/*************** RMA *************/
>> -
>>   /*
>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>    * Each RMA has to be physically contiguous and of a size that the
>>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>    * and other larger sizes.  Since we are unlikely to be allocate that
>>    * much physically contiguous memory after the system is up and running,
>> - * we preallocate a set of RMAs in early boot for KVM to use.
>> + * we preallocate a set of RMAs in early boot using CMA.
>> + * should be power of 2.
>>    */
>> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
>> -static unsigned long kvm_rma_count;
>> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>
>>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>      Assumes POWER7 or PPC970. */
>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int __init early_parse_rma_size(char *p)
>>   {
>> -	if (!p)
>> -		return 1;
>> +	unsigned long kvm_rma_size;
>>
>> +	pr_debug("%s(%s)\n", __func__, p);
>> +	if (!p)
>> +		return -EINVAL;
>>   	kvm_rma_size = memparse(p,&p);
>> -
>> +	/*
>> +	 * Check that the requested size is one supported in hardware
>> +	 */
>> +	if (lpcr_rmls(kvm_rma_size)<  0) {
>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>> +		return -EINVAL;
>> +	}
>> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>>   	return 0;
>>   }
>>   early_param("kvm_rma_size", early_parse_rma_size);
>>
>> -static int __init early_parse_rma_count(char *p)
>> +struct kvm_rma_info *kvm_alloc_rma()
>>   {
>> -	if (!p)
>> -		return 1;
>> -
>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>> -
>> -	return 0;
>> -}
>> -early_param("kvm_rma_count", early_parse_rma_count);
>> -
>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>> -{
>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>> +	struct page *page;
>> +	struct kvm_rma_info *ri;
>> +
>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>> +	if (!ri)
>> +		return NULL;
>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>> +	if (!page)
>> +		goto err_out;
>> +	atomic_set(&ri->use_count, 1);
>> +	ri->base_pfn = page_to_pfn(page);
>> +	return ri;
>> +err_out:
>> +	kfree(ri);
>> +	return NULL;
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>
>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>   {
>> -	kvm_release_linear(ri);
>> +	if (atomic_dec_and_test(&ri->use_count)) {
>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>> +		kfree(ri);
>> +	}
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>>
>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>
>> -/*************** generic *************/
>> -
>> -static LIST_HEAD(free_linears);
>> -static DEFINE_SPINLOCK(linear_lock);
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>
> Please split the linar removal bits out into a separate patch :).
>
>

That was the way I had it in the earlier patchset. Splitting it out causes a
bisect build break, because we treat warnings as errors and would hit an
unused-function warning.

I also realized that the linear alloc functions are nearby and mostly fall
in the same hunk, hence I folded the removal back in.
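
(Purely illustrative -- a minimal sketch, not from the series, of the kind
of diagnostic that breaks a bisect step when the callers are converted in
one patch and the now-unused helpers are only removed in a later one,
given that powerpc builds can treat warnings as errors, e.g. with
CONFIG_PPC_WERROR=y:)

/* intermediate tree: CMA conversion applied, old helper not yet removed */
static int kvm_alloc_linear_stub(void)
{
	return 0;	/* no remaining callers anywhere in the file */
}
/*
 * gcc: warning: 'kvm_alloc_linear_stub' defined but not used
 * [-Wunused-function] -- promoted to a hard error by -Werror,
 * so that point in the series no longer builds.
 */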

-aneesh

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02 15:12     ` Alexander Graf
  (?)
  (?)
@ 2013-07-02 15:31       ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 15:31 UTC (permalink / raw)
  To: Alexander Graf
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>
>> Powerpc architecture uses a hash based page table mechanism for mapping virtual
>> addresses to physical address. The architecture require this hash page table to
>> be physically contiguous. With KVM on Powerpc currently we use early reservation
>> mechanism for allocating guest hash page table. This implies that we need to
>> reserve a big memory region to ensure we can create large number of guest
>> simultaneously with KVM on Power. Another disadvantage is that the reserved memory
>> is not available to rest of the subsystems and and that implies we limit the total
>> available memory in the host.
>>
>> This patch series switch the guest hash page table allocation to use
>> contiguous memory allocator.
>>
>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>
> Is CMA a mandatory option in the kernel? Or can it be optionally 
> disabled? If it can be disabled, we should keep the preallocated 
> fallback case around for systems that have CMA disabled.
>

CMA is not a mandatory option. But we have 

config KVM_BOOK3S_64_HV
	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
	depends on KVM_BOOK3S_64
	select MMU_NOTIFIER
	select CMA

i.e., for Book3S HV we select CMA, and the only thing CMA needs is memblock,
which we already support.

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02 15:29       ` Aneesh Kumar K.V
  (?)
  (?)
@ 2013-07-02 15:32         ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>> contigous since we use the range when address translation is not enabled.
>>>
>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>> also remove the the linear allocator which not used any more
>>>
>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>> ---
> .... snip ....
>
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 550f592..55c8519 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>    {
>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>    	struct page *page;
>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>
>>> -	if (vmf->pgoff>= ri->npages)
>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>    		return VM_FAULT_SIGBUS;
>>>
>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>
>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>    {
>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>
>>>    	kvm_release_rma(ri);
>>>    	return 0;
>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>
>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    {
>>> -	struct kvmppc_linear_info *ri;
>>>    	long fd;
>>> +	struct kvm_rma_info *ri;
>>> +	/*
>>> +	 * Only do this on PPC970 in HV mode
>>> +	 */
>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>> +		return -EINVAL;
>> Is this really what we want? User space may want to use an RMA on POWER7
>> systems, no?
> IIUC they will use virtual real mode area (VRMA) and not RMA

Then I suppose we should at least update the comment a bit further down in
the patch, which indicates that we do support a real RMA on POWER7 systems.
I can't really think of any reason why user space would want to use an RMA
over the VRMA.
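
(Sketch only -- one possible rewording of that comment, not an actual
follow-up patch:)

/*
 * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA
 * area.  A preallocated RMA is only needed on PPC970; POWER7 and later
 * normally run guest real mode through the VRMA, so user space has
 * little reason to request an RMA there.
 */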

>
>>> +
>>> +	if (!kvm_rma_pages)
>>> +		return -EINVAL;
>>>
>>>    	ri = kvm_alloc_rma();
>>>    	if (!ri)
>>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    	if (fd<   0)
>>>    		kvm_release_rma(ri);
>>>
>>> -	ret->rma_size = ri->npages<<   PAGE_SHIFT;
>>> +	ret->rma_size = kvm_rma_pages<<   PAGE_SHIFT;
>>>    	return fd;
>>>    }
>>>
>>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    {
>>>    	int err = 0;
>>>    	struct kvm *kvm = vcpu->kvm;
>>> -	struct kvmppc_linear_info *ri = NULL;
>>> +	struct kvm_rma_info *ri = NULL;
>>>    	unsigned long hva;
>>>    	struct kvm_memory_slot *memslot;
>>>    	struct vm_area_struct *vma;
>>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>
>>>    	} else {
>>>    		/* Set up to use an RMO region */
>>> -		rma_size = ri->npages;
>>> +		rma_size = kvm_rma_pages;
>>>    		if (rma_size>   memslot->npages)
>>>    			rma_size = memslot->npages;
>>>    		rma_size<<= PAGE_SHIFT;
>>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    			/* POWER7 */
>>>    			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>>    			lpcr |= rmls<<   LPCR_RMLS_SH;
>>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<   PAGE_SHIFT;
>>> +			kvm->arch.rmor = ri->base_pfn<<   PAGE_SHIFT;
>>>    		}
>>>    		kvm->arch.lpcr = lpcr;
>>>    		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>>    			ri->base_pfn<<   PAGE_SHIFT, rma_size, lpcr);
>>>
>>>    		/* Initialize phys addrs of pages in RMO */
>>> -		npages = ri->npages;
>>> +		npages = kvm_rma_pages;
>>>    		porder = __ilog2(npages);
>>>    		physp = memslot->arch.slot_phys;
>>>    		if (physp) {
>>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> index 4b865c5..8cd0dae 100644
>>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> @@ -21,13 +21,6 @@
>>>    #include<asm/kvm_book3s.h>
>>>
>>>    #include "book3s_hv_cma.h"
>>> -
>>> -#define KVM_LINEAR_RMA		0
>>> -#define KVM_LINEAR_HPT		1
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>    /*
>>>     * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>>     * should be power of 2.
>>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>     * By default we reserve 5% of memory for hash pagetable allocation.
>>>     */
>>>    static unsigned long kvm_cma_resv_ratio = 5;
>>> -
>>> -/*************** RMA *************/
>>> -
>>>    /*
>>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>>     * Each RMA has to be physically contiguous and of a size that the
>>>     * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>>     * and other larger sizes.  Since we are unlikely to be allocate that
>>>     * much physically contiguous memory after the system is up and running,
>>> - * we preallocate a set of RMAs in early boot for KVM to use.
>>> + * we preallocate a set of RMAs in early boot using CMA.
>>> + * should be power of 2.
>>>     */
>>> -static unsigned long kvm_rma_size = 64<<   20;	/* 64MB */
>>> -static unsigned long kvm_rma_count;
>>> +unsigned long kvm_rma_pages = (1<<   27)>>   PAGE_SHIFT;	/* 128MB */
>>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>>
>>>    /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>>       Assumes POWER7 or PPC970. */
>>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int __init early_parse_rma_size(char *p)
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> +	unsigned long kvm_rma_size;
>>>
>>> +	pr_debug("%s(%s)\n", __func__, p);
>>> +	if (!p)
>>> +		return -EINVAL;
>>>    	kvm_rma_size = memparse(p,&p);
>>> -
>>> +	/*
>>> +	 * Check that the requested size is one supported in hardware
>>> +	 */
>>> +	if (lpcr_rmls(kvm_rma_size)<   0) {
>>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>>> +		return -EINVAL;
>>> +	}
>>> +	kvm_rma_pages = kvm_rma_size>>   PAGE_SHIFT;
>>>    	return 0;
>>>    }
>>>    early_param("kvm_rma_size", early_parse_rma_size);
>>>
>>> -static int __init early_parse_rma_count(char *p)
>>> +struct kvm_rma_info *kvm_alloc_rma()
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> -
>>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>>> -
>>> -	return 0;
>>> -}
>>> -early_param("kvm_rma_count", early_parse_rma_count);
>>> -
>>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>>> -{
>>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>>> +	struct page *page;
>>> +	struct kvm_rma_info *ri;
>>> +
>>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>>> +	if (!ri)
>>> +		return NULL;
>>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>>> +	if (!page)
>>> +		goto err_out;
>>> +	atomic_set(&ri->use_count, 1);
>>> +	ri->base_pfn = page_to_pfn(page);
>>> +	return ri;
>>> +err_out:
>>> +	kfree(ri);
>>> +	return NULL;
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>>
>>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>>    {
>>> -	kvm_release_linear(ri);
>>> +	if (atomic_dec_and_test(&ri->use_count)) {
>>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>>> +		kfree(ri);
>>> +	}
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_rma);
>>>
>>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>>
>>> -/*************** generic *************/
>>> -
>>> -static LIST_HEAD(free_linears);
>>> -static DEFINE_SPINLOCK(linear_lock);
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>> Please split the linar removal bits out into a separate patch :).
>>
>>
> That was the way I had in the earlier patchset. That will cause a bisect
> build break, because we consider warnings as error and we hit warning
> of unused function.
>
> I also realized that linear alloc functions are nearby and mostly fall
> in the same hunk. Hence folded it back.

Fair enough :)


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 15:32         ` Alexander Graf
  0 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>> contigous since we use the range when address translation is not enabled.
>>>
>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>> also remove the the linear allocator which not used any more
>>>
>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>> ---
> .... snip ....
>
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 550f592..55c8519 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>    {
>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>    	struct page *page;
>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>
>>> -	if (vmf->pgoff>= ri->npages)
>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>    		return VM_FAULT_SIGBUS;
>>>
>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>
>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>    {
>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>
>>>    	kvm_release_rma(ri);
>>>    	return 0;
>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>
>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    {
>>> -	struct kvmppc_linear_info *ri;
>>>    	long fd;
>>> +	struct kvm_rma_info *ri;
>>> +	/*
>>> +	 * Only do this on PPC970 in HV mode
>>> +	 */
>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>> +		return -EINVAL;
>> Is this really what we want? User space may want to use an RMA on POWER7
>> systems, no?
> IIUC they will use virtual real mode area (VRMA) and not RMA

Then I suppose we should at least update the comment a bit further down 
the patch that indicates that on POWER7 systems we do support a real 
RMA. I can't really think of any reason why user space would want to use 
RMA over VRMA.

>
>>> +
>>> +	if (!kvm_rma_pages)
>>> +		return -EINVAL;
>>>
>>>    	ri = kvm_alloc_rma();
>>>    	if (!ri)
>>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    	if (fd<   0)
>>>    		kvm_release_rma(ri);
>>>
>>> -	ret->rma_size = ri->npages<<   PAGE_SHIFT;
>>> +	ret->rma_size = kvm_rma_pages<<   PAGE_SHIFT;
>>>    	return fd;
>>>    }
>>>
>>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    {
>>>    	int err = 0;
>>>    	struct kvm *kvm = vcpu->kvm;
>>> -	struct kvmppc_linear_info *ri = NULL;
>>> +	struct kvm_rma_info *ri = NULL;
>>>    	unsigned long hva;
>>>    	struct kvm_memory_slot *memslot;
>>>    	struct vm_area_struct *vma;
>>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>
>>>    	} else {
>>>    		/* Set up to use an RMO region */
>>> -		rma_size = ri->npages;
>>> +		rma_size = kvm_rma_pages;
>>>    		if (rma_size>   memslot->npages)
>>>    			rma_size = memslot->npages;
>>>    		rma_size<<= PAGE_SHIFT;
>>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    			/* POWER7 */
>>>    			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>>    			lpcr |= rmls<<   LPCR_RMLS_SH;
>>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<   PAGE_SHIFT;
>>> +			kvm->arch.rmor = ri->base_pfn<<   PAGE_SHIFT;
>>>    		}
>>>    		kvm->arch.lpcr = lpcr;
>>>    		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>>    			ri->base_pfn<<   PAGE_SHIFT, rma_size, lpcr);
>>>
>>>    		/* Initialize phys addrs of pages in RMO */
>>> -		npages = ri->npages;
>>> +		npages = kvm_rma_pages;
>>>    		porder = __ilog2(npages);
>>>    		physp = memslot->arch.slot_phys;
>>>    		if (physp) {
>>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> index 4b865c5..8cd0dae 100644
>>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> @@ -21,13 +21,6 @@
>>>    #include<asm/kvm_book3s.h>
>>>
>>>    #include "book3s_hv_cma.h"
>>> -
>>> -#define KVM_LINEAR_RMA		0
>>> -#define KVM_LINEAR_HPT		1
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>    /*
>>>     * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>>     * should be power of 2.
>>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>     * By default we reserve 5% of memory for hash pagetable allocation.
>>>     */
>>>    static unsigned long kvm_cma_resv_ratio = 5;
>>> -
>>> -/*************** RMA *************/
>>> -
>>>    /*
>>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>>     * Each RMA has to be physically contiguous and of a size that the
>>>     * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>>     * and other larger sizes.  Since we are unlikely to be allocate that
>>>     * much physically contiguous memory after the system is up and running,
>>> - * we preallocate a set of RMAs in early boot for KVM to use.
>>> + * we preallocate a set of RMAs in early boot using CMA.
>>> + * should be power of 2.
>>>     */
>>> -static unsigned long kvm_rma_size = 64<<   20;	/* 64MB */
>>> -static unsigned long kvm_rma_count;
>>> +unsigned long kvm_rma_pages = (1<<   27)>>   PAGE_SHIFT;	/* 128MB */
>>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>>
>>>    /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>>       Assumes POWER7 or PPC970. */
>>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int __init early_parse_rma_size(char *p)
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> +	unsigned long kvm_rma_size;
>>>
>>> +	pr_debug("%s(%s)\n", __func__, p);
>>> +	if (!p)
>>> +		return -EINVAL;
>>>    	kvm_rma_size = memparse(p,&p);
>>> -
>>> +	/*
>>> +	 * Check that the requested size is one supported in hardware
>>> +	 */
>>> +	if (lpcr_rmls(kvm_rma_size)<   0) {
>>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>>> +		return -EINVAL;
>>> +	}
>>> +	kvm_rma_pages = kvm_rma_size>>   PAGE_SHIFT;
>>>    	return 0;
>>>    }
>>>    early_param("kvm_rma_size", early_parse_rma_size);
>>>
>>> -static int __init early_parse_rma_count(char *p)
>>> +struct kvm_rma_info *kvm_alloc_rma()
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> -
>>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>>> -
>>> -	return 0;
>>> -}
>>> -early_param("kvm_rma_count", early_parse_rma_count);
>>> -
>>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>>> -{
>>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>>> +	struct page *page;
>>> +	struct kvm_rma_info *ri;
>>> +
>>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>>> +	if (!ri)
>>> +		return NULL;
>>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>>> +	if (!page)
>>> +		goto err_out;
>>> +	atomic_set(&ri->use_count, 1);
>>> +	ri->base_pfn = page_to_pfn(page);
>>> +	return ri;
>>> +err_out:
>>> +	kfree(ri);
>>> +	return NULL;
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>>
>>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>>    {
>>> -	kvm_release_linear(ri);
>>> +	if (atomic_dec_and_test(&ri->use_count)) {
>>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>>> +		kfree(ri);
>>> +	}
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_rma);
>>>
>>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>>
>>> -/*************** generic *************/
>>> -
>>> -static LIST_HEAD(free_linears);
>>> -static DEFINE_SPINLOCK(linear_lock);
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>> Please split the linar removal bits out into a separate patch :).
>>
>>
> That was the way I had in the earlier patchset. That will cause a bisect
> build break, because we consider warnings as error and we hit warning
> of unused function.
>
> I also realized that linear alloc functions are nearby and mostly fall
> in the same hunk. Hence folded it back.

Fair enough :)


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 15:32         ` Alexander Graf
  0 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kvm, mina86, linux-mm, paulus, kvm-ppc, linuxppc-dev, m.szyprowski

On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>> contigous since we use the range when address translation is not enabled.
>>>
>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>> also remove the the linear allocator which not used any more
>>>
>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>> ---
> .... snip ....
>
>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>> index 550f592..55c8519 100644
>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>    {
>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>    	struct page *page;
>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>
>>> -	if (vmf->pgoff>= ri->npages)
>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>    		return VM_FAULT_SIGBUS;
>>>
>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>
>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>    {
>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>
>>>    	kvm_release_rma(ri);
>>>    	return 0;
>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>
>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    {
>>> -	struct kvmppc_linear_info *ri;
>>>    	long fd;
>>> +	struct kvm_rma_info *ri;
>>> +	/*
>>> +	 * Only do this on PPC970 in HV mode
>>> +	 */
>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>> +		return -EINVAL;
>> Is this really what we want? User space may want to use an RMA on POWER7
>> systems, no?
> IIUC they will use virtual real mode area (VRMA) and not RMA

Then I suppose we should at least update the comment a bit further down 
the patch that indicates that on POWER7 systems we do support a real 
RMA. I can't really think of any reason why user space would want to use 
RMA over VRMA.

>
>>> +
>>> +	if (!kvm_rma_pages)
>>> +		return -EINVAL;
>>>
>>>    	ri = kvm_alloc_rma();
>>>    	if (!ri)
>>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>    	if (fd<   0)
>>>    		kvm_release_rma(ri);
>>>
>>> -	ret->rma_size = ri->npages<<   PAGE_SHIFT;
>>> +	ret->rma_size = kvm_rma_pages<<   PAGE_SHIFT;
>>>    	return fd;
>>>    }
>>>
>>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    {
>>>    	int err = 0;
>>>    	struct kvm *kvm = vcpu->kvm;
>>> -	struct kvmppc_linear_info *ri = NULL;
>>> +	struct kvm_rma_info *ri = NULL;
>>>    	unsigned long hva;
>>>    	struct kvm_memory_slot *memslot;
>>>    	struct vm_area_struct *vma;
>>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>
>>>    	} else {
>>>    		/* Set up to use an RMO region */
>>> -		rma_size = ri->npages;
>>> +		rma_size = kvm_rma_pages;
>>>    		if (rma_size>   memslot->npages)
>>>    			rma_size = memslot->npages;
>>>    		rma_size<<= PAGE_SHIFT;
>>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>>    			/* POWER7 */
>>>    			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>>    			lpcr |= rmls<<   LPCR_RMLS_SH;
>>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<   PAGE_SHIFT;
>>> +			kvm->arch.rmor = ri->base_pfn<<   PAGE_SHIFT;
>>>    		}
>>>    		kvm->arch.lpcr = lpcr;
>>>    		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>>    			ri->base_pfn<<   PAGE_SHIFT, rma_size, lpcr);
>>>
>>>    		/* Initialize phys addrs of pages in RMO */
>>> -		npages = ri->npages;
>>> +		npages = kvm_rma_pages;
>>>    		porder = __ilog2(npages);
>>>    		physp = memslot->arch.slot_phys;
>>>    		if (physp) {
>>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> index 4b865c5..8cd0dae 100644
>>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>>> @@ -21,13 +21,6 @@
>>>    #include<asm/kvm_book3s.h>
>>>
>>>    #include "book3s_hv_cma.h"
>>> -
>>> -#define KVM_LINEAR_RMA		0
>>> -#define KVM_LINEAR_HPT		1
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>    /*
>>>     * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>>     * should be power of 2.
>>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>>     * By default we reserve 5% of memory for hash pagetable allocation.
>>>     */
>>>    static unsigned long kvm_cma_resv_ratio = 5;
>>> -
>>> -/*************** RMA *************/
>>> -
>>>    /*
>>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>>     * Each RMA has to be physically contiguous and of a size that the
>>>     * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>>     * and other larger sizes.  Since we are unlikely to be allocate that
>>>     * much physically contiguous memory after the system is up and running,
>>> - * we preallocate a set of RMAs in early boot for KVM to use.
>>> + * we preallocate a set of RMAs in early boot using CMA.
>>> + * should be power of 2.
>>>     */
>>> -static unsigned long kvm_rma_size = 64<<   20;	/* 64MB */
>>> -static unsigned long kvm_rma_count;
>>> +unsigned long kvm_rma_pages = (1<<   27)>>   PAGE_SHIFT;	/* 128MB */
>>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>>
>>>    /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>>       Assumes POWER7 or PPC970. */
>>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>
>>>    static int __init early_parse_rma_size(char *p)
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> +	unsigned long kvm_rma_size;
>>>
>>> +	pr_debug("%s(%s)\n", __func__, p);
>>> +	if (!p)
>>> +		return -EINVAL;
>>>    	kvm_rma_size = memparse(p,&p);
>>> -
>>> +	/*
>>> +	 * Check that the requested size is one supported in hardware
>>> +	 */
>>> +	if (lpcr_rmls(kvm_rma_size)<   0) {
>>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>>> +		return -EINVAL;
>>> +	}
>>> +	kvm_rma_pages = kvm_rma_size>>   PAGE_SHIFT;
>>>    	return 0;
>>>    }
>>>    early_param("kvm_rma_size", early_parse_rma_size);
>>>
>>> -static int __init early_parse_rma_count(char *p)
>>> +struct kvm_rma_info *kvm_alloc_rma()
>>>    {
>>> -	if (!p)
>>> -		return 1;
>>> -
>>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>>> -
>>> -	return 0;
>>> -}
>>> -early_param("kvm_rma_count", early_parse_rma_count);
>>> -
>>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>>> -{
>>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>>> +	struct page *page;
>>> +	struct kvm_rma_info *ri;
>>> +
>>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>>> +	if (!ri)
>>> +		return NULL;
>>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>>> +	if (!page)
>>> +		goto err_out;
>>> +	atomic_set(&ri->use_count, 1);
>>> +	ri->base_pfn = page_to_pfn(page);
>>> +	return ri;
>>> +err_out:
>>> +	kfree(ri);
>>> +	return NULL;
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>>
>>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>>    {
>>> -	kvm_release_linear(ri);
>>> +	if (atomic_dec_and_test(&ri->use_count)) {
>>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>>> +		kfree(ri);
>>> +	}
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_rma);
>>>
>>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>>    }
>>>    EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>>
>>> -/*************** generic *************/
>>> -
>>> -static LIST_HEAD(free_linears);
>>> -static DEFINE_SPINLOCK(linear_lock);
>>> -
>>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>> Please split the linar removal bits out into a separate patch :).
>>
>>
> That was the way I had in the earlier patchset. That will cause a bisect
> build break, because we consider warnings as error and we hit warning
> of unused function.
>
> I also realized that linear alloc functions are nearby and mostly fall
> in the same hunk. Hence folded it back.

Fair enough :)


Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02 15:31       ` Aneesh Kumar K.V
  (?)
  (?)
@ 2013-07-02 15:32         ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 15:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On 07/02/2013 05:31 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>
>>> Powerpc architecture uses a hash based page table mechanism for mapping virtual
>>> addresses to physical address. The architecture require this hash page table to
>>> be physically contiguous. With KVM on Powerpc currently we use early reservation
>>> mechanism for allocating guest hash page table. This implies that we need to
>>> reserve a big memory region to ensure we can create large number of guest
>>> simultaneously with KVM on Power. Another disadvantage is that the reserved memory
>>> is not available to rest of the subsystems and and that implies we limit the total
>>> available memory in the host.
>>>
>>> This patch series switch the guest hash page table allocation to use
>>> contiguous memory allocator.
>>>
>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>> Is CMA a mandatory option in the kernel? Or can it be optionally
>> disabled? If it can be disabled, we should keep the preallocated
>> fallback case around for systems that have CMA disabled.
>>
> CMA is not a mandatory option. But we have
>
> config KVM_BOOK3S_64_HV
> 	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
> 	depends on KVM_BOOK3S_64
> 	select MMU_NOTIFIER
> 	select CMA
>
> ie, for book3s HV we select CMA and only this CMA needs is memblock
> which we already support

Ah, that was the hunk I did miss. Thanks a lot, then it's perfectly fine 
of course :). Very nice patch set btw.


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a seperate config
  2013-07-02  8:20   ` Marek Szyprowski
  (?)
@ 2013-07-02 15:33     ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 15:33 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: benh, paulus, agraf, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Marek Szyprowski <m.szyprowski@samsung.com> writes:

> Hello,
>
> On 7/2/2013 7:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>
>> We want to use CMA for allocating hash page table and real mode area for
>> PPC64. Hence move DMA contiguous related changes into a seperate config
>> so that ppc64 can enable CMA without requiring DMA contiguous.
>>
>> Acked-by: Michal Nazarewicz <mina86@mina86.com>
>> Acked-by: Paul Mackerras <paulus@samba.org>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
>
> OK. It looks that there is not that much that can be easily shared between
> dma-mapping cma provider and ppc/kvm cma allocator. I would prefer to merge
> patch 1/4 to my dma-mapping tree, because I plan some significant changes in
> cma code, see: 
> http://thread.gmane.org/gmane.linux.drivers.devicetree/40013/
> I think it is better to keep those changes together.
>
> For now I've merged your patch with removed defconfig updates. AFAIK such
> changes require separate handling to avoid pointless merge conflicts.

How do we get the defconfig changes done?

> I've
> also prepared a topic branch for-v3.12-cma-dma, available at
> git://git.linaro.org/people/mszyprowski/linux-dma-mapping, which You can 
> merge
> together with your changes to ppc kernel trees.
>

Thanks. Will update accordingly as other patches get picked into
respective trees

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 15:29       ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 15:41 UTC (permalink / raw)
  To: Alexander Graf
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>
>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>> contigous since we use the range when address translation is not enabled.
>>
>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>> also remove the the linear allocator which not used any more
>>
>> Acked-by: Paul Mackerras<paulus@samba.org>
>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>> ---

.... snip ....

>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 550f592..55c8519 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>   {
>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>   	struct page *page;
>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>
>> -	if (vmf->pgoff>= ri->npages)
>> +	if (vmf->pgoff>= kvm_rma_pages)
>>   		return VM_FAULT_SIGBUS;
>>
>>   	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>
>>   static int kvm_rma_release(struct inode *inode, struct file *filp)
>>   {
>> -	struct kvmppc_linear_info *ri = filp->private_data;
>> +	struct kvm_rma_info *ri = filp->private_data;
>>
>>   	kvm_release_rma(ri);
>>   	return 0;
>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>
>>   long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   {
>> -	struct kvmppc_linear_info *ri;
>>   	long fd;
>> +	struct kvm_rma_info *ri;
>> +	/*
>> +	 * Only do this on PPC970 in HV mode
>> +	 */
>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>> +		return -EINVAL;
>
> Is this really what we want? User space may want to use an RMA on POWER7 
> systems, no?

IIUC they will use virtual real mode area (VRMA) and not RMA

>
>> +
>> +	if (!kvm_rma_pages)
>> +		return -EINVAL;
>>
>>   	ri = kvm_alloc_rma();
>>   	if (!ri)
>> @@ -1560,7 +1569,7 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>   	if (fd<  0)
>>   		kvm_release_rma(ri);
>>
>> -	ret->rma_size = ri->npages<<  PAGE_SHIFT;
>> +	ret->rma_size = kvm_rma_pages<<  PAGE_SHIFT;
>>   	return fd;
>>   }
>>
>> @@ -1725,7 +1734,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   {
>>   	int err = 0;
>>   	struct kvm *kvm = vcpu->kvm;
>> -	struct kvmppc_linear_info *ri = NULL;
>> +	struct kvm_rma_info *ri = NULL;
>>   	unsigned long hva;
>>   	struct kvm_memory_slot *memslot;
>>   	struct vm_area_struct *vma;
>> @@ -1803,7 +1812,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>
>>   	} else {
>>   		/* Set up to use an RMO region */
>> -		rma_size = ri->npages;
>> +		rma_size = kvm_rma_pages;
>>   		if (rma_size>  memslot->npages)
>>   			rma_size = memslot->npages;
>>   		rma_size<<= PAGE_SHIFT;
>> @@ -1831,14 +1840,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
>>   			/* POWER7 */
>>   			lpcr&= ~(LPCR_VPM0 | LPCR_VRMA_L);
>>   			lpcr |= rmls<<  LPCR_RMLS_SH;
>> -			kvm->arch.rmor = kvm->arch.rma->base_pfn<<  PAGE_SHIFT;
>> +			kvm->arch.rmor = ri->base_pfn<<  PAGE_SHIFT;
>>   		}
>>   		kvm->arch.lpcr = lpcr;
>>   		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
>>   			ri->base_pfn<<  PAGE_SHIFT, rma_size, lpcr);
>>
>>   		/* Initialize phys addrs of pages in RMO */
>> -		npages = ri->npages;
>> +		npages = kvm_rma_pages;
>>   		porder = __ilog2(npages);
>>   		physp = memslot->arch.slot_phys;
>>   		if (physp) {
>> diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
>> index 4b865c5..8cd0dae 100644
>> --- a/arch/powerpc/kvm/book3s_hv_builtin.c
>> +++ b/arch/powerpc/kvm/book3s_hv_builtin.c
>> @@ -21,13 +21,6 @@
>>   #include<asm/kvm_book3s.h>
>>
>>   #include "book3s_hv_cma.h"
>> -
>> -#define KVM_LINEAR_RMA		0
>> -#define KVM_LINEAR_HPT		1
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type);
>> -static struct kvmppc_linear_info *kvm_alloc_linear(int type);
>> -static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>   /*
>>    * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
>>    * should be power of 2.
>> @@ -37,19 +30,17 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri);
>>    * By default we reserve 5% of memory for hash pagetable allocation.
>>    */
>>   static unsigned long kvm_cma_resv_ratio = 5;
>> -
>> -/*************** RMA *************/
>> -
>>   /*
>> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
>> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>>    * Each RMA has to be physically contiguous and of a size that the
>>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>>    * and other larger sizes.  Since we are unlikely to be allocate that
>>    * much physically contiguous memory after the system is up and running,
>> - * we preallocate a set of RMAs in early boot for KVM to use.
>> + * we preallocate a set of RMAs in early boot using CMA.
>> + * should be power of 2.
>>    */
>> -static unsigned long kvm_rma_size = 64<<  20;	/* 64MB */
>> -static unsigned long kvm_rma_count;
>> +unsigned long kvm_rma_pages = (1<<  27)>>  PAGE_SHIFT;	/* 128MB */
>> +EXPORT_SYMBOL_GPL(kvm_rma_pages);
>>
>>   /* Work out RMLS (real mode limit selector) field value for a given RMA size.
>>      Assumes POWER7 or PPC970. */
>> @@ -79,35 +70,50 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>
>>   static int __init early_parse_rma_size(char *p)
>>   {
>> -	if (!p)
>> -		return 1;
>> +	unsigned long kvm_rma_size;
>>
>> +	pr_debug("%s(%s)\n", __func__, p);
>> +	if (!p)
>> +		return -EINVAL;
>>   	kvm_rma_size = memparse(p,&p);
>> -
>> +	/*
>> +	 * Check that the requested size is one supported in hardware
>> +	 */
>> +	if (lpcr_rmls(kvm_rma_size)<  0) {
>> +		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
>> +		return -EINVAL;
>> +	}
>> +	kvm_rma_pages = kvm_rma_size>>  PAGE_SHIFT;
>>   	return 0;
>>   }
>>   early_param("kvm_rma_size", early_parse_rma_size);
>>
>> -static int __init early_parse_rma_count(char *p)
>> +struct kvm_rma_info *kvm_alloc_rma()
>>   {
>> -	if (!p)
>> -		return 1;
>> -
>> -	kvm_rma_count = simple_strtoul(p, NULL, 0);
>> -
>> -	return 0;
>> -}
>> -early_param("kvm_rma_count", early_parse_rma_count);
>> -
>> -struct kvmppc_linear_info *kvm_alloc_rma(void)
>> -{
>> -	return kvm_alloc_linear(KVM_LINEAR_RMA);
>> +	struct page *page;
>> +	struct kvm_rma_info *ri;
>> +
>> +	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
>> +	if (!ri)
>> +		return NULL;
>> +	page = kvm_alloc_cma(kvm_rma_pages, kvm_rma_pages);
>> +	if (!page)
>> +		goto err_out;
>> +	atomic_set(&ri->use_count, 1);
>> +	ri->base_pfn = page_to_pfn(page);
>> +	return ri;
>> +err_out:
>> +	kfree(ri);
>> +	return NULL;
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_alloc_rma);
>>
>> -void kvm_release_rma(struct kvmppc_linear_info *ri)
>> +void kvm_release_rma(struct kvm_rma_info *ri)
>>   {
>> -	kvm_release_linear(ri);
>> +	if (atomic_dec_and_test(&ri->use_count)) {
>> +		kvm_release_cma(pfn_to_page(ri->base_pfn), kvm_rma_pages);
>> +		kfree(ri);
>> +	}
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_rma);
>>
>> @@ -137,101 +143,6 @@ void kvm_release_hpt(struct page *page, unsigned long nr_pages)
>>   }
>>   EXPORT_SYMBOL_GPL(kvm_release_hpt);
>>
>> -/*************** generic *************/
>> -
>> -static LIST_HEAD(free_linears);
>> -static DEFINE_SPINLOCK(linear_lock);
>> -
>> -static void __init kvm_linear_init_one(ulong size, int count, int type)
>
> Please split the linar removal bits out into a separate patch :).
>
>

That was the way I had it in the earlier patchset. Splitting the removal out
would cause a bisect build break, because we treat warnings as errors and the
intermediate patch would leave unused functions behind, triggering a warning.

I also realized that the linear alloc functions are nearby and mostly fall
in the same hunk, hence I folded the removal back in.
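
A minimal standalone sketch of that failure mode (hypothetical file and
function names, not code from this series): with warnings promoted to
errors, a patch that removes only the callers leaves a static helper
unused, and that intermediate commit no longer builds.

/* leftover.c - hypothetical standalone example, not from this series */
#include <stdio.h>

/* Static helper whose only caller was removed by the first patch. */
static int leftover_helper(int x)
{
	return x * 2;
}

int main(void)
{
	/* Nothing calls leftover_helper() any more. */
	printf("callers gone\n");
	return 0;
}

/*
 * $ gcc -Wall -Werror leftover.c
 *   error: 'leftover_helper' defined but not used [-Werror=unused-function]
 *
 * The callers-only commit therefore fails to build and breaks bisect;
 * removing the helpers in the same patch avoids that.
 */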

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
@ 2013-07-02 15:31       ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 15:43 UTC (permalink / raw)
  To: Alexander Graf
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>
>> Powerpc architecture uses a hash based page table mechanism for mapping virtual
>> addresses to physical address. The architecture require this hash page table to
>> be physically contiguous. With KVM on Powerpc currently we use early reservation
>> mechanism for allocating guest hash page table. This implies that we need to
>> reserve a big memory region to ensure we can create large number of guest
>> simultaneously with KVM on Power. Another disadvantage is that the reserved memory
>> is not available to rest of the subsystems and and that implies we limit the total
>> available memory in the host.
>>
>> This patch series switch the guest hash page table allocation to use
>> contiguous memory allocator.
>>
>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>
> Is CMA a mandatory option in the kernel? Or can it be optionally 
> disabled? If it can be disabled, we should keep the preallocated 
> fallback case around for systems that have CMA disabled.
>

CMA is not a mandatory option. But we have 

config KVM_BOOK3S_64_HV
	bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
	depends on KVM_BOOK3S_64
	select MMU_NOTIFIER
	select CMA

i.e., for book3s HV we select CMA, and the only thing CMA needs is memblock,
which we already support.

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02 15:32         ` Alexander Graf
  (?)
@ 2013-07-02 16:28           ` Aneesh Kumar K.V
  -1 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 16:28 UTC (permalink / raw)
  To: Alexander Graf
  Cc: kvm, mina86, linux-mm, paulus, kvm-ppc, linuxppc-dev, m.szyprowski

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>> Alexander Graf<agraf@suse.de>  writes:
>>
>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>
>>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>>> contigous since we use the range when address translation is not enabled.
>>>>
>>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>>> also remove the the linear allocator which not used any more
>>>>
>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>> ---
>> .... snip ....
>>
>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>> index 550f592..55c8519 100644
>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>
>>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>    	struct page *page;
>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>
>>>> -	if (vmf->pgoff>= ri->npages)
>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>    		return VM_FAULT_SIGBUS;
>>>>
>>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>
>>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>
>>>>    	kvm_release_rma(ri);
>>>>    	return 0;
>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>
>>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri;
>>>>    	long fd;
>>>> +	struct kvm_rma_info *ri;
>>>> +	/*
>>>> +	 * Only do this on PPC970 in HV mode
>>>> +	 */
>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>> +		return -EINVAL;
>>> Is this really what we want? User space may want to use an RMA on POWER7
>>> systems, no?
>> IIUC they will use virtual real mode area (VRMA) and not RMA
>
> Then I suppose we should at least update the comment a bit further down 
> the patch that indicates that on POWER7 systems we do support a real 
> RMA. I can't really think of any reason why user space would want to use 
> RMA over VRMA.
>

Where? We have comments like

/* On POWER7, use VRMA; on PPC970, give up */

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
  2013-07-02 16:28           ` Aneesh Kumar K.V
  (?)
  (?)
@ 2013-07-02 16:36             ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 16:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kvm, mina86, linux-mm, paulus, kvm-ppc, linuxppc-dev, m.szyprowski

On 07/02/2013 06:28 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>>> Alexander Graf<agraf@suse.de>   writes:
>>>
>>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>>>> contigous since we use the range when address translation is not enabled.
>>>>>
>>>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>>>> also remove the the linear allocator which not used any more
>>>>>
>>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>>> ---
>>> .... snip ....
>>>
>>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>>> index 550f592..55c8519 100644
>>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>>
>>>>>     static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>>     	struct page *page;
>>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>>
>>>>> -	if (vmf->pgoff>= ri->npages)
>>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>>     		return VM_FAULT_SIGBUS;
>>>>>
>>>>>     	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>>
>>>>>     static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>>
>>>>>     	kvm_release_rma(ri);
>>>>>     	return 0;
>>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>>
>>>>>     long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri;
>>>>>     	long fd;
>>>>> +	struct kvm_rma_info *ri;
>>>>> +	/*
>>>>> +	 * Only do this on PPC970 in HV mode
>>>>> +	 */
>>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>>> +		return -EINVAL;
>>>> Is this really what we want? User space may want to use an RMA on POWER7
>>>> systems, no?
>>> IIUC they will use virtual real mode area (VRMA) and not RMA
>> Then I suppose we should at least update the comment a bit further down
>> the patch that indicates that on POWER7 systems we do support a real
>> RMA. I can't really think of any reason why user space would want to use
>> RMA over VRMA.
>>
> where ? We have comments like
>
> /* On POWER7, use VRMA; on PPC970, give up */

>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */

This could be falsely interpreted as "POWER7 can use an RMA".


Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 16:36             ` Alexander Graf
  0 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 16:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kvm, kvm-ppc, linux-mm, paulus, mina86, linuxppc-dev, m.szyprowski

On 07/02/2013 06:28 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>>> Alexander Graf<agraf@suse.de>   writes:
>>>
>>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>>>> contigous since we use the range when address translation is not enabled.
>>>>>
>>>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>>>> also remove the the linear allocator which not used any more
>>>>>
>>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>>> ---
>>> .... snip ....
>>>
>>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>>> index 550f592..55c8519 100644
>>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>>
>>>>>     static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>>     	struct page *page;
>>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>>
>>>>> -	if (vmf->pgoff>= ri->npages)
>>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>>     		return VM_FAULT_SIGBUS;
>>>>>
>>>>>     	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>>
>>>>>     static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>>
>>>>>     	kvm_release_rma(ri);
>>>>>     	return 0;
>>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>>
>>>>>     long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri;
>>>>>     	long fd;
>>>>> +	struct kvm_rma_info *ri;
>>>>> +	/*
>>>>> +	 * Only do this on PPC970 in HV mode
>>>>> +	 */
>>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>>> +		return -EINVAL;
>>>> Is this really what we want? User space may want to use an RMA on POWER7
>>>> systems, no?
>>> IIUC they will use virtual real mode area (VRMA) and not RMA
>> Then I suppose we should at least update the comment a bit further down
>> the patch that indicates that on POWER7 systems we do support a real
>> RMA. I can't really think of any reason why user space would want to use
>> RMA over VRMA.
>>
> where ? We have comments like
>
> /* On POWER7, use VRMA; on PPC970, give up */

>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */

This could be falsely interpreted as "POWER7 can use an RMA".


Alex

^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 16:36             ` Alexander Graf
  0 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 16:36 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: kvm, mina86, linux-mm, paulus, kvm-ppc, linuxppc-dev, m.szyprowski

On 07/02/2013 06:28 PM, Aneesh Kumar K.V wrote:
> Alexander Graf<agraf@suse.de>  writes:
>
>> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>>> Alexander Graf<agraf@suse.de>   writes:
>>>
>>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>>
>>>>> Older version of power architecture use Real Mode Offset register and Real Mode Limit
>>>>> Selector for mapping guest Real Mode Area. The guest RMA should be physically
>>>>> contigous since we use the range when address translation is not enabled.
>>>>>
>>>>> This patch switch RMA allocation code to use contigous memory allocator. The patch
>>>>> also remove the the linear allocator which not used any more
>>>>>
>>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>>> ---
>>> .... snip ....
>>>
>>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>>> index 550f592..55c8519 100644
>>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>>
>>>>>     static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>>     	struct page *page;
>>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>>
>>>>> -	if (vmf->pgoff>= ri->npages)
>>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>>     		return VM_FAULT_SIGBUS;
>>>>>
>>>>>     	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>>
>>>>>     static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>>
>>>>>     	kvm_release_rma(ri);
>>>>>     	return 0;
>>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>>
>>>>>     long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>>     {
>>>>> -	struct kvmppc_linear_info *ri;
>>>>>     	long fd;
>>>>> +	struct kvm_rma_info *ri;
>>>>> +	/*
>>>>> +	 * Only do this on PPC970 in HV mode
>>>>> +	 */
>>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>>> +		return -EINVAL;
>>>> Is this really what we want? User space may want to use an RMA on POWER7
>>>> systems, no?
>>> IIUC they will use virtual real mode area (VRMA) and not RMA
>> Then I suppose we should at least update the comment a bit further down
>> the patch that indicates that on POWER7 systems we do support a real
>> RMA. I can't really think of any reason why user space would want to use
>> RMA over VRMA.
>>
> where ? We have comments like
>
> /* On POWER7, use VRMA; on PPC970, give up */

>   /*
> - * This maintains a list of RMAs (real mode areas) for KVM guests to use.
> + * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
>    * Each RMA has to be physically contiguous and of a size that the
>    * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
>    * and other larger sizes.  Since we are unlikely to be allocate that
>    * much physically contiguous memory after the system is up and running,
> - * we preallocate a set of RMAs in early boot for KVM to use.
> + * we preallocate a set of RMAs in early boot using CMA.
> + * should be power of 2.
>    */

This could be falsely interpreted as "POWER7 can use an RMA".


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread

* Re: [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation
@ 2013-07-02 16:28           ` Aneesh Kumar K.V
  0 siblings, 0 replies; 67+ messages in thread
From: Aneesh Kumar K.V @ 2013-07-02 16:40 UTC (permalink / raw)
  To: Alexander Graf
  Cc: kvm, mina86, linux-mm, paulus, kvm-ppc, linuxppc-dev, m.szyprowski

Alexander Graf <agraf@suse.de> writes:

> On 07/02/2013 05:29 PM, Aneesh Kumar K.V wrote:
>> Alexander Graf<agraf@suse.de>  writes:
>>
>>> On 07/02/2013 07:45 AM, Aneesh Kumar K.V wrote:
>>>> From: "Aneesh Kumar K.V"<aneesh.kumar@linux.vnet.ibm.com>
>>>>
>>>>> Older versions of the Power architecture use the Real Mode Offset register and Real
>>>>> Mode Limit Selector for mapping the guest Real Mode Area. The guest RMA should be
>>>>> physically contiguous since we use the range when address translation is not enabled.
>>>>>
>>>>> This patch switches the RMA allocation code to use the contiguous memory allocator.
>>>>> The patch also removes the linear allocator, which is not used any more.
>>>>
>>>> Acked-by: Paul Mackerras<paulus@samba.org>
>>>> Signed-off-by: Aneesh Kumar K.V<aneesh.kumar@linux.vnet.ibm.com>
>>>> ---
>> .... snip ....
>>
>>>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>>>> index 550f592..55c8519 100644
>>>> --- a/arch/powerpc/kvm/book3s_hv.c
>>>> +++ b/arch/powerpc/kvm/book3s_hv.c
>>>> @@ -1511,10 +1511,10 @@ static inline int lpcr_rmls(unsigned long rma_size)
>>>>
>>>>    static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
>>>>    	struct page *page;
>>>> +	struct kvm_rma_info *ri = vma->vm_file->private_data;
>>>>
>>>> -	if (vmf->pgoff>= ri->npages)
>>>> +	if (vmf->pgoff>= kvm_rma_pages)
>>>>    		return VM_FAULT_SIGBUS;
>>>>
>>>>    	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
>>>> @@ -1536,7 +1536,7 @@ static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
>>>>
>>>>    static int kvm_rma_release(struct inode *inode, struct file *filp)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri = filp->private_data;
>>>> +	struct kvm_rma_info *ri = filp->private_data;
>>>>
>>>>    	kvm_release_rma(ri);
>>>>    	return 0;
>>>> @@ -1549,8 +1549,17 @@ static const struct file_operations kvm_rma_fops = {
>>>>
>>>>    long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
>>>>    {
>>>> -	struct kvmppc_linear_info *ri;
>>>>    	long fd;
>>>> +	struct kvm_rma_info *ri;
>>>> +	/*
>>>> +	 * Only do this on PPC970 in HV mode
>>>> +	 */
>>>> +	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
>>>> +	    !cpu_has_feature(CPU_FTR_ARCH_201))
>>>> +		return -EINVAL;
>>> Is this really what we want? User space may want to use an RMA on POWER7
>>> systems, no?
>> IIUC they will use virtual real mode area (VRMA) and not RMA
>
> Then I suppose we should at least update the comment a bit further down 
> the patch that indicates that on POWER7 systems we do support a real 
> RMA. I can't really think of any reason why user space would want to use 
> RMA over VRMA.
>

where ? We have comments like

/* On POWER7, use VRMA; on PPC970, give up */

-aneesh


^ permalink raw reply	[flat|nested] 67+ messages in thread
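
To pin down where this sub-thread lands: the gate being discussed, with its
comment expanded along the lines Alex asks for, could read roughly as below.
The feature checks are the ones quoted above from the patch; the extra comment
wording is only an illustration, not what was posted.

	/*
	 * Only do this on PPC970 (ISA 2.01) in HV mode.  POWER7 and later
	 * hypervisor-mode CPUs map the guest real mode area through the VRMA,
	 * so user space never needs to allocate a separate RMA there.
	 */
	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
	    !cpu_has_feature(CPU_FTR_ARCH_201))
		return -EINVAL;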

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02 15:12     ` Alexander Graf
  (?)
@ 2013-07-02 22:28       ` Benjamin Herrenschmidt
  -1 siblings, 0 replies; 67+ messages in thread
From: Benjamin Herrenschmidt @ 2013-07-02 22:28 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Aneesh Kumar K.V, paulus, m.szyprowski, mina86, linux-mm,
	linuxppc-dev, kvm-ppc, kvm

On Tue, 2013-07-02 at 17:12 +0200, Alexander Graf wrote:
> Is CMA a mandatory option in the kernel? Or can it be optionally 
> disabled? If it can be disabled, we should keep the preallocated 
> fallback case around for systems that have CMA disabled.

Why ? More junk code to keep around ...

If CMA is disabled, we can limit ourselves to dynamic allocation (with
limitation to 16M hash table).

Cheers,
Ben.



^ permalink raw reply	[flat|nested] 67+ messages in thread
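
A minimal sketch of the split Ben describes, assuming CMA may be compiled out:
kvm_alloc_hpt() is the CMA-backed helper this series introduces, and the
fallback branch is the "dynamic allocation" path, whose single high-order
allocation is what caps the hash page table size (Ben's ~16M figure). Apart
from the helper name, the structure here is illustrative and error handling
is omitted.

	static struct page *hpt_try_alloc(unsigned long nr_pages)
	{
		struct page *page = NULL;

		if (IS_ENABLED(CONFIG_CMA))
			page = kvm_alloc_hpt(nr_pages);	/* carve out of the KVM CMA area */
		if (!page) {
			/* Dynamic fallback: one high-order buddy allocation. */
			page = alloc_pages(GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN,
					   get_order(nr_pages << PAGE_SHIFT));
		}
		return page;
	}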

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02 22:28       ` Benjamin Herrenschmidt
  (?)
@ 2013-07-02 22:31         ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-02 22:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Aneesh Kumar K.V, paulus, m.szyprowski, mina86, linux-mm,
	linuxppc-dev, kvm-ppc, kvm


On 03.07.2013, at 00:28, Benjamin Herrenschmidt wrote:

> On Tue, 2013-07-02 at 17:12 +0200, Alexander Graf wrote:
>> Is CMA a mandatory option in the kernel? Or can it be optionally 
>> disabled? If it can be disabled, we should keep the preallocated 
>> fallback case around for systems that have CMA disabled.
> 
> Why ? More junk code to keep around ...
> 
> If CMA is disabled, we can limit ourselves to dynamic allocation (with
> limitation to 16M hash table).

Aneesh adds a requirement for CMA on the KVM option in Kconfig, so all is well.


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread
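
For readers tracking the Kconfig side: the dependency Alex refers to amounts to
selecting CMA from the HV KVM option in arch/powerpc/kvm/Kconfig, roughly as in
the fragment below. Only the relevant lines are shown; other selects and the
help text are omitted, and the exact wording in the applied series may differ.

	config KVM_BOOK3S_64_HV
		bool "KVM support for POWER7 and PPC970 using hypervisor mode in host"
		depends on KVM_BOOK3S_64
		select CMA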

* Re: [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation
  2013-07-02  5:45   ` Aneesh Kumar K.V
  (?)
@ 2013-07-03  6:15     ` Paul Mackerras
  -1 siblings, 0 replies; 67+ messages in thread
From: Paul Mackerras @ 2013-07-03  6:15 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, agraf, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On Tue, Jul 02, 2013 at 11:15:16AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> The Powerpc architecture uses a hash-based page table mechanism for mapping virtual
> addresses to physical addresses. The architecture requires this hash page table to
> be physically contiguous. With KVM on Powerpc we currently use an early reservation
> mechanism for allocating the guest hash page table. This implies that we need to
> reserve a big memory region to ensure we can create a large number of guests
> simultaneously with KVM on Power. Another disadvantage is that the reserved memory
> is not available to the rest of the subsystems, and that implies we limit the total
> available memory in the host.
> 
> This patch series switches the guest hash page table allocation to use the
> contiguous memory allocator.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Acked-by: Paul Mackerras <paulus@samba.org>


^ permalink raw reply	[flat|nested] 67+ messages in thread
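
A condensed sketch of the boot-time change the cover text describes: instead of
memblock-reserving whole hash page tables up front, a CMA area is declared once
during early boot and HPTs are carved from it when guests are created.
kvm_cma_reserve()/kvm_cma_declare_contiguous() follow the series' naming, but
the 5% sizing and 256K alignment shown here are illustrative (the posted code
walks the memblock regions and uses a tunable reservation ratio).

	void __init kvm_cma_reserve(void)
	{
		phys_addr_t selected_size;

		/* Illustrative sizing: set aside ~5% of memory for guest hash
		 * page tables (and, on PPC970, RMAs). */
		selected_size = PAGE_ALIGN(memblock_phys_mem_size() * 5 / 100);
		if (selected_size)
			kvm_cma_declare_contiguous(selected_size, SZ_256K);
	}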

* Re: [PATCH -V3 4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation.
  2013-07-02  5:45   ` Aneesh Kumar K.V
  (?)
  (?)
@ 2013-07-03  6:16     ` Paul Mackerras
  -1 siblings, 0 replies; 67+ messages in thread
From: Paul Mackerras @ 2013-07-03  6:16 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, agraf, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm

On Tue, Jul 02, 2013 at 11:15:18AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> Both RMA and hash page table requests will be a multiple of 256K. We can use
> a chunk size of 256K to track the free/used 256K chunks in the bitmap. This
> should help to reduce the bitmap size.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Acked-by: Paul Mackerras <paulus@samba.org>

Thanks!

^ permalink raw reply	[flat|nested] 67+ messages in thread
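
To make the 256K-chunk accounting concrete, a small sketch of the bitmap
arithmetic; the constant and function names are illustrative, while
bitmap_find_next_zero_area() is the generic kernel helper for finding a
contiguous run of clear bits. A request of nr_pages base pages is rounded up
to whole 256K chunks, one bit per chunk; bitmap_set()/bitmap_clear() then mark
the run used or free again.

	#define KVM_CMA_CHUNK_ORDER	18	/* 256K chunks */
	#define PAGES_PER_CHUNK		(1UL << (KVM_CMA_CHUNK_ORDER - PAGE_SHIFT))

	static unsigned long cma_find_chunks(unsigned long *bitmap,
					     unsigned long total_chunks,
					     unsigned long nr_pages)
	{
		unsigned long nr_chunks = DIV_ROUND_UP(nr_pages, PAGES_PER_CHUNK);

		/* First index of a run of nr_chunks zero bits, or a value
		 * >= total_chunks if no such run exists. */
		return bitmap_find_next_zero_area(bitmap, total_chunks, 0,
						  nr_chunks, 0);
	}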

* Re: [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a seperate config
  2013-07-02  5:45 ` Aneesh Kumar K.V
  (?)
  (?)
@ 2013-07-08 14:21   ` Alexander Graf
  -1 siblings, 0 replies; 67+ messages in thread
From: Alexander Graf @ 2013-07-08 14:21 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: benh, paulus, m.szyprowski, mina86, linux-mm, linuxppc-dev, kvm-ppc, kvm


On 02.07.2013, at 07:45, Aneesh Kumar K.V wrote:

> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
> 
> We want to use CMA for allocating hash page table and real mode area for
> PPC64. Hence move DMA contiguous related changes into a seperate config
> so that ppc64 can enable CMA without requiring DMA contiguous.
> 
> Acked-by: Michal Nazarewicz <mina86@mina86.com>
> Acked-by: Paul Mackerras <paulus@samba.org>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

Thanks, applied all to kvm-ppc-queue. Please provide a cover letter next time :).


Alex


^ permalink raw reply	[flat|nested] 67+ messages in thread

end of thread, other threads:[~2013-07-08 14:21 UTC | newest]

Thread overview: 67+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-02  5:45 [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a seperate config Aneesh Kumar K.V
2013-07-02  5:57 ` Aneesh Kumar K.V
2013-07-02  5:45 ` Aneesh Kumar K.V
2013-07-02  5:45 ` [PATCH -V3 2/4] powerpc/kvm: Contiguous memory allocator based hash page table allocation Aneesh Kumar K.V
2013-07-02  5:57   ` Aneesh Kumar K.V
2013-07-02  5:45   ` Aneesh Kumar K.V
2013-07-02 15:12   ` Alexander Graf
2013-07-02 15:12     ` Alexander Graf
2013-07-02 15:12     ` Alexander Graf
2013-07-02 15:12     ` Alexander Graf
2013-07-02 15:31     ` Aneesh Kumar K.V
2013-07-02 15:43       ` Aneesh Kumar K.V
2013-07-02 15:31       ` Aneesh Kumar K.V
2013-07-02 15:31       ` Aneesh Kumar K.V
2013-07-02 15:32       ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 22:28     ` Benjamin Herrenschmidt
2013-07-02 22:28       ` Benjamin Herrenschmidt
2013-07-02 22:28       ` Benjamin Herrenschmidt
2013-07-02 22:31       ` Alexander Graf
2013-07-02 22:31         ` Alexander Graf
2013-07-02 22:31         ` Alexander Graf
2013-07-03  6:15   ` Paul Mackerras
2013-07-03  6:15     ` Paul Mackerras
2013-07-03  6:15     ` Paul Mackerras
2013-07-02  5:45 ` [PATCH -V3 3/4] powerpc/kvm: Contiguous memory allocator based RMA allocation Aneesh Kumar K.V
2013-07-02  5:57   ` Aneesh Kumar K.V
2013-07-02  5:45   ` Aneesh Kumar K.V
2013-07-02 15:17   ` Alexander Graf
2013-07-02 15:17     ` Alexander Graf
2013-07-02 15:17     ` Alexander Graf
2013-07-02 15:17     ` Alexander Graf
2013-07-02 15:29     ` Aneesh Kumar K.V
2013-07-02 15:41       ` Aneesh Kumar K.V
2013-07-02 15:29       ` Aneesh Kumar K.V
2013-07-02 15:29       ` Aneesh Kumar K.V
2013-07-02 15:32       ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 15:32         ` Alexander Graf
2013-07-02 16:28         ` Aneesh Kumar K.V
2013-07-02 16:40           ` Aneesh Kumar K.V
2013-07-02 16:28           ` Aneesh Kumar K.V
2013-07-02 16:36           ` Alexander Graf
2013-07-02 16:36             ` Alexander Graf
2013-07-02 16:36             ` Alexander Graf
2013-07-02 16:36             ` Alexander Graf
2013-07-02  5:45 ` [PATCH -V3 4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation Aneesh Kumar K.V
2013-07-02  5:57   ` Aneesh Kumar K.V
2013-07-02  5:45   ` Aneesh Kumar K.V
2013-07-02  6:29   ` virtual machine windows freeze on copy data to an samba share Marko Weber | ZBF
2013-07-03  6:16   ` [PATCH -V3 4/4] powerpc/kvm: Use 256K chunk to track both RMA and hash page table allocation Paul Mackerras
2013-07-03  6:16     ` Paul Mackerras
2013-07-03  6:16     ` Paul Mackerras
2013-07-03  6:16     ` Paul Mackerras
2013-07-02  8:20 ` [PATCH -V3 1/4] mm/cma: Move dma contiguous changes into a seperate config Marek Szyprowski
2013-07-02  8:20   ` Marek Szyprowski
2013-07-02  8:20   ` Marek Szyprowski
2013-07-02 15:33   ` Aneesh Kumar K.V
2013-07-02 15:45     ` Aneesh Kumar K.V
2013-07-02 15:33     ` Aneesh Kumar K.V
2013-07-08 14:21 ` Alexander Graf
2013-07-08 14:21   ` Alexander Graf
2013-07-08 14:21   ` Alexander Graf
2013-07-08 14:21   ` Alexander Graf
