All of lore.kernel.org
 help / color / mirror / Atom feed
* [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
@ 2018-10-26  7:58 Marc Orr
  2018-10-26  7:58 ` [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc Marc Orr
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Marc Orr @ 2018-10-26  7:58 UTC (permalink / raw)
  To: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, willy, sean.j.christopherson
  Cc: Marc Orr

A couple of patches to allocate vmx vcpus with vmalloc instead of
kmalloc, which enables vcpu allocation to succeed when contiguous
physical memory is sparse.

Compared to the last version of these patches, this version:
1. Splits out the refactoring of the vmx_msrs struct into its own
patch, as suggested by Sean Christopherson <sean.j.christopherson@intel.com>.
2. Leverages the __vmalloc() API rather than introducing a new vzalloc()
API, as suggested by Michal Hocko <mhocko@kernel.org>.

Marc Orr (2):
  kvm: vmx: refactor vmx_msrs struct for vmalloc
  kvm: vmx: use vmalloc() to allocate vcpus

 arch/x86/kvm/vmx.c  | 92 +++++++++++++++++++++++++++++++++++++++++----
 virt/kvm/kvm_main.c | 28 ++++++++------
 2 files changed, 100 insertions(+), 20 deletions(-)

-- 
2.19.1.568.g152ad8e336-goog

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc
  2018-10-26  7:58 [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Marc Orr
@ 2018-10-26  7:58 ` Marc Orr
  2018-10-26 11:02   ` Sean Christopherson
  2018-10-26  7:59 ` [kvm PATCH v4 2/2] kvm: vmx: use vmalloc() to allocate vcpus Marc Orr
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Marc Orr @ 2018-10-26  7:58 UTC (permalink / raw)
  To: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, willy, sean.j.christopherson
  Cc: Marc Orr

Previously, the vmx_msrs struct relied on being aligned within a struct
that is backed by the direct map (e.g., memory allocated with kmalloc()).
Specifically, this enabled the virtual addresses associated with the
struct to be translated to physical addresses. However, we'd like to
refactor the host struct, vcpu_vmx, to be allocated with vmalloc(), so
that allocation will succeed when contiguous physical memory is scarce.

Thus, this patch refactors how vmx_msrs is declared and allocated, to
ensure that it can be mapped to the physical address space, even when
vmx_msrs resides within a vmalloc()'d struct.

Signed-off-by: Marc Orr <marcorr@google.com>
---
 arch/x86/kvm/vmx.c | 57 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index abeeb45d1c33..3c0303cc101d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -970,8 +970,25 @@ static inline int pi_test_sn(struct pi_desc *pi_desc)
 
 struct vmx_msrs {
 	unsigned int		nr;
-	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS];
+	struct vmx_msr_entry	*val;
 };
+struct kmem_cache *vmx_msr_entry_cache;
+
+/*
+ * To prevent vmx_msr_entry array from crossing a page boundary, require:
+ * sizeof(*vmx_msrs.vmx_msr_entry.val) to be a power of two. This is guaranteed
+ * through compile-time asserts that:
+ *   - NR_AUTOLOAD_MSRS * sizeof(struct vmx_msr_entry) is a power of two
+ *   - NR_AUTOLOAD_MSRS * sizeof(struct vmx_msr_entry) <= PAGE_SIZE
+ *   - The allocation of vmx_msrs.vmx_msr_entry.val is aligned to its size.
+ */
+#define CHECK_POWER_OF_TWO(val) \
+	BUILD_BUG_ON_MSG(!((val) && !((val) & ((val) - 1))), \
+	#val " is not a power of two.")
+#define CHECK_INTRA_PAGE(val) do { \
+		CHECK_POWER_OF_TWO(val); \
+		BUILD_BUG_ON(!(val <= PAGE_SIZE)); \
+	} while (0)
 
 struct vcpu_vmx {
 	struct kvm_vcpu       vcpu;
@@ -11489,6 +11506,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 
+	vmx->msr_autoload.guest.val =
+		kmem_cache_zalloc(vmx_msr_entry_cache, GFP_KERNEL);
+	if (!vmx->msr_autoload.guest.val) {
+		err = -ENOMEM;
+		goto free_vmx;
+	}
+	vmx->msr_autoload.host.val =
+		kmem_cache_zalloc(vmx_msr_entry_cache, GFP_KERNEL);
+	if (!vmx->msr_autoload.host.val) {
+		err = -ENOMEM;
+		goto free_msr_autoload_guest;
+	}
+
 	vmx->vpid = allocate_vpid();
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
@@ -11576,6 +11606,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
 	free_vpid(vmx->vpid);
+	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.host.val);
+free_msr_autoload_guest:
+	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.guest.val);
+free_vmx:
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 	return ERR_PTR(err);
 }
@@ -15153,6 +15187,10 @@ module_exit(vmx_exit);
 static int __init vmx_init(void)
 {
 	int r;
+	size_t vmx_msr_entry_size =
+		sizeof(struct vmx_msr_entry) * NR_AUTOLOAD_MSRS;
+
+	CHECK_INTRA_PAGE(vmx_msr_entry_size);
 
 #if IS_ENABLED(CONFIG_HYPERV)
 	/*
@@ -15184,9 +15222,21 @@ static int __init vmx_init(void)
 #endif
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+		__alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
 		return r;
+	/*
+	 * A vmx_msr_entry array resides exclusively within the kernel. Thus,
+	 * use kmem_cache_create_usercopy(), with the usersize argument set to
+	 * ZERO, to blacklist copying vmx_msr_entry to/from user space.
+	 */
+	vmx_msr_entry_cache =
+		kmem_cache_create_usercopy("vmx_msr_entry", vmx_msr_entry_size,
+				  vmx_msr_entry_size, SLAB_ACCOUNT, 0, 0, NULL);
+	if (!vmx_msr_entry_cache) {
+		r = -ENOMEM;
+		goto out;
+	}
 
 	/*
 	 * Must be called after kvm_init() so enable_ept is properly set
@@ -15210,5 +15260,8 @@ static int __init vmx_init(void)
 	vmx_check_vmcs12_offsets();
 
 	return 0;
+out:
+	kvm_exit();
+	return r;
 }
 module_init(vmx_init);
-- 
2.19.1.568.g152ad8e336-goog

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [kvm PATCH v4 2/2] kvm: vmx: use vmalloc() to allocate vcpus
  2018-10-26  7:58 [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Marc Orr
  2018-10-26  7:58 ` [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc Marc Orr
@ 2018-10-26  7:59 ` Marc Orr
  2018-10-26 12:29 ` [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Matthew Wilcox
  2018-10-29  1:58 ` Wanpeng Li
  3 siblings, 0 replies; 19+ messages in thread
From: Marc Orr @ 2018-10-26  7:59 UTC (permalink / raw)
  To: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, willy, sean.j.christopherson
  Cc: Marc Orr

Previously, vcpus were allocated through the kmem_cache_zalloc() API,
which requires the underlying physical memory to be contiguous.
Because the x86 vcpu struct, struct vcpu_vmx, is relatively large
(e.g., currently 47680 bytes on my setup), it can become hard to find
contiguous memory.

At the same time, the comments in the code indicate that the primary
reason for using the kmem_cache_zalloc() API is to align the memory
rather than to provide physical contiguity.

Thus, this patch updates the vcpu allocation logic for vmx to use the
vmalloc() API.

Signed-off-by: Marc Orr <marcorr@google.com>
---
 arch/x86/kvm/vmx.c  | 37 ++++++++++++++++++++++++++++++-------
 virt/kvm/kvm_main.c | 28 ++++++++++++++++------------
 2 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3c0303cc101d..8eef21656f60 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -898,7 +898,14 @@ struct nested_vmx {
 #define POSTED_INTR_ON  0
 #define POSTED_INTR_SN  1
 
-/* Posted-Interrupt Descriptor */
+/*
+ * Posted-Interrupt Descriptor
+ *
+ * Note, the physical address of this structure is used by VMX. Furthermore, the
+ * translation code assumes that the entire pi_desc struct resides within a
+ * single page, which will be true because the struct is 64 bytes and 64-byte
+ * aligned.
+ */
 struct pi_desc {
 	u32 pir[8];     /* Posted interrupt requested */
 	union {
@@ -6633,6 +6640,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	}
 
 	if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
+		/*
+		 * Note, pi_desc is contained within a single
+		 * page because the struct is 64 bytes and 64-byte aligned.
+		 */
+		phys_addr_t pi_desc_phys =
+			page_to_phys(vmalloc_to_page(&vmx->pi_desc)) +
+			(u64)&vmx->pi_desc % PAGE_SIZE;
+
 		vmcs_write64(EOI_EXIT_BITMAP0, 0);
 		vmcs_write64(EOI_EXIT_BITMAP1, 0);
 		vmcs_write64(EOI_EXIT_BITMAP2, 0);
@@ -6641,7 +6656,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write16(GUEST_INTR_STATUS, 0);
 
 		vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR);
-		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
+		vmcs_write64(POSTED_INTR_DESC_ADDR, pi_desc_phys);
 	}
 
 	if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
@@ -11493,13 +11508,18 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vmx);
+	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.guest.val);
+	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.host.val);
+	vfree(vmx);
 }
 
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
 	int err;
-	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	struct vcpu_vmx *vmx =
+		__vmalloc(sizeof(struct vcpu_vmx),
+			  GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT,
+			  PAGE_KERNEL);
 	unsigned long *msr_bitmap;
 	int cpu;
 
@@ -11610,7 +11630,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 free_msr_autoload_guest:
 	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.guest.val);
 free_vmx:
-	kmem_cache_free(kvm_vcpu_cache, vmx);
+	vfree(vmx);
 	return ERR_PTR(err);
 }
 
@@ -15221,8 +15241,11 @@ static int __init vmx_init(void)
 	}
 #endif
 
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		__alignof__(struct vcpu_vmx), THIS_MODULE);
+	/*
+	 * Disable kmem cache; vmalloc will be used instead
+	 * to avoid OOM'ing when memory is available but not contiguous.
+	 */
+	r = kvm_init(&vmx_x86_ops, 0, 0, THIS_MODULE);
 	if (r)
 		return r;
 	/*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 786ade1843a2..8b979e7c3ecd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4038,18 +4038,22 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		goto out_free_2;
 	register_reboot_notifier(&kvm_reboot_notifier);
 
-	/* A kmem cache lets us meet the alignment requirements of fx_save. */
-	if (!vcpu_align)
-		vcpu_align = __alignof__(struct kvm_vcpu);
-	kvm_vcpu_cache =
-		kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
-					   SLAB_ACCOUNT,
-					   offsetof(struct kvm_vcpu, arch),
-					   sizeof_field(struct kvm_vcpu, arch),
-					   NULL);
-	if (!kvm_vcpu_cache) {
-		r = -ENOMEM;
-		goto out_free_3;
+	/*
+	 * When vcpu_size is zero,
+	 * architecture-specific code manages its own vcpu allocation.
+	 */
+	kvm_vcpu_cache = NULL;
+	if (vcpu_size) {
+		if (!vcpu_align)
+			vcpu_align = __alignof__(struct kvm_vcpu);
+		kvm_vcpu_cache = kmem_cache_create_usercopy(
+			"kvm_vcpu", vcpu_size, vcpu_align, SLAB_ACCOUNT,
+			offsetof(struct kvm_vcpu, arch),
+			sizeof_field(struct kvm_vcpu, arch), NULL);
+		if (!kvm_vcpu_cache) {
+			r = -ENOMEM;
+			goto out_free_3;
+		}
 	}
 
 	r = kvm_async_pf_init();
-- 
2.19.1.568.g152ad8e336-goog

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc
  2018-10-26  7:58 ` [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc Marc Orr
@ 2018-10-26 11:02   ` Sean Christopherson
  0 siblings, 0 replies; 19+ messages in thread
From: Sean Christopherson @ 2018-10-26 11:02 UTC (permalink / raw)
  To: Marc Orr
  Cc: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, willy

On Fri, Oct 26, 2018 at 12:58:59AM -0700, Marc Orr wrote:
> Previously, the vmx_msrs struct relied on being aligned within a struct
> that is backed by the direct map (e.g., memory allocated with kmalloc()).
> Specifically, this enabled the virtual addresses associated with the
> struct to be translated to physical addresses. However, we'd like to
> refactor the host struct, vcpu_vmx, to be allocated with vmalloc(), so
> that allocation will succeed when contiguous physical memory is scarce.

IMO the changelog should call out that the MSR load/store lists are
referenced in the VMCS by their physical address and therefore must be
contiguous in physical memory.  Without that knowledge it may not be
obvious as to why we care about keeping the address contiguous.

> Thus, this patch refactors how vmx_msrs is declared and allocated, to
> ensure that it can be mapped to the physical address space, even when
> vmx_msrs resides within a vmalloc()'d struct.
> 
> Signed-off-by: Marc Orr <marcorr@google.com>
> ---
>  arch/x86/kvm/vmx.c | 57 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index abeeb45d1c33..3c0303cc101d 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -970,8 +970,25 @@ static inline int pi_test_sn(struct pi_desc *pi_desc)
>  
>  struct vmx_msrs {
>  	unsigned int		nr;
> -	struct vmx_msr_entry	val[NR_AUTOLOAD_MSRS];
> +	struct vmx_msr_entry	*val;
>  };
> +struct kmem_cache *vmx_msr_entry_cache;
> +
> +/*
> + * To prevent vmx_msr_entry array from crossing a page boundary, require:
> + * sizeof(*vmx_msrs.vmx_msr_entry.val) to be a power of two. This is guaranteed
> + * through compile-time asserts that:
> + *   - NR_AUTOLOAD_MSRS * sizeof(struct vmx_msr_entry) is a power of two
> + *   - NR_AUTOLOAD_MSRS * sizeof(struct vmx_msr_entry) <= PAGE_SIZE
> + *   - The allocation of vmx_msrs.vmx_msr_entry.val is aligned to its size.
> + */

Nit: the alignment isn't an assertion, it's simply "the code".  It'd
     also be nice to fold in the requirement about the lists being
     physically contiguous.

Maybe:

	/*
	 * The VMCS references the MSR load/store lists by their physical
	 * address, i.e. the vmx_msr_entry structs must be contiguous in
	 * physical memory.  Ensure this by aligning allocations to the max
	 * size of the lists and asserting that the max size is a power of
	 * two and less than PAGE_SIZE.
	 */

> +#define CHECK_POWER_OF_TWO(val) \
> +	BUILD_BUG_ON_MSG(!((val) && !((val) & ((val) - 1))), \
> +	#val " is not a power of two.")
> +#define CHECK_INTRA_PAGE(val) do { \
> +		CHECK_POWER_OF_TWO(val); \
> +		BUILD_BUG_ON(!(val <= PAGE_SIZE)); \
> +	} while (0)

You can use is_power_of_2(), no need to define your own.  And I think
it'd be better to use BUILD_BUG_ON directly in vmx_init() given that
the big comment is all about vmx_msrs, i.e. it's probably not worth
defining CHECK_INTRA_PAGE at this time.

>  struct vcpu_vmx {
>  	struct kvm_vcpu       vcpu;
> @@ -11489,6 +11506,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	if (!vmx)
>  		return ERR_PTR(-ENOMEM);
>  
> +	vmx->msr_autoload.guest.val =
> +		kmem_cache_zalloc(vmx_msr_entry_cache, GFP_KERNEL);

Technically this doesn't need GFP_ZERO, we should never read an MSR
entry that hasn't been explicitly written, i.e. we always check
vmx_msrs.nr before dereferencing vmx_msrs.val.

> +	if (!vmx->msr_autoload.guest.val) {
> +		err = -ENOMEM;
> +		goto free_vmx;
> +	}
> +	vmx->msr_autoload.host.val =
> +		kmem_cache_zalloc(vmx_msr_entry_cache, GFP_KERNEL);
> +	if (!vmx->msr_autoload.host.val) {
> +		err = -ENOMEM;
> +		goto free_msr_autoload_guest;
> +	}
> +
>  	vmx->vpid = allocate_vpid();
>  
>  	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
> @@ -11576,6 +11606,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	kvm_vcpu_uninit(&vmx->vcpu);
>  free_vcpu:
>  	free_vpid(vmx->vpid);
> +	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.host.val);
> +free_msr_autoload_guest:
> +	kmem_cache_free(vmx_msr_entry_cache, vmx->msr_autoload.guest.val);
> +free_vmx:
>  	kmem_cache_free(kvm_vcpu_cache, vmx);
>  	return ERR_PTR(err);
>  }
> @@ -15153,6 +15187,10 @@ module_exit(vmx_exit);
>  static int __init vmx_init(void)
>  {
>  	int r;
> +	size_t vmx_msr_entry_size =
> +		sizeof(struct vmx_msr_entry) * NR_AUTOLOAD_MSRS;
> +
> +	CHECK_INTRA_PAGE(vmx_msr_entry_size);
>  
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	/*
> @@ -15184,9 +15222,21 @@ static int __init vmx_init(void)
>  #endif
>  
>  	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
> -		     __alignof__(struct vcpu_vmx), THIS_MODULE);
> +		__alignof__(struct vcpu_vmx), THIS_MODULE);

Unrelated whitespace change.

>  	if (r)
>  		return r;
> +	/*
> +	 * A vmx_msr_entry array resides exclusively within the kernel. Thus,
> +	 * use kmem_cache_create_usercopy(), with the usersize argument set to
> +	 * ZERO, to blacklist copying vmx_msr_entry to/from user space.
> +	 */
> +	vmx_msr_entry_cache =
> +		kmem_cache_create_usercopy("vmx_msr_entry", vmx_msr_entry_size,
> +				  vmx_msr_entry_size, SLAB_ACCOUNT, 0, 0, NULL);
> +	if (!vmx_msr_entry_cache) {
> +		r = -ENOMEM;
> +		goto out;
> +	}
>  
>  	/*
>  	 * Must be called after kvm_init() so enable_ept is properly set
> @@ -15210,5 +15260,8 @@ static int __init vmx_init(void)
>  	vmx_check_vmcs12_offsets();
>  
>  	return 0;
> +out:
> +	kvm_exit();
> +	return r;

Three things:
  - This needs to be vmx_exit(), e.g. to undo the eVMCS stuff.
  - The cache needs to be destroyed in vmx_exit().
  - Either remove the label/goto or convert the vmx_setup_l1d_flush()
    failure path to use "goto out".

>  }
>  module_init(vmx_init);
> -- 
> 2.19.1.568.g152ad8e336-goog
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-26  7:58 [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Marc Orr
  2018-10-26  7:58 ` [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc Marc Orr
  2018-10-26  7:59 ` [kvm PATCH v4 2/2] kvm: vmx: use vmalloc() to allocate vcpus Marc Orr
@ 2018-10-26 12:29 ` Matthew Wilcox
  2018-10-26 14:45   ` Matthew Wilcox
  2018-10-29  1:58 ` Wanpeng Li
  3 siblings, 1 reply; 19+ messages in thread
From: Matthew Wilcox @ 2018-10-26 12:29 UTC (permalink / raw)
  To: Marc Orr
  Cc: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, sean.j.christopherson

On Fri, Oct 26, 2018 at 12:58:58AM -0700, Marc Orr wrote:
> A couple of patches to allocate vmx vcpus with vmalloc instead of
> kmalloc, which enables vcpu allocation to succeed when contiguous
> physical memory is sparse.

A question that may have been asked already, but if so I didn't see it ...
does kvm_vcpu need to be so damn big?  It's 22kB with the random .config
I happen to have (which gets rounded to 32kB, an order-3 allocation).  If
we can knock 6kB off it (either by allocating pieces separately), it becomes
an order-2 allocation.  So, biggest chunks:

        struct kvm_vcpu_arch       arch;                 /*   576 21568 */

        struct kvm_mmu             mmu;                  /*   336   400 */
        struct kvm_mmu             nested_mmu;           /*   736   400 */
        struct fpu                 user_fpu;             /*  2176  4160 */
        struct fpu                 guest_fpu;            /*  6336  4160 */
        struct kvm_cpuid_entry2    cpuid_entries[80];    /* 10580  3200 */
        struct x86_emulate_ctxt    emulate_ctxt;         /* 13792  2656 */
        struct kvm_pmu             pmu;                  /* 17344  1520 */
        struct kvm_vcpu_hv         hyperv;               /* 18872  1872 */
                gfn_t              gfns[64];             /* 20832   512 */

that's about 19kB of the 22kB right there.  Can any of them be shrunk
in size or allocated separately?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-26 12:29 ` [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Matthew Wilcox
@ 2018-10-26 14:45   ` Matthew Wilcox
  2018-10-26 14:49     ` Dave Hansen
  0 siblings, 1 reply; 19+ messages in thread
From: Matthew Wilcox @ 2018-10-26 14:45 UTC (permalink / raw)
  To: Marc Orr
  Cc: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, sean.j.christopherson, Dave Hansen

On Fri, Oct 26, 2018 at 05:29:48AM -0700, Matthew Wilcox wrote:
> A question that may have been asked already, but if so I didn't see it ...
> does kvm_vcpu need to be so damn big?  It's 22kB with the random .config
> I happen to have (which gets rounded to 32kB, an order-3 allocation).  If
> we can knock 6kB off it (either by allocating pieces separately), it becomes
> an order-2 allocation.  So, biggest chunks:
> 
>         struct kvm_vcpu_arch       arch;                 /*   576 21568 */
> 
>         struct kvm_mmu             mmu;                  /*   336   400 */
>         struct kvm_mmu             nested_mmu;           /*   736   400 */
>         struct fpu                 user_fpu;             /*  2176  4160 */
>         struct fpu                 guest_fpu;            /*  6336  4160 */
>         struct kvm_cpuid_entry2    cpuid_entries[80];    /* 10580  3200 */
>         struct x86_emulate_ctxt    emulate_ctxt;         /* 13792  2656 */
>         struct kvm_pmu             pmu;                  /* 17344  1520 */
>         struct kvm_vcpu_hv         hyperv;               /* 18872  1872 */
>                 gfn_t              gfns[64];             /* 20832   512 */
> 
> that's about 19kB of the 22kB right there.  Can any of them be shrunk
> in size or allocated separately?

I just had a fun conversation with Dave Hansen (cc'd) in which he
indicated that the fpu should definitely be dynamically allocated.
See also his commits from 2015 around XSAVE.

0c8c0f03e3a292e031596484275c14cf39c0ab7a
4109ca066b6b899ac7549bf3aac94b178ac95891

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-26 14:45   ` Matthew Wilcox
@ 2018-10-26 14:49     ` Dave Hansen
  2018-10-31 13:06       ` Marc Orr
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Hansen @ 2018-10-26 14:49 UTC (permalink / raw)
  To: Matthew Wilcox, Marc Orr
  Cc: kvm, jmattson, rientjes, konrad.wilk, linux-mm, akpm, pbonzini,
	rkrcmar, sean.j.christopherson, Dave Hansen

On 10/26/18 7:45 AM, Matthew Wilcox wrote:
>         struct fpu                 user_fpu;             /*  2176  4160 */
>         struct fpu                 guest_fpu;            /*  6336  4160 */

Those are *not* supposed to be embedded in any other structures.  My bad
for not documenting this better.

It also seems really goofy that we need an xsave buffer in the
task_struct for user fpu state, then another in the vcpu.  Isn't one for
user state enough?

In any case, I'd suggest getting rid of 'user_fpu', then either moving
'guest_fpu' to the bottom of the structure, or just make it a 'struct
fpu *' and dynamically allocating it separately.

To do this, I'd take fpu__init_task_struct_size(), and break it apart a
bit to tell you the size of the 'struct fpu' separately from the size of
the 'task struct'.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-26  7:58 [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Marc Orr
                   ` (2 preceding siblings ...)
  2018-10-26 12:29 ` [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Matthew Wilcox
@ 2018-10-29  1:58 ` Wanpeng Li
  2018-10-29 16:25   ` Jim Mattson
  3 siblings, 1 reply; 19+ messages in thread
From: Wanpeng Li @ 2018-10-29  1:58 UTC (permalink / raw)
  To: Marc Orr
  Cc: kvm, Jim Mattson, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, Andrew Morton, Paolo Bonzini, Radim Krcmar, willy,
	Sean Christopherson

On Fri, 26 Oct 2018 at 15:59, Marc Orr <marcorr@google.com> wrote:
>
> A couple of patches to allocate vmx vcpus with vmalloc instead of
> kmalloc, which enables vcpu allocation to succeed when contiguous
> physical memory is sparse.

We have not yet encountered memory too fragmented to allocate kvm
related metadata in our overcommit pools; is this a true requirement
from the product environments?

Regards,
Wanpeng Li

>
> Compared to the last version of these patches, this version:
> 1. Splits out the refactoring of the vmx_msrs struct into its own
> patch, as suggested by Sean Christopherson <sean.j.christopherson@intel.com>.
> 2. Leverages the __vmalloc() API rather than introducing a new vzalloc()
> API, as suggested by Michal Hocko <mhocko@kernel.org>.
>
> Marc Orr (2):
>   kvm: vmx: refactor vmx_msrs struct for vmalloc
>   kvm: vmx: use vmalloc() to allocate vcpus
>
>  arch/x86/kvm/vmx.c  | 92 +++++++++++++++++++++++++++++++++++++++++----
>  virt/kvm/kvm_main.c | 28 ++++++++------
>  2 files changed, 100 insertions(+), 20 deletions(-)
>
> --
> 2.19.1.568.g152ad8e336-goog
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29  1:58 ` Wanpeng Li
@ 2018-10-29 16:25   ` Jim Mattson
  2018-10-29 16:48     ` Matthew Wilcox
  0 siblings, 1 reply; 19+ messages in thread
From: Jim Mattson @ 2018-10-29 16:25 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: Marc Orr, kvm, David Rientjes, Konrad Rzeszutek Wilk, linux-mm,
	Andrew Morton, Paolo Bonzini, Radim Krcmar, willy,
	Sean Christopherson

On Sun, Oct 28, 2018 at 6:58 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
> We have not yet encounter memory is too fragmented to allocate kvm
> related metadata in our overcommit pools, is this true requirement
> from the product environments?

Yes.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29 16:25   ` Jim Mattson
@ 2018-10-29 16:48     ` Matthew Wilcox
  2018-10-29 18:12       ` Jim Mattson
  2018-10-31 13:17       ` Marc Orr
  0 siblings, 2 replies; 19+ messages in thread
From: Matthew Wilcox @ 2018-10-29 16:48 UTC (permalink / raw)
  To: Jim Mattson
  Cc: Wanpeng Li, Marc Orr, kvm, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, Andrew Morton, Paolo Bonzini, Radim Krcmar,
	Sean Christopherson

On Mon, Oct 29, 2018 at 09:25:05AM -0700, Jim Mattson wrote:
> On Sun, Oct 28, 2018 at 6:58 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
> > We have not yet encounter memory is too fragmented to allocate kvm
> > related metadata in our overcommit pools, is this true requirement
> > from the product environments?
> 
> Yes.

Are your logs granular enough to determine if turning this into an
order-2 allocation (by splitting out "struct fpu" allocations) will be
sufficient to resolve your problem, or do we need to turn it into an
order-1 or vmalloc allocation to achieve your production goals?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29 16:48     ` Matthew Wilcox
@ 2018-10-29 18:12       ` Jim Mattson
  2018-10-29 19:16         ` Marc Orr
  2018-10-31 13:17       ` Marc Orr
  1 sibling, 1 reply; 19+ messages in thread
From: Jim Mattson @ 2018-10-29 18:12 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Wanpeng Li, Marc Orr, kvm, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, Andrew Morton, Paolo Bonzini, Radim Krcmar,
	Sean Christopherson

On Mon, Oct 29, 2018 at 9:48 AM, Matthew Wilcox <willy@infradead.org> wrote:
> On Mon, Oct 29, 2018 at 09:25:05AM -0700, Jim Mattson wrote:
>> On Sun, Oct 28, 2018 at 6:58 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
>> > We have not yet encounter memory is too fragmented to allocate kvm
>> > related metadata in our overcommit pools, is this true requirement
>> > from the product environments?
>>
>> Yes.
>
> Are your logs granular enough to determine if turning this into an
> order-2 allocation (by splitting out "struct fpu" allocations) will be
> sufficient to resolve your problem, or do we need to turn it into an
> order-1 or vmalloc allocation to achieve your production goals?

Turning this into an order-2 allocation should suffice.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29 18:12       ` Jim Mattson
@ 2018-10-29 19:16         ` Marc Orr
  2018-10-29 19:22           ` Marc Orr
  0 siblings, 1 reply; 19+ messages in thread
From: Marc Orr @ 2018-10-29 19:16 UTC (permalink / raw)
  To: Jim Mattson
  Cc: willy, Wanpeng Li, kvm, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, akpm, pbonzini, rkrcmar, sean.j.christopherson

Thanks for all the discussion on this. Give me a bit to investigate
Dave's suggestions around refactoring the fpu state, and I'll report
back with what I find.
Thanks,
Marc
On Mon, Oct 29, 2018 at 11:12 AM Jim Mattson <jmattson@google.com> wrote:
>
> On Mon, Oct 29, 2018 at 9:48 AM, Matthew Wilcox <willy@infradead.org> wrote:
> > On Mon, Oct 29, 2018 at 09:25:05AM -0700, Jim Mattson wrote:
> >> On Sun, Oct 28, 2018 at 6:58 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
> >> > We have not yet encounter memory is too fragmented to allocate kvm
> >> > related metadata in our overcommit pools, is this true requirement
> >> > from the product environments?
> >>
> >> Yes.
> >
> > Are your logs granular enough to determine if turning this into an
> > order-2 allocation (by splitting out "struct fpu" allocations) will be
> > sufficient to resolve your problem, or do we need to turn it into an
> > order-1 or vmalloc allocation to achieve your production goals?
>
> Turning this into an order-2 allocation should suffice.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29 19:16         ` Marc Orr
@ 2018-10-29 19:22           ` Marc Orr
  0 siblings, 0 replies; 19+ messages in thread
From: Marc Orr @ 2018-10-29 19:22 UTC (permalink / raw)
  To: Jim Mattson
  Cc: willy, Wanpeng Li, kvm, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, akpm, pbonzini, rkrcmar, sean.j.christopherson

On Mon, Oct 29, 2018 at 12:16 PM Marc Orr <marcorr@google.com> wrote:
>
> Thanks for all the discussion on this. Give me a bit to investigate
> Dave's suggestions around refactoring the fpu state, and I'll report
> back with what I find.
> Thanks,
> Marc

Also, sorry for top-posting on my last email!

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-26 14:49     ` Dave Hansen
@ 2018-10-31 13:06       ` Marc Orr
  0 siblings, 0 replies; 19+ messages in thread
From: Marc Orr @ 2018-10-31 13:06 UTC (permalink / raw)
  To: dave.hansen
  Cc: willy, kvm, Jim Mattson, David Rientjes, Konrad Rzeszutek Wilk,
	linux-mm, akpm, pbonzini, rkrcmar, sean.j.christopherson,
	dave.hansen

On Fri, Oct 26, 2018 at 7:49 AM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 10/26/18 7:45 AM, Matthew Wilcox wrote:
> >         struct fpu                 user_fpu;             /*  2176  4160 */
> >         struct fpu                 guest_fpu;            /*  6336  4160 */
>
> Those are *not* supposed to be embedded in any other structures.  My bad
> for not documenting this better.
>
> It also seems really goofy that we need an xsave buffer in the
> task_struct for user fpu state, then another in the vcpu.  Isn't one for
> user state enough?
>
> In any case, I'd suggest getting rid of 'user_fpu', then either moving
> 'guest_fpu' to the bottom of the structure, or just make it a 'struct
> fpu *' and dynamically allocating it separately.

I've written a patch to get rid of user_fpu, as suggested here and
will be sending that out shortly.

>
> To do this, I'd take fpu__init_task_struct_size(), and break it apart a
> bit to tell you the size of the 'struct fpu' separately from the size of
> the 'task struct'.

I've written a 2nd patch to make guest_fpu a 'struct fpu *' and
dynamically allocate it separately. The reason I went with this
suggestion, rather than moving 'struct fpu' to the bottom of
kvm_vcpu_arch is because I believe that solution would still expand
the kvm_vcpu_arch by the size of the fpu, according to which
fpregs_state was in use.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-29 16:48     ` Matthew Wilcox
  2018-10-29 18:12       ` Jim Mattson
@ 2018-10-31 13:17       ` Marc Orr
  2018-10-31 13:27         ` Matthew Wilcox
  1 sibling, 1 reply; 19+ messages in thread
From: Marc Orr @ 2018-10-31 13:17 UTC (permalink / raw)
  To: willy
  Cc: Jim Mattson, Wanpeng Li, kvm, David Rientjes,
	Konrad Rzeszutek Wilk, linux-mm, akpm, pbonzini, rkrcmar,
	sean.j.christopherson

On Mon, Oct 29, 2018 at 9:48 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Mon, Oct 29, 2018 at 09:25:05AM -0700, Jim Mattson wrote:
> > On Sun, Oct 28, 2018 at 6:58 PM, Wanpeng Li <kernellwp@gmail.com> wrote:
> > > We have not yet encountered memory too fragmented to allocate
> > > kvm-related metadata in our overcommit pools; is this a true
> > > requirement from the product environments?
> >
> > Yes.
>
> Are your logs granular enough to determine if turning this into an
> order-2 allocation (by splitting out "struct fpu" allocations) will be
> sufficient to resolve your problem, or do we need to turn it into an
> order-1 or vmalloc allocation to achieve your production goals?

As noted in my response to Dave Hansen, I've got his suggestions done
and they were successful in drastically reducing the size of the
vcpu_vmx struct, which is great. Specifically, on an upstream kernel,
I've reduced the size of the struct from 23680 down to 15168, which is
order 2.

All that being said, I don't really understand why we wouldn't convert
this memory allocation from a kmalloc() into a vmalloc(). From my
point of view, we are still close to bloating vcpu_vmx into an order 3
allocation, and it's common for vendors to append to both vcpu_vmx
directly, or more likely to its embedded structs. Though, arguably,
vendors should not be doing that.

Most importantly, it just isn't obvious to me why kmalloc() is
preferred over vmalloc(). From my point of view, vmalloc() does the
exact same thing as kmalloc(), except that it works when contiguous
memory is sparse, which seems better to me.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-31 13:17       ` Marc Orr
@ 2018-10-31 13:27         ` Matthew Wilcox
  2018-10-31 13:48           ` Marc Orr
  0 siblings, 1 reply; 19+ messages in thread
From: Matthew Wilcox @ 2018-10-31 13:27 UTC (permalink / raw)
  To: Marc Orr
  Cc: Jim Mattson, Wanpeng Li, kvm, David Rientjes,
	Konrad Rzeszutek Wilk, linux-mm, akpm, pbonzini, rkrcmar,
	sean.j.christopherson

On Wed, Oct 31, 2018 at 01:17:40PM +0000, Marc Orr wrote:
> All that being said, I don't really understand why we wouldn't convert
> this memory allocation from a kmalloc() into a vmalloc(). From my
> point of view, we are still close to bloating vcpu_vmx into an order 3
> allocation, and it's common for vendors to append to both vcpu_vmx
> directly, or more likely to its embedded structs. Though, arguably,
> vendors should not be doing that.
> 
> Most importantly, it just isn't obvious to me why kmalloc() is
> preferred over vmalloc(). From my point of view, vmalloc() does the
> exact same thing as kmalloc(), except that it works when contiguous
> memory is sparse, which seems better to me.

It's ever-so-slightly faster to access kmalloc memory than vmalloc memory;
kmalloc memory comes from the main kernel mapping, generally mapped with
1GB pages while vmalloc memory is necessarily accessed using 4kB pages,
taking an extra two steps in the page table hierarchy.  There's more
software overhead involved too; in addition to allocating the physical
pages (which both kmalloc and vmalloc have to do), vmalloc has to allocate
a vmap_area and a vm_struct to describe the virtual mapping.

The virtual address space can also get fragmented, just like the physical
address space does, potentially leading to the amusing scenario where
you can allocate physically contiguous memory, but not find a contiguous
chunk of vmalloc space to put it in.

For larger allocations, we tend to prefer kvmalloc() which gives us
the best of both worlds, allocating from kmalloc first and vmalloc as a
fallback, but you've got some fun gyrations to go through to do physical
mapping, so that's not entirely appropriate for your case.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-31 13:27         ` Matthew Wilcox
@ 2018-10-31 13:48           ` Marc Orr
  2018-10-31 14:21             ` Matthew Wilcox
  0 siblings, 1 reply; 19+ messages in thread
From: Marc Orr @ 2018-10-31 13:48 UTC (permalink / raw)
  To: willy
  Cc: Jim Mattson, Wanpeng Li, kvm, David Rientjes,
	Konrad Rzeszutek Wilk, linux-mm, akpm, pbonzini, rkrcmar,
	sean.j.christopherson

On Wed, Oct 31, 2018 at 6:27 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Oct 31, 2018 at 01:17:40PM +0000, Marc Orr wrote:
> > All that being said, I don't really understand why we wouldn't convert
> > this memory allocation from a kmalloc() into a vmalloc(). From my
> > point of view, we are still close to bloating vcpu_vmx into an order 3
> > allocation, and it's common for vendors to append to both vcpu_vmx
> > directly, or more likely to its embedded structs. Though, arguably,
> > vendors should not be doing that.
> >
> > Most importantly, it just isn't obvious to me why kmalloc() is
> > preferred over vmalloc(). From my point of view, vmalloc() does the
> > exact same thing as kmalloc(), except that it works when contiguous
> > memory is sparse, which seems better to me.
>
> It's ever-so-slightly faster to access kmalloc memory than vmalloc memory;
> kmalloc memory comes from the main kernel mapping, generally mapped with
> 1GB pages while vmalloc memory is necessarily accessed using 4kB pages,
> taking an extra two steps in the page table hierarchy.  There's more
> software overhead involved too; in addition to allocating the physical
> pages (which both kmalloc and vmalloc have to do), vmalloc has to allocate
> a vmap_area and a vm_struct to describe the virtual mapping.
>
> The virtual address space can also get fragmented, just like the physical
> address space does, potentially leading to the amusing scenario where
> you can allocate physically contiguous memory, but not find a contiguous
> chunk of vmalloc space to put it in.
>
> For larger allocations, we tend to prefer kvmalloc() which gives us
> the best of both worlds, allocating from kmalloc first and vmalloc as a
> fallback, but you've got some fun gyrations to go through to do physical
> mapping, so that's not entirely appropriate for your case.

Thanks for the explanation. Is there a way to dynamically detect the
memory allocation done by kvmalloc() (i.e., kmalloc() or vmalloc())?
If so, we could use kvmalloc(), and add two code paths to do the
physical mapping, according to whether the underlying memory was
allocated with kmalloc() or vmalloc().

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-31 13:48           ` Marc Orr
@ 2018-10-31 14:21             ` Matthew Wilcox
  2018-10-31 21:19               ` Marc Orr
  0 siblings, 1 reply; 19+ messages in thread
From: Matthew Wilcox @ 2018-10-31 14:21 UTC (permalink / raw)
  To: Marc Orr
  Cc: Jim Mattson, Wanpeng Li, kvm, David Rientjes,
	Konrad Rzeszutek Wilk, linux-mm, akpm, pbonzini, rkrcmar,
	sean.j.christopherson

On Wed, Oct 31, 2018 at 01:48:44PM +0000, Marc Orr wrote:
> Thanks for the explanation. Is there a way to dynamically detect the
> memory allocation done by kvmalloc() (i.e., kmalloc() or vmalloc())?
> If so, we could use kvmalloc(), and add two code paths to do the
> physical mapping, according to whether the underlying memory was
> allocated with kmalloc() or vmalloc().

Yes -- it's used in the implementation of kvfree():

        if (is_vmalloc_addr(addr))
                vfree(addr);
        else
                kfree(addr);

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus
  2018-10-31 14:21             ` Matthew Wilcox
@ 2018-10-31 21:19               ` Marc Orr
  0 siblings, 0 replies; 19+ messages in thread
From: Marc Orr @ 2018-10-31 21:19 UTC (permalink / raw)
  To: willy
  Cc: Jim Mattson, Wanpeng Li, kvm, David Rientjes,
	Konrad Rzeszutek Wilk, linux-mm, akpm, pbonzini, rkrcmar,
	sean.j.christopherson

On Wed, Oct 31, 2018 at 7:21 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Wed, Oct 31, 2018 at 01:48:44PM +0000, Marc Orr wrote:
> > Thanks for the explanation. Is there a way to dynamically detect the
> > memory allocation done by kvmalloc() (i.e., kmalloc() or vmalloc())?
> > If so, we could use kvmalloc(), and add two code paths to do the
> > physical mapping, according to whether the underlying memory was
> > allocated with kmalloc() or vmalloc().
>
> Yes -- it's used in the implementation of kvfree():
>
>         if (is_vmalloc_addr(addr))
>                 vfree(addr);
>         else
>                 kfree(addr);
>

I can drop the vmalloc() patches (unless someone else thinks we should
proceed with a kvmalloc() version). I discussed them with my
colleagues, and the consensus on our end is we shouldn't let these
structs bloat so big. Thanks for everyone's help to reduce them by 2x
the fpu struct! I'll be sending out another version of the patch
series, with the two fpu patches and minus the vmalloc() patches,
after I hear back from Dave on a question I just sent.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2018-10-31 21:19 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-26  7:58 [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Marc Orr
2018-10-26  7:58 ` [kvm PATCH v4 1/2] kvm: vmx: refactor vmx_msrs struct for vmalloc Marc Orr
2018-10-26 11:02   ` Sean Christopherson
2018-10-26  7:59 ` [kvm PATCH v4 2/2] kvm: vmx: use vmalloc() to allocate vcpus Marc Orr
2018-10-26 12:29 ` [kvm PATCH v4 0/2] use vmalloc to allocate vmx vcpus Matthew Wilcox
2018-10-26 14:45   ` Matthew Wilcox
2018-10-26 14:49     ` Dave Hansen
2018-10-31 13:06       ` Marc Orr
2018-10-29  1:58 ` Wanpeng Li
2018-10-29 16:25   ` Jim Mattson
2018-10-29 16:48     ` Matthew Wilcox
2018-10-29 18:12       ` Jim Mattson
2018-10-29 19:16         ` Marc Orr
2018-10-29 19:22           ` Marc Orr
2018-10-31 13:17       ` Marc Orr
2018-10-31 13:27         ` Matthew Wilcox
2018-10-31 13:48           ` Marc Orr
2018-10-31 14:21             ` Matthew Wilcox
2018-10-31 21:19               ` Marc Orr

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.