All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michael Roth <michael.roth@amd.com>
To: Chao Peng <chao.p.peng@linux.intel.com>
Cc: <kvm@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-mm@kvack.org>, <linux-fsdevel@vger.kernel.org>,
	<linux-api@vger.kernel.org>, <linux-doc@vger.kernel.org>,
	<qemu-devel@nongnu.org>, Paolo Bonzini <pbonzini@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	<x86@kernel.org>, "H . Peter Anvin" <hpa@zytor.com>,
	Hugh Dickins <hughd@google.com>, Jeff Layton <jlayton@kernel.org>,
	"J . Bruce Fields" <bfields@fieldses.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Mike Rapoport <rppt@kernel.org>,
	Steven Price <steven.price@arm.com>,
	"Maciej S . Szmigiero" <mail@maciej.szmigiero.name>,
	Vlastimil Babka <vbabka@suse.cz>,
	Vishal Annapurve <vannapurve@google.com>,
	Yu Zhang <yu.c.zhang@linux.intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	<luto@kernel.org>, <jun.nakajima@intel.com>,
	<dave.hansen@intel.com>, <ak@linux.intel.com>, <david@redhat.com>,
	<aarcange@redhat.com>, <ddutile@redhat.com>,
	<dhildenb@redhat.com>, Quentin Perret <qperret@google.com>,
	<mhocko@suse.com>
Subject: Re: [PATCH v6 7/8] KVM: Enable and expose KVM_MEM_PRIVATE
Date: Thu, 23 Jun 2022 17:07:51 -0500	[thread overview]
Message-ID: <20220623220751.emt3iqq77faxfzzy@amd.com> (raw)
In-Reply-To: <20220519153713.819591-8-chao.p.peng@linux.intel.com>

On Thu, May 19, 2022 at 11:37:12PM +0800, Chao Peng wrote:
> Register private memslot to fd-based memory backing store and handle the
> memfile notifiers to zap the existing mappings.
> 
> Currently the register is happened at memslot creating time and the
> initial support does not include page migration/swap.
> 
> KVM_MEM_PRIVATE is not exposed by default, architecture code can turn
> on it by implementing kvm_arch_private_mem_supported().
> 
> A 'kvm' reference is added in memslot structure since in
> memfile_notifier callbacks we can only obtain a memslot reference while
> kvm is need to do the zapping. The zapping itself reuses code from
> existing mmu notifier handling.
> 
> Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  include/linux/kvm_host.h |  10 ++-
>  virt/kvm/kvm_main.c      | 132 ++++++++++++++++++++++++++++++++++++---
>  2 files changed, 131 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b0a7910505ed..00efb4b96bc7 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -246,7 +246,7 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
>  #endif
>  
> -#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
> +#if defined(KVM_ARCH_WANT_MMU_NOTIFIER) || defined(CONFIG_MEMFILE_NOTIFIER)
>  struct kvm_gfn_range {
>  	struct kvm_memory_slot *slot;
>  	gfn_t start;
> @@ -577,6 +577,7 @@ struct kvm_memory_slot {
>  	struct file *private_file;
>  	loff_t private_offset;
>  	struct memfile_notifier notifier;
> +	struct kvm *kvm;
>  };
>  
>  static inline bool kvm_slot_is_private(const struct kvm_memory_slot *slot)
> @@ -769,9 +770,13 @@ struct kvm {
>  	struct hlist_head irq_ack_notifier_list;
>  #endif
>  
> +#if (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) ||\
> +	defined(CONFIG_MEMFILE_NOTIFIER)
> +	unsigned long mmu_notifier_seq;
> +#endif
> +
>  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
>  	struct mmu_notifier mmu_notifier;
> -	unsigned long mmu_notifier_seq;
>  	long mmu_notifier_count;
>  	unsigned long mmu_notifier_range_start;
>  	unsigned long mmu_notifier_range_end;
> @@ -1438,6 +1443,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_post_init_vm(struct kvm *kvm);
>  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
>  int kvm_arch_create_vm_debugfs(struct kvm *kvm);
> +bool kvm_arch_private_mem_supported(struct kvm *kvm);
>  
>  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
>  /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index db9d39a2d3a6..f93ac7cdfb53 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -843,6 +843,73 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>  
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>  
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +static void kvm_private_mem_notifier_handler(struct memfile_notifier *notifier,
> +					     pgoff_t start, pgoff_t end)
> +{
> +	int idx;
> +	struct kvm_memory_slot *slot = container_of(notifier,
> +						    struct kvm_memory_slot,
> +						    notifier);
> +	struct kvm_gfn_range gfn_range = {
> +		.slot		= slot,
> +		.start		= start - (slot->private_offset >> PAGE_SHIFT),
> +		.end		= end - (slot->private_offset >> PAGE_SHIFT),

This code assumes that 'end' is greater than slot->private_offset, but
even if slot->private_offset is non-zero, nothing stops userspace from
allocating pages in the range of 0 through slot->private_offset, which
will still end up triggering this notifier. In that case gfn_range.end
will end up going negative, and the below code will limit that to
slot->npages and do a populate/invalidate for the entire range.

Not sure if this covers all the cases, but this fixes the issue for me:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 903ffdb5f01c..4c744d8f7527 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -872,6 +872,19 @@ static void kvm_private_mem_notifier_handler(struct memfile_notifier *notifier,
                .may_block      = true,
        };

        struct kvm *kvm = slot->kvm;
+
+       if (slot->private_offset > end)
+               return;
+


  reply	other threads:[~2022-06-23 23:00 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-19 15:37 [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory Chao Peng
2022-05-19 15:37 ` [PATCH v6 1/8] mm: Introduce memfile_notifier Chao Peng
2022-05-19 15:37 ` [PATCH v6 2/8] mm/shmem: Support memfile_notifier Chao Peng
2022-05-19 15:37 ` [PATCH v6 3/8] mm/memfd: Introduce MFD_INACCESSIBLE flag Chao Peng
2022-05-31 19:15   ` Vishal Annapurve
2022-06-01 10:17     ` Chao Peng
2022-06-01 12:11       ` Gupta, Pankaj
2022-06-02 10:07         ` Chao Peng
2022-06-14 20:23           ` Sean Christopherson
2022-06-15  8:53             ` Chao Peng
2022-05-19 15:37 ` [PATCH v6 4/8] KVM: Extend the memslot to support fd-based private memory Chao Peng
2022-05-20 17:57   ` Andy Lutomirski
2022-05-20 18:31     ` Sean Christopherson
2022-05-22  4:03       ` Andy Lutomirski
2022-05-23 13:21       ` Chao Peng
2022-05-23 15:22         ` Sean Christopherson
2022-05-30 13:26           ` Chao Peng
2022-06-10 16:14             ` Sean Christopherson
2022-06-14  6:45               ` Chao Peng
2022-06-23 22:59       ` Michael Roth
2022-06-24  8:54         ` Chao Peng
2022-06-24 13:01           ` Michael Roth
2022-06-17 20:52   ` Sean Christopherson
2022-06-17 21:27     ` Sean Christopherson
2022-06-20 14:09       ` Chao Peng
2022-06-20 14:08     ` Chao Peng
2022-05-19 15:37 ` [PATCH v6 5/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit Chao Peng
2022-05-19 15:37 ` [PATCH v6 6/8] KVM: Handle page fault for private memory Chao Peng
2022-06-17 21:30   ` Sean Christopherson
2022-06-20 14:16     ` Chao Peng
2022-08-19  0:40     ` Kirill A. Shutemov
2022-08-25 23:43       ` Sean Christopherson
2022-06-24  3:58   ` Nikunj A. Dadhania
2022-06-24  9:02     ` Chao Peng
2022-06-30 19:14       ` Vishal Annapurve
2022-06-30 22:21         ` Michael Roth
2022-07-01  1:21           ` Xiaoyao Li
2022-07-07 20:08             ` Sean Christopherson
2022-07-08  3:29               ` Xiaoyao Li
2022-07-20 23:08                 ` Vishal Annapurve
2022-07-21  9:45                   ` Chao Peng
2022-05-19 15:37 ` [PATCH v6 7/8] KVM: Enable and expose KVM_MEM_PRIVATE Chao Peng
2022-06-23 22:07   ` Michael Roth [this message]
2022-06-24  8:43     ` Chao Peng
2022-05-19 15:37 ` [PATCH v6 8/8] memfd_create.2: Describe MFD_INACCESSIBLE flag Chao Peng
2022-06-06 20:09 ` [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory Vishal Annapurve
2022-06-07  6:57   ` Chao Peng
2022-06-08  0:55     ` Marc Orr
2022-06-08  2:18       ` Chao Peng
2022-06-08 19:37         ` Vishal Annapurve
2022-06-09 20:29           ` Sean Christopherson
2022-06-14  7:28             ` Chao Peng
2022-06-14 17:37               ` Andy Lutomirski
2022-06-14 19:08                 ` Sean Christopherson
2022-06-14 20:59                   ` Andy Lutomirski
2022-06-15  9:17                     ` Chao Peng
2022-06-15 14:29                       ` Sean Christopherson
2022-06-10  0:11         ` Marc Orr

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220623220751.emt3iqq77faxfzzy@amd.com \
    --to=michael.roth@amd.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfields@fieldses.org \
    --cc=bp@alien8.de \
    --cc=chao.p.peng@linux.intel.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=dhildenb@redhat.com \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jlayton@kernel.org \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jun.nakajima@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mail@maciej.szmigiero.name \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qperret@google.com \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=steven.price@arm.com \
    --cc=tglx@linutronix.de \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    --cc=yu.c.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.