All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fuad Tabba <tabba@google.com>
To: Chao Peng <chao.p.peng@linux.intel.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-api@vger.kernel.org, linux-doc@vger.kernel.org,
	qemu-devel@nongnu.org, Paolo Bonzini <pbonzini@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Hugh Dickins <hughd@google.com>, Jeff Layton <jlayton@kernel.org>,
	"J . Bruce Fields" <bfields@fieldses.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Shuah Khan <shuah@kernel.org>, Mike Rapoport <rppt@kernel.org>,
	Steven Price <steven.price@arm.com>,
	"Maciej S . Szmigiero" <mail@maciej.szmigiero.name>,
	Vlastimil Babka <vbabka@suse.cz>,
	Vishal Annapurve <vannapurve@google.com>,
	Yu Zhang <yu.c.zhang@linux.intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	luto@kernel.org, jun.nakajima@intel.com, dave.hansen@intel.com,
	ak@linux.intel.com, david@redhat.com, aarcange@redhat.com,
	ddutile@redhat.com, dhildenb@redhat.com,
	Quentin Perret <qperret@google.com>,
	Michael Roth <michael.roth@amd.com>,
	mhocko@suse.com, Muchun Song <songmuchun@bytedance.com>,
	wei.w.wang@intel.com
Subject: Re: [PATCH v8 8/8] KVM: Enable and expose KVM_MEM_PRIVATE
Date: Thu, 6 Oct 2022 09:55:31 +0100	[thread overview]
Message-ID: <CA+EHjTwXPrHYb2us7+vrdS9jwYXv3j5UniG0bpb6dKgV77A=8A@mail.gmail.com> (raw)
In-Reply-To: <20220915142913.2213336-9-chao.p.peng@linux.intel.com>

Hi,

On Thu, Sep 15, 2022 at 3:37 PM Chao Peng <chao.p.peng@linux.intel.com> wrote:
>
> Expose KVM_MEM_PRIVATE and memslot fields private_fd/offset to
> userspace. KVM will register/unregister private memslot to fd-based
> memory backing store and response to invalidation event from
> inaccessible_notifier to zap the existing memory mappings in the
> secondary page table.
>
> Whether KVM_MEM_PRIVATE is actually exposed to userspace is determined
> by architecture code which can turn on it by overriding the default
> kvm_arch_has_private_mem().
>
> A 'kvm' reference is added in memslot structure since in
> inaccessible_notifier callback we can only obtain a memslot reference
> but 'kvm' is needed to do the zapping.
>
> Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  include/linux/kvm_host.h |   1 +
>  virt/kvm/kvm_main.c      | 116 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 111 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index b9906cdf468b..cb4eefac709c 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -589,6 +589,7 @@ struct kvm_memory_slot {
>         struct file *private_file;
>         loff_t private_offset;
>         struct inaccessible_notifier notifier;
> +       struct kvm *kvm;
>  };
>
>  static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 97d893f7482c..87e239d35b96 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -983,6 +983,57 @@ static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
>                 xa_erase(&kvm->mem_attr_array, index);
>         return r;
>  }
> +
> +static void kvm_private_notifier_invalidate(struct inaccessible_notifier *notifier,
> +                                           pgoff_t start, pgoff_t end)
> +{
> +       struct kvm_memory_slot *slot = container_of(notifier,
> +                                                   struct kvm_memory_slot,
> +                                                   notifier);
> +       unsigned long base_pgoff = slot->private_offset >> PAGE_SHIFT;
> +       gfn_t start_gfn = slot->base_gfn;
> +       gfn_t end_gfn = slot->base_gfn + slot->npages;
> +
> +
> +       if (start > base_pgoff)
> +               start_gfn = slot->base_gfn + start - base_pgoff;
> +
> +       if (end < base_pgoff + slot->npages)
> +               end_gfn = slot->base_gfn + end - base_pgoff;
> +
> +       if (start_gfn >= end_gfn)
> +               return;
> +
> +       kvm_zap_gfn_range(slot->kvm, start_gfn, end_gfn);
> +}
> +
> +static struct inaccessible_notifier_ops kvm_private_notifier_ops = {
> +       .invalidate = kvm_private_notifier_invalidate,
> +};
> +
> +static inline void kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +       slot->notifier.ops = &kvm_private_notifier_ops;
> +       inaccessible_register_notifier(slot->private_file, &slot->notifier);
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +       inaccessible_unregister_notifier(slot->private_file, &slot->notifier);
> +}
> +
> +#else /* !CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
> +static inline void kvm_private_mem_register(struct kvm_memory_slot *slot)
> +{
> +       WARN_ON_ONCE(1);
> +}
> +
> +static inline void kvm_private_mem_unregister(struct kvm_memory_slot *slot)
> +{
> +       WARN_ON_ONCE(1);
> +}
> +
>  #endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
>
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
> @@ -1029,6 +1080,11 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
>  /* This does not remove the slot from struct kvm_memslots data structures */
>  static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
>  {
> +       if (slot->flags & KVM_MEM_PRIVATE) {
> +               kvm_private_mem_unregister(slot);
> +               fput(slot->private_file);
> +       }
> +
>         kvm_destroy_dirty_bitmap(slot);
>
>         kvm_arch_free_memslot(kvm, slot);
> @@ -1600,10 +1656,16 @@ bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
>         return false;
>  }
>
> -static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> +static int check_memory_region_flags(struct kvm *kvm,
> +                                    const struct kvm_user_mem_region *mem)
>  {
>         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
>
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       if (kvm_arch_has_private_mem(kvm))
> +               valid_flags |= KVM_MEM_PRIVATE;
> +#endif
> +
>  #ifdef __KVM_HAVE_READONLY_MEM
>         valid_flags |= KVM_MEM_READONLY;
>  #endif
> @@ -1679,6 +1741,9 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>  {
>         int r;
>
> +       if (change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE)
> +               kvm_private_mem_register(new);
> +

From the discussion I had with Kirill in the first patch *, should
this check that the private_fd is inaccessible?

[*] https://lore.kernel.org/all/20221003110129.bbee7kawhw5ed745@box.shutemov.name/

Cheers,
/fuad

>         /*
>          * If dirty logging is disabled, nullify the bitmap; the old bitmap
>          * will be freed on "commit".  If logging is enabled in both old and
> @@ -1707,6 +1772,9 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
>         if (r && new && new->dirty_bitmap && (!old || !old->dirty_bitmap))
>                 kvm_destroy_dirty_bitmap(new);
>
> +       if (r && change == KVM_MR_CREATE && new->flags & KVM_MEM_PRIVATE)
> +               kvm_private_mem_unregister(new);
> +
>         return r;
>  }
>
> @@ -2004,7 +2072,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
>         int as_id, id;
>         int r;
>
> -       r = check_memory_region_flags(mem);
> +       r = check_memory_region_flags(kvm, mem);
>         if (r)
>                 return r;
>
> @@ -2023,6 +2091,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
>              !access_ok((void __user *)(unsigned long)mem->userspace_addr,
>                         mem->memory_size))
>                 return -EINVAL;
> +       if (mem->flags & KVM_MEM_PRIVATE &&
> +               (mem->private_offset & (PAGE_SIZE - 1) ||
> +                mem->private_offset > U64_MAX - mem->memory_size))
> +               return -EINVAL;
>         if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
>                 return -EINVAL;
>         if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
> @@ -2061,6 +2133,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
>                 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
>                         return -EINVAL;
>         } else { /* Modify an existing slot. */
> +               /* Private memslots are immutable, they can only be deleted. */
> +               if (mem->flags & KVM_MEM_PRIVATE)
> +                       return -EINVAL;
>                 if ((mem->userspace_addr != old->userspace_addr) ||
>                     (npages != old->npages) ||
>                     ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
> @@ -2089,10 +2164,27 @@ int __kvm_set_memory_region(struct kvm *kvm,
>         new->npages = npages;
>         new->flags = mem->flags;
>         new->userspace_addr = mem->userspace_addr;
> +       if (mem->flags & KVM_MEM_PRIVATE) {
> +               new->private_file = fget(mem->private_fd);
> +               if (!new->private_file) {
> +                       r = -EINVAL;
> +                       goto out;
> +               }
> +               new->private_offset = mem->private_offset;
> +       }
> +
> +       new->kvm = kvm;
>
>         r = kvm_set_memslot(kvm, old, new, change);
>         if (r)
> -               kfree(new);
> +               goto out;
> +
> +       return 0;
> +
> +out:
> +       if (new->private_file)
> +               fput(new->private_file);
> +       kfree(new);
>         return r;
>  }
>  EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
> @@ -4747,16 +4839,28 @@ static long kvm_vm_ioctl(struct file *filp,
>         }
>         case KVM_SET_USER_MEMORY_REGION: {
>                 struct kvm_user_mem_region mem;
> -               unsigned long size = sizeof(struct kvm_userspace_memory_region);
> +               unsigned int flags_offset = offsetof(typeof(mem), flags);
> +               unsigned long size;
> +               u32 flags;
>
>                 kvm_sanity_check_user_mem_region_alias();
>
> +               memset(&mem, 0, sizeof(mem));
> +
>                 r = -EFAULT;
> -               if (copy_from_user(&mem, argp, size);
> +               if (get_user(flags, (u32 __user *)(argp + flags_offset)))
> +                       goto out;
> +
> +               if (flags & KVM_MEM_PRIVATE)
> +                       size = sizeof(struct kvm_userspace_memory_region_ext);
> +               else
> +                       size = sizeof(struct kvm_userspace_memory_region);
> +
> +               if (copy_from_user(&mem, argp, size))
>                         goto out;
>
>                 r = -EINVAL;
> -               if (mem.flags & KVM_MEM_PRIVATE)
> +               if ((flags ^ mem.flags) & KVM_MEM_PRIVATE)
>                         goto out;
>
>                 r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> --
> 2.25.1
>

  parent reply	other threads:[~2022-10-06  8:56 UTC|newest]

Thread overview: 97+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-15 14:29 [PATCH v8 0/8] KVM: mm: fd-based approach for supporting KVM Chao Peng
2022-09-15 14:29 ` [PATCH v8 1/8] mm/memfd: Introduce userspace inaccessible memfd Chao Peng
2022-09-19  9:12   ` David Hildenbrand
2022-09-19 19:10     ` Sean Christopherson
2022-09-21 21:10       ` Andy Lutomirski
2022-09-22 13:23         ` Wang, Wei W
2022-09-23 15:20         ` Fuad Tabba
2022-09-23 15:19       ` Fuad Tabba
2022-09-26 14:23         ` Chao Peng
2022-09-26 15:51           ` Fuad Tabba
2022-09-27 22:47             ` Sean Christopherson
2022-09-30 16:19               ` Fuad Tabba
2022-10-13 13:34                 ` Chao Peng
2022-10-17 10:31                   ` Fuad Tabba
2022-10-17 14:58                     ` Chao Peng
2022-10-17 19:05                       ` Fuad Tabba
2022-10-19 13:30                         ` Chao Peng
2022-10-18  0:33                 ` Sean Christopherson
2022-10-19 15:04                   ` Fuad Tabba
2022-09-23  0:58     ` Kirill A . Shutemov
2022-09-26 10:35       ` David Hildenbrand
2022-09-26 14:48         ` Kirill A. Shutemov
2022-09-26 14:53           ` David Hildenbrand
2022-09-27 23:23             ` Sean Christopherson
2022-09-28 13:36               ` Kirill A. Shutemov
2022-09-22 13:26   ` Wang, Wei W
2022-09-22 19:49     ` Sean Christopherson
2022-09-23  0:53       ` Kirill A . Shutemov
2022-09-23 15:20         ` Fuad Tabba
2022-09-30 16:14   ` Fuad Tabba
2022-09-30 16:23     ` Kirill A . Shutemov
2022-10-03  7:33       ` Fuad Tabba
2022-10-03 11:01         ` Kirill A. Shutemov
2022-10-04 15:39           ` Fuad Tabba
2022-10-06  8:50   ` Fuad Tabba
2022-10-06 13:04     ` Kirill A. Shutemov
2022-10-17 13:00   ` Vlastimil Babka
2022-10-17 16:19     ` Kirill A . Shutemov
2022-10-17 16:39       ` Gupta, Pankaj
2022-10-17 21:56         ` Kirill A . Shutemov
2022-10-18 13:42           ` Vishal Annapurve
2022-10-19 15:32             ` Kirill A . Shutemov
2022-10-20 10:50               ` Vishal Annapurve
2022-10-21 13:54                 ` Chao Peng
2022-10-21 16:53                   ` Sean Christopherson
2022-10-19 12:23   ` Vishal Annapurve
2022-10-21 13:47     ` Chao Peng
2022-10-21 16:18       ` Sean Christopherson
2022-10-24 14:59         ` Kirill A . Shutemov
2022-10-24 15:26           ` David Hildenbrand
2022-11-03 16:27           ` Vishal Annapurve
2022-09-15 14:29 ` [PATCH v8 2/8] KVM: Extend the memslot to support fd-based private memory Chao Peng
2022-09-16  9:14   ` Bagas Sanjaya
2022-09-16  9:53     ` Chao Peng
2022-09-26 10:26   ` Fuad Tabba
2022-09-26 14:04     ` Chao Peng
2022-09-29 22:45   ` Isaku Yamahata
2022-09-29 23:22     ` Sean Christopherson
2022-10-05 13:04   ` Jarkko Sakkinen
2022-10-05 22:05     ` Jarkko Sakkinen
2022-10-06  9:00   ` Fuad Tabba
2022-10-06 14:58   ` Jarkko Sakkinen
2022-10-06 15:07     ` Jarkko Sakkinen
2022-10-06 15:34       ` Sean Christopherson
2022-10-07 11:14         ` Jarkko Sakkinen
2022-10-07 14:58           ` Sean Christopherson
2022-10-07 21:54             ` Jarkko Sakkinen
2022-10-08 16:15               ` Jarkko Sakkinen
2022-10-08 17:35                 ` Jarkko Sakkinen
2022-10-10  8:25                   ` Chao Peng
2022-10-12  8:14                     ` Jarkko Sakkinen
2022-09-15 14:29 ` [PATCH v8 3/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit Chao Peng
2022-09-16  9:17   ` Bagas Sanjaya
2022-09-16  9:54     ` Chao Peng
2022-09-15 14:29 ` [PATCH v8 4/8] KVM: Use gfn instead of hva for mmu_notifier_retry Chao Peng
2022-09-15 14:29 ` [PATCH v8 5/8] KVM: Register/unregister the guest private memory regions Chao Peng
2022-09-26 10:36   ` Fuad Tabba
2022-09-26 14:07     ` Chao Peng
2022-10-11  9:48   ` Fuad Tabba
2022-10-12  2:35     ` Chao Peng
2022-10-17 10:15       ` Fuad Tabba
2022-10-17 22:17         ` Sean Christopherson
2022-10-19 13:23           ` Chao Peng
2022-10-19 15:02             ` Fuad Tabba
2022-10-19 16:09               ` Sean Christopherson
2022-10-19 18:32                 ` Fuad Tabba
2022-09-15 14:29 ` [PATCH v8 6/8] KVM: Update lpage info when private/shared memory are mixed Chao Peng
2022-09-29 16:52   ` Isaku Yamahata
2022-09-30  8:59     ` Chao Peng
2022-09-15 14:29 ` [PATCH v8 7/8] KVM: Handle page fault for private memory Chao Peng
2022-10-14 18:57   ` Sean Christopherson
2022-10-17 14:48     ` Chao Peng
2022-09-15 14:29 ` [PATCH v8 8/8] KVM: Enable and expose KVM_MEM_PRIVATE Chao Peng
2022-10-04 14:55   ` Jarkko Sakkinen
2022-10-10  8:31     ` Chao Peng
2022-10-06  8:55   ` Fuad Tabba [this message]
2022-10-10  8:33     ` Chao Peng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+EHjTwXPrHYb2us7+vrdS9jwYXv3j5UniG0bpb6dKgV77A=8A@mail.gmail.com' \
    --to=tabba@google.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=bfields@fieldses.org \
    --cc=bp@alien8.de \
    --cc=chao.p.peng@linux.intel.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=dhildenb@redhat.com \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jlayton@kernel.org \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jun.nakajima@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mail@maciej.szmigiero.name \
    --cc=mhocko@suse.com \
    --cc=michael.roth@amd.com \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qperret@google.com \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=shuah@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=steven.price@arm.com \
    --cc=tglx@linutronix.de \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=wei.w.wang@intel.com \
    --cc=x86@kernel.org \
    --cc=yu.c.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.