linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Isaku Yamahata <isaku.yamahata@gmail.com>
To: Yuan Yao <yuan.yao@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Marc Zyngier <maz@kernel.org>,
	Oliver Upton <oliver.upton@linux.dev>,
	Huacai Chen <chenhuacai@kernel.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Anup Patel <anup@brainfault.org>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>,
	"Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Paul Moore <paul@paul-moore.com>,
	James Morris <jmorris@namei.org>,
	"Serge E. Hallyn" <serge@hallyn.com>,
	kvm@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.linux.dev, linux-mips@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org, kvm-riscv@lists.infradead.org,
	linux-riscv@lists.infradead.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-security-module@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	Chao Peng <chao.p.peng@linux.intel.com>,
	Fuad Tabba <tabba@google.com>,
	Jarkko Sakkinen <jarkko@kernel.org>,
	Yu Zhang <yu.c.zhang@linux.intel.com>,
	Vishal Annapurve <vannapurve@google.com>,
	Ackerley Tng <ackerleytng@google.com>,
	Maciej Szmigiero <mail@maciej.szmigiero.name>,
	Vlastimil Babka <vbabka@suse.cz>,
	David Hildenbrand <david@redhat.com>,
	Quentin Perret <qperret@google.com>,
	Michael Roth <michael.roth@amd.com>, Wang <wei.w.wang@intel.com>,
	Liam Merwick <liam.merwick@oracle.com>,
	Isaku Yamahata <isaku.yamahata@gmail.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Subject: Re: [RFC PATCH v11 12/29] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory
Date: Fri, 21 Jul 2023 15:27:04 -0700	[thread overview]
Message-ID: <20230721222704.GJ25699@ls.amr.corp.intel.com> (raw)
In-Reply-To: <20230721061314.3ls6stdawz53drv3@yy-desk-7060>

On Fri, Jul 21, 2023 at 02:13:14PM +0800,
Yuan Yao <yuan.yao@linux.intel.com> wrote:

> On Tue, Jul 18, 2023 at 04:44:55PM -0700, Sean Christopherson wrote:
> > TODO
> >
> > Cc: Fuad Tabba <tabba@google.com>
> > Cc: Vishal Annapurve <vannapurve@google.com>
> > Cc: Ackerley Tng <ackerleytng@google.com>
> > Cc: Jarkko Sakkinen <jarkko@kernel.org>
> > Cc: Maciej Szmigiero <mail@maciej.szmigiero.name>
> > Cc: Vlastimil Babka <vbabka@suse.cz>
> > Cc: David Hildenbrand <david@redhat.com>
> > Cc: Quentin Perret <qperret@google.com>
> > Cc: Michael Roth <michael.roth@amd.com>
> > Cc: Wang <wei.w.wang@intel.com>
> > Cc: Liam Merwick <liam.merwick@oracle.com>
> > Cc: Isaku Yamahata <isaku.yamahata@gmail.com>
> > Co-developed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Co-developed-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> > Signed-off-by: Yu Zhang <yu.c.zhang@linux.intel.com>
> > Co-developed-by: Chao Peng <chao.p.peng@linux.intel.com>
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> >  include/linux/kvm_host.h   |  48 +++
> >  include/uapi/linux/kvm.h   |  14 +-
> >  include/uapi/linux/magic.h |   1 +
> >  virt/kvm/Kconfig           |   4 +
> >  virt/kvm/Makefile.kvm      |   1 +
> >  virt/kvm/guest_mem.c       | 591 +++++++++++++++++++++++++++++++++++++
> >  virt/kvm/kvm_main.c        |  58 +++-
> >  virt/kvm/kvm_mm.h          |  38 +++
> >  8 files changed, 750 insertions(+), 5 deletions(-)
> >  create mode 100644 virt/kvm/guest_mem.c
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 97db63da6227..0d1e2ee8ae7a 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -592,8 +592,20 @@ struct kvm_memory_slot {
> >  	u32 flags;
> >  	short id;
> >  	u16 as_id;
> > +
> > +#ifdef CONFIG_KVM_PRIVATE_MEM
> > +	struct {
> > +		struct file __rcu *file;
> > +		pgoff_t pgoff;
> > +	} gmem;
> > +#endif
> >  };
> >
> > +static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot)
> > +{
> > +	return slot && (slot->flags & KVM_MEM_PRIVATE);
> > +}
> > +
> >  static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
> >  {
> >  	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
> > @@ -688,6 +700,17 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
> >  }
> >  #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_has_private_mem if support for private memory
> > + * is enabled.
> > + */
> > +#if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> > +static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > +{
> > +	return false;
> > +}
> > +#endif
> > +
> >  struct kvm_memslots {
> >  	u64 generation;
> >  	atomic_long_t last_used_slot;
> > @@ -1380,6 +1403,7 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
> >  void kvm_mmu_invalidate_begin(struct kvm *kvm);
> >  void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end);
> >  void kvm_mmu_invalidate_end(struct kvm *kvm);
> > +bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
> >
> >  long kvm_arch_dev_ioctl(struct file *filp,
> >  			unsigned int ioctl, unsigned long arg);
> > @@ -2313,6 +2337,30 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
> >
> >  bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
> >  					 struct kvm_gfn_range *range);
> > +
> > +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > +{
> > +	return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) &&
> > +	       kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE;
> > +}
> > +#else
> > +static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > +{
> > +	return false;
> > +}
> >  #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
> >
> > +#ifdef CONFIG_KVM_PRIVATE_MEM
> > +int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
> > +			      gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
> > +#else
> > +static inline int kvm_gmem_get_pfn(struct kvm *kvm,
> > +				   struct kvm_memory_slot *slot, gfn_t gfn,
> > +				   kvm_pfn_t *pfn, int *max_order)
> > +{
> > +	KVM_BUG_ON(1, kvm);
> > +	return -EIO;
> > +}
> > +#endif /* CONFIG_KVM_PRIVATE_MEM */
> > +
> >  #endif
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index f065c57db327..9b344fc98598 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -102,7 +102,10 @@ struct kvm_userspace_memory_region2 {
> >  	__u64 guest_phys_addr;
> >  	__u64 memory_size;
> >  	__u64 userspace_addr;
> > -	__u64 pad[16];
> > +	__u64 gmem_offset;
> > +	__u32 gmem_fd;
> > +	__u32 pad1;
> > +	__u64 pad2[14];
> >  };
> >
> >  /*
> > @@ -112,6 +115,7 @@ struct kvm_userspace_memory_region2 {
> >   */
> >  #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
> >  #define KVM_MEM_READONLY	(1UL << 1)
> > +#define KVM_MEM_PRIVATE		(1UL << 2)
> >
> >  /* for KVM_IRQ_LINE */
> >  struct kvm_irq_level {
> > @@ -2284,4 +2288,12 @@ struct kvm_memory_attributes {
> >
> >  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >
> > +#define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> > +
> > +struct kvm_create_guest_memfd {
> > +	__u64 size;
> > +	__u64 flags;
> > +	__u64 reserved[6];
> > +};
> > +
> >  #endif /* __LINUX_KVM_H */
> > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> > index 6325d1d0e90f..15041aa7d9ae 100644
> > --- a/include/uapi/linux/magic.h
> > +++ b/include/uapi/linux/magic.h
> > @@ -101,5 +101,6 @@
> >  #define DMA_BUF_MAGIC		0x444d4142	/* "DMAB" */
> >  #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
> >  #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
> > +#define GUEST_MEMORY_MAGIC	0x474d454d	/* "GMEM" */
> >
> >  #endif /* __LINUX_MAGIC_H__ */
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 8375bc49f97d..3ee3205e0b39 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -103,3 +103,7 @@ config KVM_GENERIC_MMU_NOTIFIER
> >  config KVM_GENERIC_MEMORY_ATTRIBUTES
> >         select KVM_GENERIC_MMU_NOTIFIER
> >         bool
> > +
> > +config KVM_PRIVATE_MEM
> > +       select XARRAY_MULTI
> > +       bool
> > diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm
> > index 2c27d5d0c367..a5a61bbe7f4c 100644
> > --- a/virt/kvm/Makefile.kvm
> > +++ b/virt/kvm/Makefile.kvm
> > @@ -12,3 +12,4 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
> >  kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
> >  kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
> >  kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o
> > +kvm-$(CONFIG_KVM_PRIVATE_MEM) += $(KVM)/guest_mem.o
> > diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
> > new file mode 100644
> > index 000000000000..1b705fd63fa8
> > --- /dev/null
> > +++ b/virt/kvm/guest_mem.c
> > @@ -0,0 +1,591 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <linux/backing-dev.h>
> > +#include <linux/falloc.h>
> > +#include <linux/kvm_host.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/pseudo_fs.h>
> > +
> > +#include <uapi/linux/magic.h>
> > +
> > +#include "kvm_mm.h"
> > +
> > +static struct vfsmount *kvm_gmem_mnt;
> > +
> > +struct kvm_gmem {
> > +	struct kvm *kvm;
> > +	struct xarray bindings;
> > +	struct list_head entry;
> > +};
> > +
> > +static struct folio *kvm_gmem_get_folio(struct file *file, pgoff_t index)
> > +{
> > +	struct folio *folio;
> > +
> > +	/* TODO: Support huge pages. */
> > +	folio = filemap_grab_folio(file->f_mapping, index);
> > +	if (!folio)
> > +		return NULL;
> > +
> > +	/*
> > +	 * Use the up-to-date flag to track whether or not the memory has been
> > +	 * zeroed before being handed off to the guest.  There is no backing
> > +	 * storage for the memory, so the folio will remain up-to-date until
> > +	 * it's removed.
> > +	 *
> > +	 * TODO: Skip clearing pages when trusted firmware will do it when
> > +	 * assigning memory to the guest.
> > +	 */
> > +	if (!folio_test_uptodate(folio)) {
> > +		unsigned long nr_pages = folio_nr_pages(folio);
> > +		unsigned long i;
> > +
> > +		for (i = 0; i < nr_pages; i++)
> > +			clear_highpage(folio_page(folio, i));
> > +
> > +		folio_mark_uptodate(folio);
> > +	}
> > +
> > +	/*
> > +	 * Ignore accessed, referenced, and dirty flags.  The memory is
> > +	 * unevictable and there is no storage to write back to.
> > +	 */
> > +	return folio;
> > +}
> > +
> > +static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
> > +				      pgoff_t end)
> > +{
> > +	struct kvm_memory_slot *slot;
> > +	struct kvm *kvm = gmem->kvm;
> > +	unsigned long index;
> > +	bool flush = false;
> > +
> > +	KVM_MMU_LOCK(kvm);
> > +
> > +	kvm_mmu_invalidate_begin(kvm);
> > +
> > +	xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
> > +		pgoff_t pgoff = slot->gmem.pgoff;
> > +
> > +		struct kvm_gfn_range gfn_range = {
> > +			.start = slot->base_gfn + max(pgoff, start) - pgoff,
> > +			.end = slot->base_gfn + min(pgoff + slot->npages, end) - pgoff,
> > +			.slot = slot,
> > +			.may_block = true,
> > +		};
> > +
> > +		flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
> > +	}
> > +
> > +	if (flush)
> > +		kvm_flush_remote_tlbs(kvm);
> > +
> > +	KVM_MMU_UNLOCK(kvm);
> > +}
> > +
> > +static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
> > +				    pgoff_t end)
> > +{
> > +	struct kvm *kvm = gmem->kvm;
> > +
> > +	KVM_MMU_LOCK(kvm);
> > +	if (xa_find(&gmem->bindings, &start, end - 1, XA_PRESENT))
> > +		kvm_mmu_invalidate_end(kvm);
> > +	KVM_MMU_UNLOCK(kvm);
> > +}
> > +
> > +static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
> > +{
> > +	struct list_head *gmem_list = &inode->i_mapping->private_list;
> > +	pgoff_t start = offset >> PAGE_SHIFT;
> > +	pgoff_t end = (offset + len) >> PAGE_SHIFT;
> > +	struct kvm_gmem *gmem;
> > +
> > +	/*
> > +	 * Bindings must stable across invalidation to ensure the start+end
> > +	 * are balanced.
> > +	 */
> > +	filemap_invalidate_lock(inode->i_mapping);
> > +
> > +	list_for_each_entry(gmem, gmem_list, entry)
> > +		kvm_gmem_invalidate_begin(gmem, start, end);
> > +
> > +	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
> > +
> > +	list_for_each_entry(gmem, gmem_list, entry)
> > +		kvm_gmem_invalidate_end(gmem, start, end);
> > +
> > +	filemap_invalidate_unlock(inode->i_mapping);
> > +
> > +	return 0;
> > +}
> > +
> > +static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
> > +{
> > +	struct address_space *mapping = inode->i_mapping;
> > +	pgoff_t start, index, end;
> > +	int r;
> > +
> > +	/* Dedicated guest is immutable by default. */
> > +	if (offset + len > i_size_read(inode))
> > +		return -EINVAL;
> > +
> > +	filemap_invalidate_lock_shared(mapping);
> > +
> > +	start = offset >> PAGE_SHIFT;
> > +	end = (offset + len) >> PAGE_SHIFT;
> > +
> > +	r = 0;
> > +	for (index = start; index < end; ) {
> > +		struct folio *folio;
> > +
> > +		if (signal_pending(current)) {
> > +			r = -EINTR;
> > +			break;
> > +		}
> > +
> > +		folio = kvm_gmem_get_folio(inode, index);
> > +		if (!folio) {
> > +			r = -ENOMEM;
> > +			break;
> > +		}
> > +
> > +		index = folio_next_index(folio);
> > +
> > +		folio_unlock(folio);
> > +		folio_put(folio);
> > +
> > +		/* 64-bit only, wrapping the index should be impossible. */
> > +		if (WARN_ON_ONCE(!index))
> > +			break;
> > +
> > +		cond_resched();
> > +	}
> > +
> > +	filemap_invalidate_unlock_shared(mapping);
> > +
> > +	return r;
> > +}
> > +
> > +static long kvm_gmem_fallocate(struct file *file, int mode, loff_t offset,
> > +			       loff_t len)
> > +{
> > +	int ret;
> > +
> > +	if (!(mode & FALLOC_FL_KEEP_SIZE))
> > +		return -EOPNOTSUPP;
> > +
> > +	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
> > +		return -EOPNOTSUPP;
> > +
> > +	if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
> > +		return -EINVAL;
> > +
> > +	if (mode & FALLOC_FL_PUNCH_HOLE)
> > +		ret = kvm_gmem_punch_hole(file_inode(file), offset, len);
> > +	else
> > +		ret = kvm_gmem_allocate(file_inode(file), offset, len);
> > +
> > +	if (!ret)
> > +		file_modified(file);
> > +	return ret;
> > +}
> > +
> > +static int kvm_gmem_release(struct inode *inode, struct file *file)
> > +{
> > +	struct kvm_gmem *gmem = file->private_data;
> > +	struct kvm_memory_slot *slot;
> > +	struct kvm *kvm = gmem->kvm;
> > +	unsigned long index;
> > +
> > +	filemap_invalidate_lock(inode->i_mapping);
> > +
> > +	/*
> > +	 * Prevent concurrent attempts to *unbind* a memslot.  This is the last
> > +	 * reference to the file and thus no new bindings can be created, but
> > +	 * dereferencing the slot for existing bindings needs to be protected
> > +	 * against memslot updates, specifically so that unbind doesn't race
> > +	 * and free the memslot (kvm_gmem_get_file() will return NULL).
> > +	 */
> > +	mutex_lock(&kvm->slots_lock);
> > +
> > +	xa_for_each(&gmem->bindings, index, slot)
> > +		rcu_assign_pointer(slot->gmem.file, NULL);
> > +
> > +	synchronize_rcu();
> > +
> > +	/*
> > +	 * All in-flight operations are gone and new bindings can be created.
> > +	 * Zap all SPTEs pointed at by this file.  Do not free the backing
> > +	 * memory, as its lifetime is associated with the inode, not the file.
> > +	 */
> > +	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
> > +	kvm_gmem_invalidate_end(gmem, 0, -1ul);
> > +
> > +	mutex_unlock(&kvm->slots_lock);
> > +
> > +	list_del(&gmem->entry);
> > +
> > +	filemap_invalidate_unlock(inode->i_mapping);
> > +
> > +	xa_destroy(&gmem->bindings);
> > +	kfree(gmem);
> > +
> > +	kvm_put_kvm(kvm);
> > +
> > +	return 0;
> > +}
> > +
> > +static struct file *kvm_gmem_get_file(struct kvm_memory_slot *slot)
> > +{
> > +	struct file *file;
> > +
> > +	rcu_read_lock();
> > +
> > +	file = rcu_dereference(slot->gmem.file);
> > +	if (file && !get_file_rcu(file))
> > +		file = NULL;
> > +
> > +	rcu_read_unlock();
> > +
> > +	return file;
> > +}
> > +
> > +static const struct file_operations kvm_gmem_fops = {
> > +	.open		= generic_file_open,
> > +	.release	= kvm_gmem_release,
> > +	.fallocate	= kvm_gmem_fallocate,
> > +};
> > +
> > +static int kvm_gmem_migrate_folio(struct address_space *mapping,
> > +				  struct folio *dst, struct folio *src,
> > +				  enum migrate_mode mode)
> > +{
> > +	WARN_ON_ONCE(1);
> > +	return -EINVAL;
> > +}
> > +
> > +static int kvm_gmem_error_page(struct address_space *mapping, struct page *page)
> > +{
> > +	struct list_head *gmem_list = &mapping->private_list;
> > +	struct kvm_memory_slot *slot;
> > +	struct kvm_gmem *gmem;
> > +	unsigned long index;
> > +	pgoff_t start, end;
> > +	gfn_t gfn;
> > +
> > +	filemap_invalidate_lock_shared(mapping);
> > +
> > +	start = page->index;
> > +	end = start + thp_nr_pages(page);
> > +
> > +	list_for_each_entry(gmem, gmem_list, entry) {
> > +		xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
> > +			for (gfn = start; gfn < end; gfn++) {
> 
> Why the start end range used as gfn here ?
> 
> the page->index is offset of inode's page cache mapping and
> gmem address space, IIUC, gfn calculation should follow same
> way as kvm_gmem_invalidate_begin().

Also instead of sending signal multiple times, we can utilize lsb argument.
Something like this?

diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index a14eaac9dbad..8072ac901855 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -349,20 +349,35 @@ static int kvm_gmem_error_page(struct address_space *mapping, struct page *page)
        struct kvm_gmem *gmem;
        unsigned long index;
        pgoff_t start, end;
-       gfn_t gfn;
+       unsigned int order;
+       int nr_pages;
+       gfn_t gfn, gfn_end;
 
        filemap_invalidate_lock_shared(mapping);
 
        start = page->index;
        end = start + thp_nr_pages(page);
+       nr_pages = thp_nr_pages(page);
+       order = thp_order(page);
 
        list_for_each_entry(gmem, gmem_list, entry) {
                xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
-                       for (gfn = start; gfn < end; gfn++) {
-                               if (WARN_ON_ONCE(gfn < slot->base_gfn ||
-                                               gfn >= slot->base_gfn + slot->npages))
-                                       continue;
+                       gfn = slot->base_gfn + page->index - slot->gmem.pgoff;
 
+                       if (page->index + nr_pages <= slot->gmem.pgoff + slot->npages &&
+                           !(gfn & ~((1ULL << order) - 1))) {
+                               /*
+                                * FIXME: Tell userspace that the *private*
+                                * memory encountered an error.
+                                */
+                               send_sig_mceerr(BUS_MCEERR_AR,
+                                               (void __user *)gfn_to_hva_memslot(slot, gfn),
+                                               order, current);
+                               break;
+                       }
+
+                       gfn_end = min(gfn + nr_pages, slot->base_gfn + slot->npages);
+                       for (; gfn < gfn_end; gfn++) {
                                /*
                                 * FIXME: Tell userspace that the *private*
                                 * memory encountered an error.

-- 
Isaku Yamahata <isaku.yamahata@gmail.com>

  reply	other threads:[~2023-07-21 22:27 UTC|newest]

Thread overview: 132+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-18 23:44 [RFC PATCH v11 00/29] KVM: guest_memfd() and per-page attributes Sean Christopherson
2023-07-18 23:44 ` [RFC PATCH v11 01/29] KVM: Wrap kvm_gfn_range.pte in a per-action union Sean Christopherson
2023-07-19 13:39   ` Jarkko Sakkinen
2023-07-19 15:39     ` Sean Christopherson
2023-07-19 16:55   ` Paolo Bonzini
2023-07-26 20:22     ` Sean Christopherson
2023-07-21  6:26   ` Yan Zhao
2023-07-21 10:45     ` Xu Yilun
2023-07-25 18:05       ` Sean Christopherson
2023-07-18 23:44 ` [RFC PATCH v11 02/29] KVM: Tweak kvm_hva_range and hva_handler_t to allow reusing for gfn ranges Sean Christopherson
2023-07-19 17:12   ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 03/29] KVM: Use gfn instead of hva for mmu_notifier_retry Sean Christopherson
2023-07-19 17:12   ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 04/29] KVM: PPC: Drop dead code related to KVM_ARCH_WANT_MMU_NOTIFIER Sean Christopherson
2023-07-19 17:34   ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 05/29] KVM: Convert KVM_ARCH_WANT_MMU_NOTIFIER to CONFIG_KVM_GENERIC_MMU_NOTIFIER Sean Christopherson
2023-07-19  7:31   ` Yuan Yao
2023-07-19 14:15     ` Sean Christopherson
2023-07-20  1:15       ` Yuan Yao
2023-07-18 23:44 ` [RFC PATCH v11 06/29] KVM: Introduce KVM_SET_USER_MEMORY_REGION2 Sean Christopherson
2023-07-21  9:03   ` Paolo Bonzini
2023-07-28  9:25   ` Quentin Perret
2023-07-29  0:03     ` Sean Christopherson
2023-07-31  9:30       ` Quentin Perret
2023-07-31 15:58       ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 07/29] KVM: Add KVM_EXIT_MEMORY_FAULT exit Sean Christopherson
2023-07-19  7:54   ` Yuan Yao
2023-07-19 14:16     ` Sean Christopherson
2023-07-18 23:44 ` [RFC PATCH v11 08/29] KVM: Introduce per-page memory attributes Sean Christopherson
2023-07-20  8:09   ` Yuan Yao
2023-07-20 19:02     ` Isaku Yamahata
2023-07-20 20:20       ` Sean Christopherson
2023-07-21 10:57   ` Paolo Bonzini
2023-07-21 15:56   ` Xiaoyao Li
2023-07-24  4:43   ` Xu Yilun
2023-07-26 15:59     ` Sean Christopherson
2023-07-27  3:24       ` Xu Yilun
2023-08-02 20:31   ` Isaku Yamahata
2023-08-14  0:44   ` Binbin Wu
2023-08-14 21:54     ` Sean Christopherson
2023-07-18 23:44 ` [RFC PATCH v11 09/29] KVM: x86: Disallow hugepages when memory attributes are mixed Sean Christopherson
2023-07-21 11:59   ` Paolo Bonzini
2023-07-21 17:41     ` Sean Christopherson
2023-07-18 23:44 ` [RFC PATCH v11 10/29] mm: Add AS_UNMOVABLE to mark mapping as completely unmovable Sean Christopherson
2023-07-25 10:24   ` Kirill A . Shutemov
2023-07-25 12:51     ` Matthew Wilcox
2023-07-26 11:36       ` Kirill A . Shutemov
2023-07-28 16:02       ` Vlastimil Babka
2023-07-28 16:13         ` Paolo Bonzini
2023-09-01  8:23       ` Vlastimil Babka
2023-07-18 23:44 ` [RFC PATCH v11 11/29] security: Export security_inode_init_security_anon() for use by KVM Sean Christopherson
2023-07-19  2:14   ` Paul Moore
2023-07-31 10:46   ` Vlastimil Babka
2023-07-18 23:44 ` [RFC PATCH v11 12/29] KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory Sean Christopherson
2023-07-19 17:21   ` Vishal Annapurve
2023-07-19 17:47     ` Sean Christopherson
2023-07-20 14:45   ` Xiaoyao Li
2023-07-20 15:14     ` Sean Christopherson
2023-07-20 21:28   ` Isaku Yamahata
2023-07-21  6:13   ` Yuan Yao
2023-07-21 22:27     ` Isaku Yamahata [this message]
2023-07-21 22:33       ` Sean Christopherson
2023-07-21 15:05   ` Xiaoyao Li
2023-07-21 15:42     ` Xiaoyao Li
2023-07-21 17:42       ` Sean Christopherson
2023-07-21 17:17   ` Paolo Bonzini
2023-07-21 17:50     ` Sean Christopherson
2023-07-25 15:09   ` Wang, Wei W
2023-07-25 16:03     ` Sean Christopherson
2023-07-26  1:51       ` Wang, Wei W
2023-07-31 16:23       ` Fuad Tabba
2023-07-26 17:18   ` Elliot Berman
2023-07-26 19:28     ` Sean Christopherson
2023-07-27 10:39   ` Fuad Tabba
2023-07-27 17:13     ` Sean Christopherson
2023-07-31 13:46       ` Fuad Tabba
2023-08-03 19:15   ` Ryan Afranji
2023-08-07 23:06   ` Ackerley Tng
2023-08-08 21:13     ` Sean Christopherson
2023-08-10 23:57       ` Vishal Annapurve
2023-08-11 17:44         ` Sean Christopherson
2023-08-15 18:43       ` Ackerley Tng
2023-08-15 20:03         ` Sean Christopherson
2023-08-21 17:30           ` Ackerley Tng
2023-08-21 19:33             ` Sean Christopherson
2023-08-28 22:56               ` Ackerley Tng
2023-08-29  2:53                 ` Elliot Berman
2023-09-14 19:12                   ` Sean Christopherson
2023-09-14 18:15                 ` Sean Christopherson
2023-09-14 23:19                   ` Ackerley Tng
2023-09-15  0:33                     ` Sean Christopherson
2023-08-30 15:12   ` Binbin Wu
2023-08-30 16:44     ` Ackerley Tng
2023-09-01  3:45       ` Binbin Wu
2023-09-01 16:46         ` Ackerley Tng
2023-07-18 23:44 ` [RFC PATCH v11 13/29] KVM: Add transparent hugepage support for dedicated guest memory Sean Christopherson
2023-07-21 15:07   ` Paolo Bonzini
2023-07-21 17:13     ` Sean Christopherson
2023-09-06 22:10       ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 14/29] KVM: x86/mmu: Handle page fault for private memory Sean Christopherson
2023-07-21 15:09   ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 15/29] KVM: Drop superfluous __KVM_VCPU_MULTIPLE_ADDRESS_SPACE macro Sean Christopherson
2023-07-21 15:07   ` Paolo Bonzini
2023-07-18 23:44 ` [RFC PATCH v11 16/29] KVM: Allow arch code to track number of memslot address spaces per VM Sean Christopherson
2023-07-21 15:12   ` Paolo Bonzini
2023-07-18 23:45 ` [RFC PATCH v11 17/29] KVM: x86: Add support for "protected VMs" that can utilize private memory Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 18/29] KVM: selftests: Drop unused kvm_userspace_memory_region_find() helper Sean Christopherson
2023-07-21 15:14   ` Paolo Bonzini
2023-07-18 23:45 ` [RFC PATCH v11 19/29] KVM: selftests: Convert lib's mem regions to KVM_SET_USER_MEMORY_REGION2 Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 20/29] KVM: selftests: Add support for creating private memslots Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 21/29] KVM: selftests: Add helpers to convert guest memory b/w private and shared Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 22/29] KVM: selftests: Add helpers to do KVM_HC_MAP_GPA_RANGE hypercalls (x86) Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 23/29] KVM: selftests: Introduce VM "shape" to allow tests to specify the VM type Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 24/29] KVM: selftests: Add GUEST_SYNC[1-6] macros for synchronizing more data Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 25/29] KVM: selftests: Add x86-only selftest for private memory conversions Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 26/29] KVM: selftests: Add KVM_SET_USER_MEMORY_REGION2 helper Sean Christopherson
2023-07-18 23:45 ` [RFC PATCH v11 27/29] KVM: selftests: Expand set_memory_region_test to validate guest_memfd() Sean Christopherson
2023-08-07 23:17   ` Ackerley Tng
2023-07-18 23:45 ` [RFC PATCH v11 28/29] KVM: selftests: Add basic selftest for guest_memfd() Sean Christopherson
2023-08-07 23:20   ` Ackerley Tng
2023-08-18 23:03     ` Sean Christopherson
2023-08-07 23:25   ` Ackerley Tng
2023-08-18 23:01     ` Sean Christopherson
2023-08-21 19:49       ` Ackerley Tng
2023-07-18 23:45 ` [RFC PATCH v11 29/29] KVM: selftests: Test KVM exit behavior for private memory/access Sean Christopherson
2023-07-24  6:38 ` [RFC PATCH v11 00/29] KVM: guest_memfd() and per-page attributes Nikunj A. Dadhania
2023-07-24 17:00   ` Sean Christopherson
2023-07-26 11:20     ` Nikunj A. Dadhania
2023-07-26 14:24       ` Sean Christopherson
2023-07-27  6:42         ` Nikunj A. Dadhania
2023-08-03 11:03       ` Vlastimil Babka
2023-07-24 20:16 ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230721222704.GJ25699@ls.amr.corp.intel.com \
    --to=isaku.yamahata@gmail.com \
    --cc=ackerleytng@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=anup@brainfault.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=chao.p.peng@linux.intel.com \
    --cc=chenhuacai@kernel.org \
    --cc=david@redhat.com \
    --cc=jarkko@kernel.org \
    --cc=jmorris@namei.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm-riscv@lists.infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=liam.merwick@oracle.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=linux-security-module@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mail@maciej.szmigiero.name \
    --cc=maz@kernel.org \
    --cc=michael.roth@amd.com \
    --cc=mpe@ellerman.id.au \
    --cc=oliver.upton@linux.dev \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=paul@paul-moore.com \
    --cc=pbonzini@redhat.com \
    --cc=qperret@google.com \
    --cc=seanjc@google.com \
    --cc=serge@hallyn.com \
    --cc=tabba@google.com \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=wei.w.wang@intel.com \
    --cc=willy@infradead.org \
    --cc=yu.c.zhang@linux.intel.com \
    --cc=yuan.yao@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).