linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Chao Peng <chao.p.peng@linux.intel.com>
To: "Alex Bennée" <alex.bennee@linaro.org>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-fsdevel@vger.kernel.org,
	linux-arch@vger.kernel.org, linux-api@vger.kernel.org,
	linux-doc@vger.kernel.org, qemu-devel@nongnu.org,
	Paolo Bonzini <pbonzini@redhat.com>,
	Jonathan Corbet <corbet@lwn.net>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
	Hugh Dickins <hughd@google.com>, Jeff Layton <jlayton@kernel.org>,
	"J . Bruce Fields" <bfields@fieldses.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Shuah Khan <shuah@kernel.org>, Mike Rapoport <rppt@kernel.org>,
	Steven Price <steven.price@arm.com>,
	"Maciej S . Szmigiero" <mail@maciej.szmigiero.name>,
	Vlastimil Babka <vbabka@suse.cz>,
	Vishal Annapurve <vannapurve@google.com>,
	Yu Zhang <yu.c.zhang@linux.intel.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	luto@kernel.org, jun.nakajima@intel.com, dave.hansen@intel.com,
	ak@linux.intel.com, david@redhat.com, aarcange@redhat.com,
	ddutile@redhat.com, dhildenb@redhat.com,
	Quentin Perret <qperret@google.com>,
	tabba@google.com, Michael Roth <michael.roth@amd.com>,
	mhocko@suse.com, Muchun Song <songmuchun@bytedance.com>,
	wei.w.wang@intel.com
Subject: Re: [PATCH v9 2/8] KVM: Extend the memslot to support fd-based private memory
Date: Tue, 15 Nov 2022 17:29:06 +0800	[thread overview]
Message-ID: <20221115092906.GA338422@chaop.bj.intel.com> (raw)
In-Reply-To: <877czxbjf6.fsf@linaro.org>

On Mon, Nov 14, 2022 at 04:04:59PM +0000, Alex Bennée wrote:
> 
> Chao Peng <chao.p.peng@linux.intel.com> writes:
> 
> > In memory encryption usage, guest memory may be encrypted with special
> > key and can be accessed only by the guest itself. We call such memory
> > private memory. It's valueless and sometimes can cause problem to allow
> > userspace to access guest private memory. This new KVM memslot extension
> > allows guest private memory being provided though a restrictedmem
> > backed file descriptor(fd) and userspace is restricted to access the
> > bookmarked memory in the fd.
> >
> <snip>
> > To make code maintenance easy, internally we use a binary compatible
> > alias struct kvm_user_mem_region to handle both the normal and the
> > '_ext' variants.
> 
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 0d5d4419139a..f1ae45c10c94 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -103,6 +103,33 @@ struct kvm_userspace_memory_region {
> >  	__u64 userspace_addr; /* start of the userspace allocated memory */
> >  };
> >  
> > +struct kvm_userspace_memory_region_ext {
> > +	struct kvm_userspace_memory_region region;
> > +	__u64 restricted_offset;
> > +	__u32 restricted_fd;
> > +	__u32 pad1;
> > +	__u64 pad2[14];
> > +};
> > +
> > +#ifdef __KERNEL__
> > +/*
> > + * kvm_user_mem_region is a kernel-only alias of kvm_userspace_memory_region_ext
> > + * that "unpacks" kvm_userspace_memory_region so that KVM can directly access
> > + * all fields from the top-level "extended" region.
> > + */
> > +struct kvm_user_mem_region {
> > +	__u32 slot;
> > +	__u32 flags;
> > +	__u64 guest_phys_addr;
> > +	__u64 memory_size;
> > +	__u64 userspace_addr;
> > +	__u64 restricted_offset;
> > +	__u32 restricted_fd;
> > +	__u32 pad1;
> > +	__u64 pad2[14];
> > +};
> > +#endif
> 
> I'm not sure I buy the argument this makes the code maintenance easier
> because you now have multiple places to update if you extend the field.
> Was this simply to avoid changing:
> 
>   foo->slot to foo->region.slot
> 
> in the underlying code?

That is one of the reasons, by doing this we can also avoid confusion to
deal with '_ext' and the 'base' struct for different functions spread
across KVM code. No doubt now I need update every places where the
'base' struct is being used, but that makes future maintenance easier,
e.g. adding another new field or even extend the memslot structure again
would just require changes to the flat struct here and the places where
the new field is actually used.

> 
> > +
> >  /*
> >   * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
> >   * other bits are reserved for kvm internal use which are defined in
> > @@ -110,6 +137,7 @@ struct kvm_userspace_memory_region {
> >   */
> >  #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
> >  #define KVM_MEM_READONLY	(1UL << 1)
> > +#define KVM_MEM_PRIVATE		(1UL << 2)
> >  
> >  /* for KVM_IRQ_LINE */
> >  struct kvm_irq_level {
> > @@ -1178,6 +1206,7 @@ struct kvm_ppc_resize_hpt {
> >  #define KVM_CAP_S390_ZPCI_OP 221
> >  #define KVM_CAP_S390_CPU_TOPOLOGY 222
> >  #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
> > +#define KVM_CAP_PRIVATE_MEM 224
> >  
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >  
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 800f9470e36b..9ff164c7e0cc 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -86,3 +86,6 @@ config KVM_XFER_TO_GUEST_WORK
> >  
> >  config HAVE_KVM_PM_NOTIFIER
> >         bool
> > +
> > +config HAVE_KVM_RESTRICTED_MEM
> > +       bool
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index e30f1b4ecfa5..8dace78a0278 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -1526,7 +1526,7 @@ static void kvm_replace_memslot(struct kvm *kvm,
> >  	}
> >  }
> >  
> > -static int check_memory_region_flags(const struct kvm_userspace_memory_region *mem)
> > +static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> >  {
> >  	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> >  
> > @@ -1920,7 +1920,7 @@ static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
> >   * Must be called holding kvm->slots_lock for write.
> >   */
> >  int __kvm_set_memory_region(struct kvm *kvm,
> > -			    const struct kvm_userspace_memory_region *mem)
> > +			    const struct kvm_user_mem_region *mem)
> >  {
> >  	struct kvm_memory_slot *old, *new;
> >  	struct kvm_memslots *slots;
> > @@ -2024,7 +2024,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
> >  EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
> >  
> >  int kvm_set_memory_region(struct kvm *kvm,
> > -			  const struct kvm_userspace_memory_region *mem)
> > +			  const struct kvm_user_mem_region *mem)
> >  {
> >  	int r;
> >  
> > @@ -2036,7 +2036,7 @@ int kvm_set_memory_region(struct kvm *kvm,
> >  EXPORT_SYMBOL_GPL(kvm_set_memory_region);
> >  
> >  static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
> > -					  struct kvm_userspace_memory_region *mem)
> > +					  struct kvm_user_mem_region *mem)
> >  {
> >  	if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
> >  		return -EINVAL;
> > @@ -4627,6 +4627,33 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
> >  	return fd;
> >  }
> >  
> > +#define SANITY_CHECK_MEM_REGION_FIELD(field)					\
> > +do {										\
> > +	BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) !=		\
> > +		     offsetof(struct kvm_userspace_memory_region, field));	\
> > +	BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) !=		\
> > +		     sizeof_field(struct kvm_userspace_memory_region, field));	\
> > +} while (0)
> > +
> > +#define SANITY_CHECK_MEM_REGION_EXT_FIELD(field)					\
> > +do {											\
> > +	BUILD_BUG_ON(offsetof(struct kvm_user_mem_region, field) !=			\
> > +		     offsetof(struct kvm_userspace_memory_region_ext, field));		\
> > +	BUILD_BUG_ON(sizeof_field(struct kvm_user_mem_region, field) !=			\
> > +		     sizeof_field(struct kvm_userspace_memory_region_ext, field));	\
> > +} while (0)
> > +
> > +static void kvm_sanity_check_user_mem_region_alias(void)
> > +{
> > +	SANITY_CHECK_MEM_REGION_FIELD(slot);
> > +	SANITY_CHECK_MEM_REGION_FIELD(flags);
> > +	SANITY_CHECK_MEM_REGION_FIELD(guest_phys_addr);
> > +	SANITY_CHECK_MEM_REGION_FIELD(memory_size);
> > +	SANITY_CHECK_MEM_REGION_FIELD(userspace_addr);
> > +	SANITY_CHECK_MEM_REGION_EXT_FIELD(restricted_offset);
> > +	SANITY_CHECK_MEM_REGION_EXT_FIELD(restricted_fd);
> > +}
> 
> Do we have other examples in the kernel that jump these hoops?

grep -rn 'BUILD_BUG_ON(offsetof' can give you some hint on other usages
in the kernel. But for a quick check you can look:
  siginfo_buildtime_checks()

> 
> >  static long kvm_vm_ioctl(struct file *filp,
> >  			   unsigned int ioctl, unsigned long arg)
> >  {
> > @@ -4650,14 +4677,20 @@ static long kvm_vm_ioctl(struct file *filp,
> >  		break;
> >  	}
> >  	case KVM_SET_USER_MEMORY_REGION: {
> > -		struct kvm_userspace_memory_region kvm_userspace_mem;
> > +		struct kvm_user_mem_region mem;
> > +		unsigned long size = sizeof(struct kvm_userspace_memory_region);
> > +
> > +		kvm_sanity_check_user_mem_region_alias();
> >  
> >  		r = -EFAULT;
> > -		if (copy_from_user(&kvm_userspace_mem, argp,
> > -						sizeof(kvm_userspace_mem)))
> > +		if (copy_from_user(&mem, argp, size))
> > +			goto out;
> > +
> > +		r = -EINVAL;
> > +		if (mem.flags & KVM_MEM_PRIVATE)
> >  			goto out;
> 
> Hmm I can see in the later code you explicitly check for the
> KVM_MEM_PRIVATE flag with:
> 
> 		if (get_user(flags, (u32 __user *)(argp + flags_offset)))
> 			goto out;
> 
> 		if (flags & KVM_MEM_PRIVATE)
> 			size = sizeof(struct kvm_userspace_memory_region_ext);
> 		else
> 			size = sizeof(struct kvm_userspace_memory_region);
> 
> I think it would make sense to bring that sanity checking forward into
> this patch to avoid the validation logic working in two different ways
> over the series.

That is my original code actually, then Sean suggested to change to
current code[*], the reason is these two pathes are for different
purpose, this patch introduces the data structures but the later patch
actually makes use of the '_ext' variant.

[*] https://lkml.kernel.org/kvm/YuQ6QWcdZLdStkWl@google.com/

Chao
> 
> >  
> > -		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
> > +		r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> >  		break;
> >  	}
> >  	case KVM_GET_DIRTY_LOG: {
> 
> 
> -- 
> Alex Bennée


  reply	other threads:[~2022-11-15  9:33 UTC|newest]

Thread overview: 101+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-25 15:13 [PATCH v9 0/8] KVM: mm: fd-based approach for supporting KVM Chao Peng
2022-10-25 15:13 ` [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory Chao Peng
2022-10-26 17:31   ` Isaku Yamahata
2022-10-28  6:12     ` Chao Peng
2022-10-27 10:20   ` Fuad Tabba
2022-10-31 17:47   ` Michael Roth
2022-11-01 11:37     ` Chao Peng
2022-11-01 15:19       ` Michael Roth
2022-11-01 19:30         ` Michael Roth
2022-11-02 14:53           ` Chao Peng
2022-11-02 21:19             ` Michael Roth
2022-11-14 14:02         ` Vlastimil Babka
2022-11-14 15:28           ` Kirill A. Shutemov
2022-11-14 22:16             ` Michael Roth
2022-11-15  9:48               ` Chao Peng
2022-11-14 22:16           ` Michael Roth
2022-11-02 21:14     ` Kirill A. Shutemov
2022-11-02 21:26       ` Michael Roth
2022-11-02 22:07       ` Michael Roth
2022-11-03 16:30         ` Kirill A. Shutemov
2022-11-29  0:06   ` Michael Roth
2022-11-29 11:21     ` Kirill A. Shutemov
2022-11-29 11:39       ` David Hildenbrand
2022-11-29 13:59         ` Chao Peng
2022-11-29 13:58       ` Chao Peng
2022-11-29  0:37   ` Michael Roth
2022-11-29 14:06     ` Chao Peng
2022-11-29 19:06       ` Michael Roth
2022-11-29 19:18         ` Michael Roth
2022-11-30  9:39           ` Chao Peng
2022-11-30 14:31             ` Michael Roth
2022-11-29 18:01     ` Vishal Annapurve
2022-12-02  2:16   ` Vishal Annapurve
2022-12-02  6:49     ` Chao Peng
2022-12-02 13:44       ` Kirill A . Shutemov
2022-10-25 15:13 ` [PATCH v9 2/8] KVM: Extend the memslot to support fd-based private memory Chao Peng
2022-10-27 10:25   ` Fuad Tabba
2022-10-28  7:04   ` Xiaoyao Li
2022-10-31 14:14     ` Chao Peng
2022-11-14 16:04   ` Alex Bennée
2022-11-15  9:29     ` Chao Peng [this message]
2022-10-25 15:13 ` [PATCH v9 3/8] KVM: Add KVM_EXIT_MEMORY_FAULT exit Chao Peng
2022-10-25 15:26   ` Peter Maydell
2022-10-25 16:17     ` Sean Christopherson
2022-10-27 10:27   ` Fuad Tabba
2022-10-28  6:14     ` Chao Peng
2022-11-15 16:56   ` Alex Bennée
2022-11-16  3:14     ` Chao Peng
2022-11-16 19:03       ` Alex Bennée
2022-11-17 13:45         ` Chao Peng
2022-11-17 15:08           ` Alex Bennée
2022-11-18  1:32             ` Chao Peng
2022-11-18 13:23               ` Alex Bennée
2022-11-18 15:59                 ` Sean Christopherson
2022-11-22  9:50                   ` Chao Peng
2022-11-23 18:02                     ` Sean Christopherson
2022-11-16 18:15   ` Andy Lutomirski
2022-11-16 18:48     ` Sean Christopherson
2022-11-17 13:42       ` Chao Peng
2022-10-25 15:13 ` [PATCH v9 4/8] KVM: Use gfn instead of hva for mmu_notifier_retry Chao Peng
2022-10-27 10:29   ` Fuad Tabba
2022-11-04  2:28     ` Chao Peng
2022-11-04 22:29       ` Sean Christopherson
2022-11-08  7:16         ` Chao Peng
2022-11-10 17:53           ` Sean Christopherson
2022-11-10 20:06   ` Sean Christopherson
2022-11-11  8:27     ` Chao Peng
2022-10-25 15:13 ` [PATCH v9 5/8] KVM: Register/unregister the guest private memory regions Chao Peng
2022-10-27 10:31   ` Fuad Tabba
2022-11-03 23:04   ` Sean Christopherson
2022-11-04  8:28     ` Chao Peng
2022-11-04 21:19       ` Sean Christopherson
2022-11-08  8:24         ` Chao Peng
2022-11-08  1:35   ` Yuan Yao
2022-11-08  9:41     ` Chao Peng
2022-11-09  5:52       ` Yuan Yao
2022-11-16 22:24   ` Sean Christopherson
2022-11-17 13:20     ` Chao Peng
2022-10-25 15:13 ` [PATCH v9 6/8] KVM: Update lpage info when private/shared memory are mixed Chao Peng
2022-10-26 20:46   ` Isaku Yamahata
2022-10-28  6:38     ` Chao Peng
2022-11-08 12:08   ` Yuan Yao
2022-11-09  4:13     ` Chao Peng
2022-10-25 15:13 ` [PATCH v9 7/8] KVM: Handle page fault for private memory Chao Peng
2022-10-26 21:54   ` Isaku Yamahata
2022-10-28  6:55     ` Chao Peng
2022-11-01  0:02       ` Isaku Yamahata
2022-11-01 11:38         ` Chao Peng
2022-11-16 20:50   ` Ackerley Tng
2022-11-16 22:13     ` Sean Christopherson
2022-11-17 13:25       ` Chao Peng
2022-10-25 15:13 ` [PATCH v9 8/8] KVM: Enable and expose KVM_MEM_PRIVATE Chao Peng
2022-10-27 10:31   ` Fuad Tabba
2022-11-03 12:13 ` [PATCH v9 0/8] KVM: mm: fd-based approach for supporting KVM Vishal Annapurve
2022-11-08  0:41   ` Isaku Yamahata
2022-11-09 15:54     ` Kirill A. Shutemov
2022-11-15 14:36       ` Kirill A. Shutemov
2022-11-14 11:43 ` Alex Bennée
2022-11-16  5:00   ` Chao Peng
2022-11-16  9:40     ` Alex Bennée
2022-11-17 14:16       ` Chao Peng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221115092906.GA338422@chaop.bj.intel.com \
    --to=chao.p.peng@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.bennee@linaro.org \
    --cc=bfields@fieldses.org \
    --cc=bp@alien8.de \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@intel.com \
    --cc=david@redhat.com \
    --cc=ddutile@redhat.com \
    --cc=dhildenb@redhat.com \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jlayton@kernel.org \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=jun.nakajima@intel.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mail@maciej.szmigiero.name \
    --cc=mhocko@suse.com \
    --cc=michael.roth@amd.com \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qperret@google.com \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=shuah@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=steven.price@arm.com \
    --cc=tabba@google.com \
    --cc=tglx@linutronix.de \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=wei.w.wang@intel.com \
    --cc=x86@kernel.org \
    --cc=yu.c.zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).