linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michael Kelley <mikelley@microsoft.com>
To: Nuno Das Neves <nunodasneves@linux.microsoft.com>,
	"linux-hyperv@vger.kernel.org" <linux-hyperv@vger.kernel.org>
Cc: "virtualization@lists.linux-foundation.org" 
	<virtualization@lists.linux-foundation.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"viremana@linux.microsoft.com" <viremana@linux.microsoft.com>,
	Sunil Muthuswamy <sunilmut@microsoft.com>,
	"wei.liu@kernel.org" <wei.liu@kernel.org>,
	Lillian Grassin-Drake <Lillian.GrassinDrake@microsoft.com>,
	KY Srinivasan <kys@microsoft.com>
Subject: RE: [RFC PATCH 06/18] virt/mshv: create, initialize, finalize, delete partition hypercalls
Date: Mon, 8 Feb 2021 19:42:14 +0000	[thread overview]
Message-ID: <MWHPR21MB1593518130E3E0532894C516D78F9@MWHPR21MB1593.namprd21.prod.outlook.com> (raw)
In-Reply-To: <1605918637-12192-7-git-send-email-nunodasneves@linux.microsoft.com>

From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, November 20, 2020 4:30 PM
> 
> Add hypercalls for fully setting up and mostly tearing down a guest
> partition.
> The teardown operation will generate an error as the deposited
> memory has not been withdrawn.
> This is fixed in the next patch.
> 
> Co-developed-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
> Signed-off-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
>  include/asm-generic/hyperv-tlfs.h      |  52 +++++++-
>  include/uapi/asm-generic/hyperv-tlfs.h |   1 +
>  include/uapi/linux/mshv.h              |   1 +
>  virt/mshv/mshv_main.c                  | 169 ++++++++++++++++++++++++-
>  4 files changed, 220 insertions(+), 3 deletions(-)
> 
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index 2ff580780ce4..ab6ae6c164f5 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -142,6 +142,10 @@ struct ms_hyperv_tsc_page {
>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX	0x0013
>  #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX	0x0014
>  #define HVCALL_SEND_IPI_EX			0x0015
> +#define HVCALL_CREATE_PARTITION			0x0040
> +#define HVCALL_INITIALIZE_PARTITION		0x0041
> +#define HVCALL_FINALIZE_PARTITION		0x0042
> +#define HVCALL_DELETE_PARTITION			0x0043
>  #define HVCALL_GET_PARTITION_ID			0x0046
>  #define HVCALL_DEPOSIT_MEMORY			0x0048
>  #define HVCALL_CREATE_VP			0x004e
> @@ -451,7 +455,7 @@ struct hv_get_partition_id {
>  struct hv_deposit_memory {
>  	u64 partition_id;
>  	u64 gpa_page_list[];
> -} __packed;
> +};

Why remove __packed?

> 
>  struct hv_proximity_domain_flags {
>  	u32 proximity_preferred : 1;
> @@ -767,4 +771,50 @@ struct hv_input_unmap_device_interrupt {
>  #define HV_SOURCE_SHADOW_NONE               0x0
>  #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE   0x1
> 
> +#define HV_MAKE_COMPATIBILITY_VERSION(major_, minor_)                          \
> +	((u32)((major_) << 8 | (minor_)))
> +
> +enum hv_compatibility_version {
> +	HV_COMPATIBILITY_19_H1 = HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X5),
> +	HV_COMPATIBILITY_MANGANESE = HV_MAKE_COMPATIBILITY_VERSION(0X6, 0X7),

Avoid use of "Manganese", which is an internal code name.  I'd suggest calling it
20_H1 instead, which at least has some broader meaning.

> +	HV_COMPATIBILITY_PRERELEASE = HV_MAKE_COMPATIBILITY_VERSION(0XFE, 0X0),
> +	HV_COMPATIBILITY_EXPERIMENT = HV_MAKE_COMPATIBILITY_VERSION(0XFF, 0X0),
> +};
> +
> +union hv_partition_isolation_properties {
> +	u64 as_uint64;
> +	struct {
> +		u64 isolation_type: 5;
> +		u64 rsvd_z: 7;
> +		u64 shared_gpa_boundary_page_number: 52;
> +	};
> +};

Add __packed.

> +
> +/* Non-userspace-visible partition creation flags */
> +#define HV_PARTITION_CREATION_FLAG_EXO_PARTITION                    BIT(8)
> +
> +struct hv_create_partition_in {
> +	u64 flags;
> +	union hv_proximity_domain_info proximity_domain_info;
> +	enum hv_compatibility_version compatibility_version;

An "enum" is a 32 bit value in gcc and I would presume that
Hyper-V is expecting a 64 bit value.  In general, using an enum in a data
structure with exact layout requirements is problematic because the "C"
language doesn't specify how big an enum is.  In such cases, it's better
to use an integer field with an explicit size (like u64) and #defines for
the possible values.

> +	struct hv_partition_creation_properties partition_creation_properties;
> +	union hv_partition_isolation_properties isolation_properties;
> +};
> +
> +struct hv_create_partition_out {
> +	u64 partition_id;
> +};
> +
> +struct hv_initialize_partition {
> +	u64 partition_id;
> +};
> +
> +struct hv_finalize_partition {
> +	u64 partition_id;
> +};
> +
> +struct hv_delete_partition {
> +	u64 partition_id;
> +};

All of the above should have __packed for consistency with the other
Hyper-V data structures.

> +
>  #endif
> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-
> tlfs.h
> index 140cc0b4f98f..7a858226a9c5 100644
> --- a/include/uapi/asm-generic/hyperv-tlfs.h
> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
> @@ -6,6 +6,7 @@
>  #define BIT(X)	(1ULL << (X))
>  #endif
> 
> +/* Userspace-visible partition creation flags */

Could this comment be included in the earlier patch with the #defines so
that you avoid the trivial change here?

>  #define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST                BIT(0)
>  #define HV_PARTITION_CREATION_FLAG_GPA_LARGE_PAGES_DISABLED         BIT(3)
>  #define HV_PARTITION_CREATION_FLAG_GPA_SUPER_PAGES_ENABLED          BIT(4)
> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h
> index 3788f8bc5caa..4f8da9a6fde2 100644
> --- a/include/uapi/linux/mshv.h
> +++ b/include/uapi/linux/mshv.h
> @@ -9,6 +9,7 @@
> 
>  #include <linux/types.h>
>  #include <asm/hyperv-tlfs.h>
> +#include <asm-generic/hyperv-tlfs.h>

Similarly, consider adding this #include in the earlier patch so that
this trivial change isn't needed here.

> 
>  #define MSHV_VERSION	0x0
> 
> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
> index 4dcbe4907430..c4130a6508e5 100644
> --- a/virt/mshv/mshv_main.c
> +++ b/virt/mshv/mshv_main.c
> @@ -15,6 +15,7 @@
>  #include <linux/file.h>
>  #include <linux/anon_inodes.h>
>  #include <linux/mshv.h>
> +#include <asm/mshyperv.h>
> 
>  MODULE_AUTHOR("Microsoft");
>  MODULE_LICENSE("GPL");
> @@ -31,7 +32,6 @@ static struct mshv mshv = {};
>  static void mshv_partition_put(struct mshv_partition *partition);
>  static int mshv_partition_release(struct inode *inode, struct file *filp);
>  static long mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
> -

Spurious whitespace change?

>  static int mshv_dev_open(struct inode *inode, struct file *filp);
>  static int mshv_dev_release(struct inode *inode, struct file *filp);
>  static long mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
> @@ -57,6 +57,143 @@ static struct miscdevice mshv_dev = {
>  	.mode = 600,
>  };
> 
> +#define HV_INIT_PARTITION_DEPOSIT_PAGES 208

A comment about how this value is determined would be useful.
I'm assuming it was determined empirically.

> +
> +static int
> +hv_call_create_partition(
> +		u64 flags,
> +		struct hv_partition_creation_properties creation_properties,
> +		u64 *partition_id)
> +{
> +	struct hv_create_partition_in *input;
> +	struct hv_create_partition_out *output;
> +	int status;
> +	int ret;
> +	unsigned long irq_flags;
> +	int i;
> +
> +	do {
> +		local_irq_save(irq_flags);
> +		input = (struct hv_create_partition_in *)(*this_cpu_ptr(
> +			hyperv_pcpu_input_arg));
> +		output = (struct hv_create_partition_out *)(*this_cpu_ptr(
> +			hyperv_pcpu_output_arg));
> +
> +		input->flags = flags;
> +		input->proximity_domain_info.as_uint64 = 0;
> +		input->compatibility_version = HV_COMPATIBILITY_MANGANESE;
> +		for (i = 0; i < HV_PARTITION_PROCESSOR_FEATURE_BANKS; ++i)
> +			input->partition_creation_properties
> +				.disabled_processor_features.as_uint64[i] = 0;
> +		input->partition_creation_properties
> +			.disabled_processor_xsave_features.as_uint64 = 0;
> +		input->isolation_properties.as_uint64 = 0;
> +
> +		status = hv_do_hypercall(HVCALL_CREATE_PARTITION,
> +					 input, output);

hv_do_hypercall returns a u64, which should then be masked with
HV_HYPERCALL_RESULT_MASK before checking the result.

> +		if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
> +			if (status == HV_STATUS_SUCCESS)
> +				*partition_id = output->partition_id;
> +			else
> +				pr_err("%s: %s\n",
> +				       __func__, hv_status_to_string(status));
> +			local_irq_restore(irq_flags);
> +			ret = -hv_status_to_errno(status);
> +			break;
> +		}
> +		local_irq_restore(irq_flags);
> +		ret = hv_call_deposit_pages(NUMA_NO_NODE,
> +					    hv_current_partition_id, 1);
> +	} while (!ret);
> +
> +	return ret;
> +}
> +
> +static int
> +hv_call_initialize_partition(u64 partition_id)
> +{
> +	struct hv_initialize_partition *input;
> +	int status;
> +	int ret;
> +	unsigned long flags;
> +
> +	ret = hv_call_deposit_pages(
> +				NUMA_NO_NODE,
> +				partition_id,
> +				HV_INIT_PARTITION_DEPOSIT_PAGES);
> +	if (ret)
> +		return ret;
> +
> +	do {
> +		local_irq_save(flags);
> +		input = (struct hv_initialize_partition *)(*this_cpu_ptr(
> +			hyperv_pcpu_input_arg));
> +		input->partition_id = partition_id;
> +
> +		status = hv_do_hypercall(
> +				HVCALL_INITIALIZE_PARTITION,
> +				input, NULL);

FWIW, since the input is a single 64 bit value, and there's no output,
this could use hv_do_fast_hypercall8() instead, and avoid
needing to use the input arg page and the irq save/restore.  Would have
to check that the particular hypercall supports the "fast" version.

> +		local_irq_restore(flags);
> +
> +		if (status != HV_STATUS_INSUFFICIENT_MEMORY) {

Same comment about status being u64 and masking.

> +			if (status != HV_STATUS_SUCCESS)
> +				pr_err("%s: %s\n",
> +				       __func__, hv_status_to_string(status));
> +			ret = -hv_status_to_errno(status);
> +			break;
> +		}
> +		ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1);
> +	} while (!ret);
> +
> +	return ret;
> +}
> +
> +static int
> +hv_call_finalize_partition(u64 partition_id)
> +{
> +	struct hv_finalize_partition *input;
> +	int status;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	input = (struct hv_finalize_partition *)(*this_cpu_ptr(
> +		hyperv_pcpu_input_arg));
> +
> +	input->partition_id = partition_id;
> +	status = hv_do_hypercall(
> +			HVCALL_FINALIZE_PARTITION,
> +			input, NULL);
> +	local_irq_restore(flags);


Same comment about hv_do_fast_hypercall8() and about status
being a u64 and masking.

> +
> +	if (status != HV_STATUS_SUCCESS)
> +		pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> +
> +	return -hv_status_to_errno(status);
> +}
> +
> +static int
> +hv_call_delete_partition(u64 partition_id)
> +{
> +	struct hv_delete_partition *input;
> +	int status;
> +	unsigned long flags;
> +
> +	local_irq_save(flags);
> +	input = (struct hv_delete_partition *)(*this_cpu_ptr(
> +		hyperv_pcpu_input_arg));
> +
> +	input->partition_id = partition_id;
> +	status = hv_do_hypercall(
> +			HVCALL_DELETE_PARTITION,
> +			input, NULL);
> +	local_irq_restore(flags);

Same comments about hv_do_fast_hypercall8(), and
the status and masking.

> +
> +	if (status != HV_STATUS_SUCCESS)
> +		pr_err("%s: %s\n", __func__, hv_status_to_string(status));
> +
> +	return -hv_status_to_errno(status);
> +}
> +
>  static long
>  mshv_partition_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
>  {
> @@ -86,6 +223,17 @@ destroy_partition(struct mshv_partition *partition)
> 
>  	spin_unlock_irqrestore(&mshv.partitions.lock, flags);
> 
> +	/*
> +	 * There are no remaining references to the partition or vps,
> +	 * so the remaining cleanup can be lockless
> +	 */
> +
> +	/* Deallocates and unmaps everything including vcpus, GPA mappings etc */
> +	hv_call_finalize_partition(partition->id);
> +	/* TODO: Withdraw and free all pages we deposited */
> +
> +	hv_call_delete_partition(partition->id);
> +
>  	kfree(partition);
>  }
> 
> @@ -146,6 +294,9 @@ mshv_ioctl_create_partition(void __user *user_arg)
>  	if (copy_from_user(&args, user_arg, sizeof(args)))
>  		return -EFAULT;
> 
> +	/* Only support EXO partitions */
> +	args.flags |= HV_PARTITION_CREATION_FLAG_EXO_PARTITION;
> +
>  	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
>  	if (!partition)
>  		return -ENOMEM;
> @@ -156,11 +307,21 @@ mshv_ioctl_create_partition(void __user *user_arg)
>  		goto free_partition;
>  	}
> 
> +	ret = hv_call_create_partition(args.flags,
> +				       args.partition_creation_properties,
> +				       &partition->id);
> +	if (ret)
> +		goto put_fd;
> +
> +	ret = hv_call_initialize_partition(partition->id);
> +	if (ret)
> +		goto delete_partition;
> +
>  	file = anon_inode_getfile("mshv_partition", &mshv_partition_fops,
>  				  partition, O_RDWR);
>  	if (IS_ERR(file)) {
>  		ret = PTR_ERR(file);
> -		goto put_fd;
> +		goto finalize_partition;
>  	}
>  	refcount_set(&partition->ref_count, 1);
> 
> @@ -174,6 +335,10 @@ mshv_ioctl_create_partition(void __user *user_arg)
> 
>  release_file:
>  	file->f_op->release(file->f_inode, file);
> +finalize_partition:
> +	hv_call_finalize_partition(partition->id);
> +delete_partition:
> +	hv_call_delete_partition(partition->id);
>  put_fd:
>  	put_unused_fd(fd);
>  free_partition:
> --
> 2.25.1


  reply	other threads:[~2021-02-08 20:54 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-21  0:30 [RFC PATCH 00/18] Microsoft Hypervisor root partition ioctl interface Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 01/18] x86/hyperv: convert hyperv statuses to linux error codes Nuno Das Neves
2021-02-09 13:04   ` Vitaly Kuznetsov
2021-03-04 18:24     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 02/18] asm-generic/hyperv: convert hyperv statuses to strings Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 03/18] virt/mshv: minimal mshv module (/dev/mshv/) Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 04/18] virt/mshv: request version ioctl Nuno Das Neves
2021-02-08 19:41   ` Michael Kelley
2021-03-04 21:35     ` Nuno Das Neves
2021-02-09 13:11   ` Vitaly Kuznetsov
2021-03-04 18:43     ` Nuno Das Neves
2021-03-05  9:18       ` Vitaly Kuznetsov
2021-04-07  0:21         ` Nuno Das Neves
2021-04-07  7:38           ` Vitaly Kuznetsov
2021-04-07 13:43             ` Wei Liu
2021-04-07 14:02               ` Vitaly Kuznetsov
2021-04-07 14:19                 ` Wei Liu
2020-11-21  0:30 ` [RFC PATCH 05/18] virt/mshv: create partition ioctl Nuno Das Neves
2021-02-09 13:15   ` Vitaly Kuznetsov
2021-03-04 18:44     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 06/18] virt/mshv: create, initialize, finalize, delete partition hypercalls Nuno Das Neves
2021-02-08 19:42   ` Michael Kelley [this message]
2021-03-04 23:49     ` Nuno Das Neves
2021-03-04 23:58       ` Michael Kelley
2020-11-21  0:30 ` [RFC PATCH 07/18] virt/mshv: withdraw memory hypercall Nuno Das Neves
2021-02-08 19:44   ` Michael Kelley
2021-03-05 21:01     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 08/18] virt/mshv: map and unmap guest memory Nuno Das Neves
2021-02-08 19:45   ` Michael Kelley
2021-03-08 19:14     ` Nuno Das Neves
2021-03-08 19:30       ` Michael Kelley
2020-11-21  0:30 ` [RFC PATCH 09/18] virt/mshv: create vcpu ioctl Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 10/18] virt/mshv: get and set vcpu registers ioctls Nuno Das Neves
2021-02-08 19:47   ` Michael Kelley
2021-03-09  1:39     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 11/18] virt/mshv: set up synic pages for intercept messages Nuno Das Neves
2021-02-08 19:47   ` Michael Kelley
2021-03-11 19:37     ` Nuno Das Neves
2021-03-11 20:45       ` Michael Kelley
2020-11-21  0:30 ` [RFC PATCH 12/18] virt/mshv: run vp ioctl and isr Nuno Das Neves
2020-11-24 16:15   ` Wei Liu
2020-11-21  0:30 ` [RFC PATCH 13/18] virt/mshv: install intercept ioctl Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 14/18] virt/mshv: assert interrupt ioctl Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 15/18] virt/mshv: get and set vp state ioctls Nuno Das Neves
2021-02-08 19:48   ` Michael Kelley
2021-03-11 23:38     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 16/18] virt/mshv: mmap vp register page Nuno Das Neves
2021-02-08 19:49   ` Michael Kelley
2021-03-25 17:36     ` Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 17/18] virt/mshv: get and set partition property ioctls Nuno Das Neves
2020-11-21  0:30 ` [RFC PATCH 18/18] virt/mshv: Add enlightenment bits to create partition ioctl Nuno Das Neves
2020-11-24 16:18 ` [RFC PATCH 00/18] Microsoft Hypervisor root partition ioctl interface Wei Liu
2021-02-08 19:40 ` Michael Kelley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MWHPR21MB1593518130E3E0532894C516D78F9@MWHPR21MB1593.namprd21.prod.outlook.com \
    --to=mikelley@microsoft.com \
    --cc=Lillian.GrassinDrake@microsoft.com \
    --cc=kys@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nunodasneves@linux.microsoft.com \
    --cc=sunilmut@microsoft.com \
    --cc=viremana@linux.microsoft.com \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=wei.liu@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).