RE: [PATCH V2 10/13] x86/hyper-v: Add HvFlushGuestAddressList hypercall support

From: "Michael Kelley (EOSG)" <Michael.H.Kelley@microsoft.com>
To: Tianyu Lan <Tianyu.Lan@microsoft.com>
Cc: KY Srinivasan <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"hpa@zytor.com" <hpa@zytor.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"pbonzini@redhat.com" <pbonzini@redhat.com>,
	"rkrcmar@redhat.com" <rkrcmar@redhat.com>,
	"devel@linuxdriverproject.org" <devel@linuxdriverproject.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
	vkuznets <vkuznets@redhat.com>,
	Jork Loeser <Jork.Loeser@microsoft.com>
Subject: RE: [PATCH V2 10/13] x86/hyper-v: Add HvFlushGuestAddressList hypercall support
Date: Wed, 19 Sep 2018 17:00:01 +0000	[thread overview]
Message-ID: <CY4PR21MB07737E82C42696BA39A5DA31DC1C0@CY4PR21MB0773.namprd21.prod.outlook.com> (raw)
In-Reply-To: <20180918031754.113013-11-Tianyu.Lan@microsoft.com>

From: Tianyu Lan  Sent: Monday, September 17, 2018 8:19 PM
>
>  #include <linux/types.h>
>  #include <asm/hyperv-tlfs.h>
>  #include <asm/mshyperv.h>
>  #include <asm/tlbflush.h>
> +#include <asm/kvm_host.h>

Hopefully asm/kvm_host.h does not need to be #included, given
the new code structure.

> 
>  #include <asm/trace/hyperv.h>
> 
> +/*
> + *  MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
> + *  by the bitwidth of "additional_pages" in union hv_gpa_page_range.
> + */
> +#define MAX_FLUSH_PAGES (2048)
> +
> +/*
> + * All input flush parameters are in single page. The max flush count
> + * is equal with how many entries of union hv_gpa_page_range can be
> + * populated in the input parameter page. MAX_FLUSH_REP_COUNT
> + * = (4096 - 16) / 8. (“Page Size” - "Address Space" - "Flags") /
> + * "GPA Range".
> + */
> +#define MAX_FLUSH_REP_COUNT (510)
> +

I would recommend putting the above two definitions in
hyperv-tlfs.h.  They are directly tied to the data structures defined
by Hyper-V in the TLFS.  Put MAX_FLUSH_PAGES immediately after
the definition for hv_gpa_page_range so that the dependency is
obvious.

For MAX_FLUSH_REP_COUNT, can you do the calculation in
the #define rather than just in the comment?  Alternatively, define
the gpa_list[] array to be of MAX_FLUSH_REP_COUNT size, and then
add a compile-time assert that struct hv_guest_mapping_flush_list
is exactly one page in size.   It's just a good way to use the
compiler to help check for mistakes.

Also prefix them both with HV_ since they will be more
globally visible as part of hyperv-tlfs.h.

>  int hyperv_flush_guest_mapping(u64 as)
>  {
>  	struct hv_guest_mapping_flush **flush_pcpu;
> @@ -54,3 +71,89 @@ int hyperv_flush_guest_mapping(u64 as)
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
> +
> +static int fill_flush_list(union hv_gpa_page_range gpa_list[],
> +		int offset, u64 start_gfn, u64 pages)
> +{
> +	int gpa_n = offset;
> +	u64 cur = start_gfn;
> +	u64 additional_pages;
> +
> +	do {
> +		if (gpa_n >= MAX_FLUSH_REP_COUNT) {
> +			pr_warn("Request exceeds HvFlushGuestList max flush count.");
> +			return -ENOSPC;

I wonder if the warning is really needed.  When the error is returned up
through the higher levels of code, won't the higher levels just fall back to
the non-enlightened flush code?  So nothing actually goes wrong; it's just
that a slower code path gets taken.  A comment about such expectations
might be helpful.

> +		}
> +
> +		if (pages > MAX_FLUSH_PAGES) {
> +			additional_pages = MAX_FLUSH_PAGES - 1;
> +			pages -= MAX_FLUSH_PAGES;
> +		} else {
> +			additional_pages = pages - 1;
> +			pages = 0;
> +		}

The above code is really doing:

		additional_pages = min(pages, MAX_FLUSH_PAGES) - 1;
		pages -= additional_pages + 1;

And you might want to move the decrement of 'pages' down to the
bottom of the loop where you update the other loop variables.

> +
> +		gpa_list[gpa_n].page.additional_pages = additional_pages;
> +		gpa_list[gpa_n].page.largepage = false;
> +		gpa_list[gpa_n].page.basepfn = cur;
> +
> +		cur += additional_pages + 1;
> +		gpa_n++;
> +	} while (pages > 0);
> +
> +	return gpa_n;
> +}
> +
> +int hyperv_flush_guest_mapping_range(u64 as, struct hyperv_tlb_range *range)
> +{
> +	struct hv_guest_mapping_flush_list **flush_pcpu;
> +	struct hv_guest_mapping_flush_list *flush;
> +	u64 status = 0;
> +	unsigned long flags;
> +	int ret = -ENOTSUPP;
> +	int gpa_n = 0;
> +
> +	if (!hv_hypercall_pg)
> +		goto fault;
> +
> +	local_irq_save(flags);
> +
> +	flush_pcpu = (struct hv_guest_mapping_flush_list **)
> +		this_cpu_ptr(hyperv_pcpu_input_arg);
> +
> +	flush = *flush_pcpu;
> +	if (unlikely(!flush)) {
> +		local_irq_restore(flags);
> +		goto fault;
> +	}
> +
> +	flush->address_space = as;
> +	flush->flags = 0;
> +
> +	if (!range->flush_list)
> +		gpa_n = fill_flush_list(flush->gpa_list, gpa_n,
> +				range->start_gfn, range->pages);
> +	else if (range->parse_flush_list_func)
> +		gpa_n = range->parse_flush_list_func(flush->gpa_list, gpa_n,
> +				range->flush_list, fill_flush_list);
> +	else
> +		gpa_n = -1;
> +
> +	if (gpa_n < 0) {
> +		local_irq_restore(flags);
> +		goto fault;
> +	}
> +
> +	status = hv_do_rep_hypercall(HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST,
> +				     gpa_n, 0, flush, NULL);
> +
> +	local_irq_restore(flags);
> +
> +	if (!(status & HV_HYPERCALL_RESULT_MASK))
> +		ret = 0;
> +	else
> +		ret = status;
> +fault:
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping_range);
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index e977b6b3a538..512f22b49999 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -353,6 +353,7 @@ struct hv_tsc_emulation_status {
>  #define HVCALL_POST_MESSAGE			0x005c
>  #define HVCALL_SIGNAL_EVENT			0x005d
>  #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
> +#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
> 
>  #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
>  #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
> @@ -750,6 +751,22 @@ struct hv_guest_mapping_flush {
>  	u64 flags;
>  };
> 
> +/* HvFlushGuestPhysicalAddressList hypercall */
> +union hv_gpa_page_range {
> +	u64 address_space;
> +	struct {
> +		u64 additional_pages:11;
> +		u64 largepage:1;
> +		u64 basepfn:52;
> +	} page;
> +};
> +
> +struct hv_guest_mapping_flush_list {
> +	u64 address_space;
> +	u64 flags;
> +	union hv_gpa_page_range gpa_list[];
> +};
> +
>  /* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
>  struct hv_tlb_flush {
>  	u64 address_space;
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index f37704497d8f..19f49fbcf94d 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -22,6 +22,16 @@ struct ms_hyperv_info {
> 
>  extern struct ms_hyperv_info ms_hyperv;
> 
> +struct hyperv_tlb_range {
> +	u64 start_gfn;
> +	u64 pages;
> +	struct list_head *flush_list;
> +	int (*parse_flush_list_func)(union hv_gpa_page_range gpa_list[],
> +		int offset, struct list_head *flush_list,
> +		int (*fill_flush_list)(union hv_gpa_page_range gpa_list[],
> +		int offset, u64 start_gfn, u64 end_gfn));
> +};
> +
>  /*
>   * Generate the guest ID.
>   */
> @@ -348,6 +358,7 @@ void set_hv_tscchange_cb(void (*cb)(void));
>  void clear_hv_tscchange_cb(void);
>  void hyperv_stop_tsc_emulation(void);
>  int hyperv_flush_guest_mapping(u64 as);
> +int hyperv_flush_guest_mapping_range(u64 as, struct hyperv_tlb_range *range);
> 
>  #ifdef CONFIG_X86_64
>  void hv_apic_init(void);
> @@ -368,6 +379,11 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
>  	return NULL;
>  }
>  static inline int hyperv_flush_guest_mapping(u64 as) { return -1; }
> +static inline int hyperv_flush_guest_mapping_range(u64 as,
> +		struct hyperv_tlb_range *range)
> +{
> +	return -1;
> +}
>  #endif /* CONFIG_HYPERV */
> 
>  #ifdef CONFIG_HYPERV_TSCPAGE
> --
> 2.14.4