linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Boqun Feng <boqun.feng@gmail.com>
To: Michael Kelley <mikelley@microsoft.com>
Cc: hpa@zytor.com, kys@microsoft.com, haiyangz@microsoft.com,
	sthemmin@microsoft.com, wei.liu@kernel.org, decui@microsoft.com,
	luto@kernel.org, peterz@infradead.org, davem@davemloft.net,
	edumazet@google.com, kuba@kernel.org, pabeni@redhat.com,
	lpieralisi@kernel.org, robh@kernel.org, kw@linux.com,
	bhelgaas@google.com, arnd@arndb.de, hch@infradead.org,
	m.szyprowski@samsung.com, robin.murphy@arm.com,
	thomas.lendacky@amd.com, brijesh.singh@amd.com,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, Tianyu.Lan@microsoft.com,
	kirill.shutemov@linux.intel.com,
	sathyanarayanan.kuppuswamy@linux.intel.com, ak@linux.intel.com,
	isaku.yamahata@intel.com, dan.j.williams@intel.com,
	jane.chu@oracle.com, seanjc@google.com, tony.luck@intel.com,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	linux-hyperv@vger.kernel.org, netdev@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-arch@vger.kernel.org,
	iommu@lists.linux.dev
Subject: Re: [PATCH 12/12] PCI: hv: Enable PCI pass-thru devices in Confidential VMs
Date: Mon, 7 Nov 2022 13:41:48 -0800	[thread overview]
Message-ID: <Y2l7nJ3l1jeOebGG@Boquns-Mac-mini.local> (raw)
In-Reply-To: <1666288635-72591-13-git-send-email-mikelley@microsoft.com>

On Thu, Oct 20, 2022 at 10:57:15AM -0700, Michael Kelley wrote:
> For PCI pass-thru devices in a Confidential VM, Hyper-V requires
> that PCI config space be accessed via hypercalls.  In normal VMs,
> config space accesses are trapped to the Hyper-V host and emulated.
> But in a confidential VM, the host can't access guest memory to
> decode the instruction for emulation, so an explicit hypercall must
> be used.
> 
> Update the PCI config space access functions to use the hypercalls
> when such use is indicated by Hyper-V flags.  Also, set the flag to
> allow the Hyper-V PCI driver to be loaded and used in a Confidential
> VM (a.k.a., "Isolation VM").  The driver has previously been hardened
> against a malicious Hyper-V host[1].
> 
> [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@gmail.com/
> 
> Co-developed-by: Dexuan Cui <decui@microsoft.com>
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> Signed-off-by: Michael Kelley <mikelley@microsoft.com>
> ---
>  drivers/hv/channel_mgmt.c           |   2 +-
>  drivers/pci/controller/pci-hyperv.c | 153 +++++++++++++++++++++---------------
>  2 files changed, 92 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 5b12040..c0f9ac2 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -67,7 +67,7 @@
>  	{ .dev_type = HV_PCIE,
>  	  HV_PCIE_GUID,
>  	  .perf_device = false,
> -	  .allowed_in_isolated = false,
> +	  .allowed_in_isolated = true,
>  	},
>  
>  	/* Synthetic Frame Buffer */
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 02ebf3e..9873296 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -514,6 +514,7 @@ struct hv_pcibus_device {
>  
>  	/* Highest slot of child device with resources allocated */
>  	int wslot_res_allocated;
> +	bool use_calls; /* Use hypercalls to access mmio cfg space */
>  
>  	/* hypercall arg, must not cross page boundary */
>  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> @@ -1134,8 +1135,9 @@ static void hv_pci_write_mmio(phys_addr_t gpa, int size, u32 val)
>  static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
>  				     int size, u32 *val)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	int offset = where + CFG_PAGE_OFFSET;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>  
>  	/*
>  	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
> @@ -1163,56 +1165,74 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
>  		 */
>  		*val = 0;
>  	} else if (where + size <= CFG_PAGE_SIZE) {
> -		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> -		/* Choose the function to be read. (See comment above) */
> -		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -		/* Make sure the function was chosen before we start reading. */
> -		mb();
> -		/* Read from that function's config space. */
> -		switch (size) {
> -		case 1:
> -			*val = readb(addr);
> -			break;
> -		case 2:
> -			*val = readw(addr);
> -			break;
> -		default:
> -			*val = readl(addr);
> -			break;
> +
> +		spin_lock_irqsave(&hbus->config_lock, flags);
> +		if (hbus->use_calls) {
> +			phys_addr_t addr = hbus->mem_config->start + offset;
> +
> +			hv_pci_write_mmio(hbus->mem_config->start, 4,
> +						hpdev->desc.win_slot.slot);
> +			hv_pci_read_mmio(addr, size, val);
> +		} else {
> +			void __iomem *addr = hbus->cfg_addr + offset;
> +
> +			/* Choose the function to be read. (See comment above) */
> +			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +			/* Make sure the function was chosen before reading. */
> +			mb();
> +			/* Read from that function's config space. */
> +			switch (size) {
> +			case 1:
> +				*val = readb(addr);
> +				break;
> +			case 2:
> +				*val = readw(addr);
> +				break;
> +			default:
> +				*val = readl(addr);
> +				break;
> +			}

An mb() is missing here?

>  		}
> -		/*
> -		 * Make sure the read was done before we release the spinlock
> -		 * allowing consecutive reads/writes.
> -		 */
> -		mb();
> -		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +		spin_unlock_irqrestore(&hbus->config_lock, flags);
>  	} else {
> -		dev_err(&hpdev->hbus->hdev->device,
> +		dev_err(&hbus->hdev->device,
>  			"Attempt to read beyond a function's config space.\n");
>  	}
>  }
>  
>  static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	u32 val;
>  	u16 ret;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
> -			     PCI_VENDOR_ID;
>  
> -	spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> +	spin_lock_irqsave(&hbus->config_lock, flags);
>  
> -	/* Choose the function to be read. (See comment above) */
> -	writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -	/* Make sure the function was chosen before we start reading. */
> -	mb();
> -	/* Read from that function's config space. */
> -	ret = readw(addr);
> -	/*
> -	 * mb() is not required here, because the spin_unlock_irqrestore()
> -	 * is a barrier.
> -	 */
> +	if (hbus->use_calls) {
> +		phys_addr_t addr = hbus->mem_config->start +
> +					 CFG_PAGE_OFFSET + PCI_VENDOR_ID;
> +
> +		hv_pci_write_mmio(hbus->mem_config->start, 4,
> +					hpdev->desc.win_slot.slot);
> +		hv_pci_read_mmio(addr, 2, &val);
> +		ret = val;  /* Truncates to 16 bits */
> +	} else {
> +		void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET +
> +					     PCI_VENDOR_ID;
> +		/* Choose the function to be read. (See comment above) */
> +		writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +		/* Make sure the function was chosen before we start reading. */
> +		mb();
> +		/* Read from that function's config space. */
> +		ret = readw(addr);
> +		/*
> +		 * mb() is not required here, because the
> +		 * spin_unlock_irqrestore() is a barrier.
> +		 */
> +	}
>  
> -	spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +	spin_unlock_irqrestore(&hbus->config_lock, flags);
>  
>  	return ret;
>  }
> @@ -1227,38 +1247,45 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
>  static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
>  				      int size, u32 val)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	int offset = where + CFG_PAGE_OFFSET;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>  
>  	if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
>  	    where + size <= PCI_CAPABILITY_LIST) {
>  		/* SSIDs and ROM BARs are read-only */
>  	} else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
> -		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> -		/* Choose the function to be written. (See comment above) */
> -		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -		/* Make sure the function was chosen before we start writing. */
> -		wmb();
> -		/* Write to that function's config space. */
> -		switch (size) {
> -		case 1:
> -			writeb(val, addr);
> -			break;
> -		case 2:
> -			writew(val, addr);
> -			break;
> -		default:
> -			writel(val, addr);
> -			break;
> +		spin_lock_irqsave(&hbus->config_lock, flags);
> +
> +		if (hbus->use_calls) {
> +			phys_addr_t addr = hbus->mem_config->start + offset;
> +
> +			hv_pci_write_mmio(hbus->mem_config->start, 4,
> +						hpdev->desc.win_slot.slot);
> +			hv_pci_write_mmio(addr, size, val);
> +		} else {
> +			void __iomem *addr = hbus->cfg_addr + offset;
> +
> +			/* Choose the function to write. (See comment above) */
> +			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +			/* Make sure the function was chosen before writing. */
> +			wmb();
> +			/* Write to that function's config space. */
> +			switch (size) {
> +			case 1:
> +				writeb(val, addr);
> +				break;
> +			case 2:
> +				writew(val, addr);
> +				break;
> +			default:
> +				writel(val, addr);
> +				break;
> +			}

Ditto, an mb() is needed here to align with the old code.

With these fixed, feel free to add

Reviewed-by: Boqun Feng <boqun.feng@gmail.com>

Regards,
BOqun

>  		}
> -		/*
> -		 * Make sure the write was done before we release the spinlock
> -		 * allowing consecutive reads/writes.
> -		 */
> -		mb();
> -		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +		spin_unlock_irqrestore(&hbus->config_lock, flags);
>  	} else {
> -		dev_err(&hpdev->hbus->hdev->device,
> +		dev_err(&hbus->hdev->device,
>  			"Attempt to write beyond a function's config space.\n");
>  	}
>  }
> @@ -3568,6 +3595,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus->bridge->domain_nr = dom;
>  #ifdef CONFIG_X86
>  	hbus->sysdata.domain = dom;
> +	hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS);
>  #elif defined(CONFIG_ARM64)
>  	/*
>  	 * Set the PCI bus parent to be the corresponding VMbus
> @@ -3577,6 +3605,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	 * information to devices created on the bus.
>  	 */
>  	hbus->sysdata.parent = hdev->device.parent;
> +	hbus->use_calls = false;
>  #endif
>  
>  	hbus->hdev = hdev;
> -- 
> 1.8.3.1
> 
> 

      reply	other threads:[~2022-11-07 21:41 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-20 17:57 [PATCH 00/12] Add PCI pass-thru support to Hyper-V Confidential VMs Michael Kelley
2022-10-20 17:57 ` [PATCH 01/12] x86/ioremap: Fix page aligned size calculation in __ioremap_caller() Michael Kelley
2022-10-20 17:57 ` [PATCH 02/12] x86/ioapic: Gate decrypted mapping on cc_platform_has() attribute Michael Kelley
2022-11-02 13:03   ` Wei Liu
2022-10-20 17:57 ` [PATCH 03/12] x86/hyperv: Reorder code in prep for subsequent patch Michael Kelley
2022-11-02 17:28   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 04/12] Drivers: hv: Explicitly request decrypted in vmap_pfn() calls Michael Kelley
2022-11-03 14:02   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 05/12] x86/hyperv: Change vTOM handling to use standard coco mechanisms Michael Kelley
2022-11-03 14:00   ` Tianyu Lan
2022-11-03 14:12     ` Michael Kelley (LINUX)
2022-11-09  1:09   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 06/12] swiotlb: Remove bounce buffer remapping for Hyper-V Michael Kelley
2022-10-24 15:36   ` Christoph Hellwig
2022-10-20 17:57 ` [PATCH 07/12] Drivers: hv: vmbus: Remove second mapping of VMBus monitor pages Michael Kelley
2022-11-03 14:27   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 08/12] Drivers: hv: vmbus: Remove second way of mapping ring buffers Michael Kelley
2022-11-03 15:07   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 09/12] hv_netvsc: Remove second mapping of send and recv buffers Michael Kelley
2022-11-03 15:09   ` Tianyu Lan
2022-11-08 10:36   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 10/12] Drivers: hv: Don't remap addresses that are above shared_gpa_boundary Michael Kelley
2022-11-03 15:25   ` Tianyu Lan
2022-10-20 17:57 ` [PATCH 11/12] PCI: hv: Add hypercalls to read/write MMIO space Michael Kelley
2022-10-20 19:04   ` Bjorn Helgaas
2022-10-21 14:04     ` Michael Kelley (LINUX)
2022-10-20 17:57 ` [PATCH 12/12] PCI: hv: Enable PCI pass-thru devices in Confidential VMs Michael Kelley
2022-11-07 21:41   ` Boqun Feng [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y2l7nJ3l1jeOebGG@Boquns-Mac-mini.local \
    --to=boqun.feng@gmail.com \
    --cc=Tianyu.Lan@microsoft.com \
    --cc=ak@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=bhelgaas@google.com \
    --cc=bp@alien8.de \
    --cc=brijesh.singh@amd.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=decui@microsoft.com \
    --cc=edumazet@google.com \
    --cc=haiyangz@microsoft.com \
    --cc=hch@infradead.org \
    --cc=hpa@zytor.com \
    --cc=iommu@lists.linux.dev \
    --cc=isaku.yamahata@intel.com \
    --cc=jane.chu@oracle.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kuba@kernel.org \
    --cc=kw@linux.com \
    --cc=kys@microsoft.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lpieralisi@kernel.org \
    --cc=luto@kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=mikelley@microsoft.com \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=peterz@infradead.org \
    --cc=robh@kernel.org \
    --cc=robin.murphy@arm.com \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=seanjc@google.com \
    --cc=sthemmin@microsoft.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tony.luck@intel.com \
    --cc=wei.liu@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).