netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Michael Kelley <mikelley@microsoft.com>
To: Tianyu Lan <ltykernel@gmail.com>,
	KY Srinivasan <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	"wei.liu@kernel.org" <wei.liu@kernel.org>,
	Dexuan Cui <decui@microsoft.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"bp@alien8.de" <bp@alien8.de>, "x86@kernel.org" <x86@kernel.org>,
	"hpa@zytor.com" <hpa@zytor.com>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>,
	"luto@kernel.org" <luto@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"konrad.wilk@oracle.com" <konrad.wilk@oracle.com>,
	"boris.ostrovsky@oracle.com" <boris.ostrovsky@oracle.com>,
	"jgross@suse.com" <jgross@suse.com>,
	"sstabellini@kernel.org" <sstabellini@kernel.org>,
	"joro@8bytes.org" <joro@8bytes.org>,
	"will@kernel.org" <will@kernel.org>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"kuba@kernel.org" <kuba@kernel.org>,
	"jejb@linux.ibm.com" <jejb@linux.ibm.com>,
	"martin.petersen@oracle.com" <martin.petersen@oracle.com>,
	"arnd@arndb.de" <arnd@arndb.de>, "hch@lst.de" <hch@lst.de>,
	"m.szyprowski@samsung.com" <m.szyprowski@samsung.com>,
	"robin.murphy@arm.com" <robin.murphy@arm.com>,
	"thomas.lendacky@amd.com" <thomas.lendacky@amd.com>,
	"brijesh.singh@amd.com" <brijesh.singh@amd.com>,
	"ardb@kernel.org" <ardb@kernel.org>,
	Tianyu Lan <Tianyu.Lan@microsoft.com>,
	"pgonda@google.com" <pgonda@google.com>,
	"martin.b.radev@gmail.com" <martin.b.radev@gmail.com>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"kirill.shutemov@linux.intel.com"
	<kirill.shutemov@linux.intel.com>,
	"rppt@kernel.org" <rppt@kernel.org>,
	"sfr@canb.auug.org.au" <sfr@canb.auug.org.au>,
	"saravanand@fb.com" <saravanand@fb.com>,
	"krish.sadhukhan@oracle.com" <krish.sadhukhan@oracle.com>,
	"aneesh.kumar@linux.ibm.com" <aneesh.kumar@linux.ibm.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>,
	"rientjes@google.com" <rientjes@google.com>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	"tj@kernel.org" <tj@kernel.org>
Cc: "iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	"linux-arch@vger.kernel.org" <linux-arch@vger.kernel.org>,
	"linux-hyperv@vger.kernel.org" <linux-hyperv@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	vkuznets <vkuznets@redhat.com>,
	"parri.andrea@gmail.com" <parri.andrea@gmail.com>,
	"dave.hansen@intel.com" <dave.hansen@intel.com>
Subject: RE: [PATCH V3 13/13] HV/Storvsc: Add Isolation VM support for storvsc driver
Date: Thu, 19 Aug 2021 18:17:40 +0000	[thread overview]
Message-ID: <MWHPR21MB1593EEF30FFD5C60ED744985D7C09@MWHPR21MB1593.namprd21.prod.outlook.com> (raw)
In-Reply-To: <20210809175620.720923-14-ltykernel@gmail.com>

From: Tianyu Lan <ltykernel@gmail.com> Sent: Monday, August 9, 2021 10:56 AM
> 

Subject line tag should be "scsi: storvsc:"

> In Isolation VM, all shared memory with host needs to mark visible
> to host via hvcall. vmbus_establish_gpadl() has already done it for
> storvsc rx/tx ring buffer. The page buffer used by vmbus_sendpacket_
> mpb_desc() still need to handle. Use DMA API to map/umap these

s/need to handle/needs to be handled/

> memory during sending/receiving packet and Hyper-V DMA ops callback
> will use swiotlb function to allocate bounce buffer and copy data
> from/to bounce buffer.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  drivers/scsi/storvsc_drv.c | 68 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 63 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
> index 328bb961c281..78320719bdd8 100644
> --- a/drivers/scsi/storvsc_drv.c
> +++ b/drivers/scsi/storvsc_drv.c
> @@ -21,6 +21,8 @@
>  #include <linux/device.h>
>  #include <linux/hyperv.h>
>  #include <linux/blkdev.h>
> +#include <linux/io.h>
> +#include <linux/dma-mapping.h>
>  #include <scsi/scsi.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <scsi/scsi_host.h>
> @@ -427,6 +429,8 @@ struct storvsc_cmd_request {
>  	u32 payload_sz;
> 
>  	struct vstor_packet vstor_packet;
> +	u32 hvpg_count;

This count is really the number of entries in the dma_range
array, right?  If so, perhaps "dma_range_count" would be
a better name so that it is more tightly associated.

> +	struct hv_dma_range *dma_range;
>  };
> 
> 
> @@ -509,6 +513,14 @@ struct storvsc_scan_work {
>  	u8 tgt_id;
>  };
> 
> +#define storvsc_dma_map(dev, page, offset, size, dir) \
> +	dma_map_page(dev, page, offset, size, dir)
> +
> +#define storvsc_dma_unmap(dev, dma_range, dir)		\
> +		dma_unmap_page(dev, dma_range.dma,	\
> +			       dma_range.mapping_size,	\
> +			       dir ? DMA_FROM_DEVICE : DMA_TO_DEVICE)
> +

Each of these macros is used only once.  IMHO, they don't
add a lot of value.  Just coding dma_map/unmap_page()
inline would be fine and eliminate these lines of code.

>  static void storvsc_device_scan(struct work_struct *work)
>  {
>  	struct storvsc_scan_work *wrk;
> @@ -1260,6 +1272,7 @@ static void storvsc_on_channel_callback(void *context)
>  	struct hv_device *device;
>  	struct storvsc_device *stor_device;
>  	struct Scsi_Host *shost;
> +	int i;
> 
>  	if (channel->primary_channel != NULL)
>  		device = channel->primary_channel->device_obj;
> @@ -1314,6 +1327,15 @@ static void storvsc_on_channel_callback(void *context)
>  				request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
>  			}
> 
> +			if (request->dma_range) {
> +				for (i = 0; i < request->hvpg_count; i++)
> +					storvsc_dma_unmap(&device->device,
> +						request->dma_range[i],
> +						request->vstor_packet.vm_srb.data_in == READ_TYPE);

I think you can directly get the DMA direction as request->cmd->sc_data_direction.

> +
> +				kfree(request->dma_range);
> +			}
> +
>  			storvsc_on_receive(stor_device, packet, request);
>  			continue;
>  		}
> @@ -1810,7 +1832,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
>  		unsigned int hvpgoff, hvpfns_to_add;
>  		unsigned long offset_in_hvpg = offset_in_hvpage(sgl->offset);
>  		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
> +		dma_addr_t dma;
>  		u64 hvpfn;
> +		u32 size;
> 
>  		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
> 
> @@ -1824,6 +1848,13 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
>  		payload->range.len = length;
>  		payload->range.offset = offset_in_hvpg;
> 
> +		cmd_request->dma_range = kcalloc(hvpg_count,
> +				 sizeof(*cmd_request->dma_range),
> +				 GFP_ATOMIC);

With this patch, it appears that storvsc_queuecommand() is always
doing bounce buffering, even when running in a non-isolated VM.
The dma_range is always allocated, and the inner loop below does
the dma mapping for every I/O page.  The corresponding code in
storvsc_on_channel_callback() that does the dma unmap allows for
the dma_range to be NULL, but that never happens.

> +		if (!cmd_request->dma_range) {
> +			ret = -ENOMEM;

The other memory allocation failure in this function returns
SCSI_MLQUEUE_DEVICE_BUSY.   It may be debatable as to whether
that's the best approach, but that's a topic for a different patch.  I
would suggest being consistent and using the same return code
here.

> +			goto free_payload;
> +		}
> 
>  		for (i = 0; sgl != NULL; sgl = sg_next(sgl)) {
>  			/*
> @@ -1847,9 +1878,29 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
>  			 * last sgl should be reached at the same time that
>  			 * the PFN array is filled.
>  			 */
> -			while (hvpfns_to_add--)
> -				payload->range.pfn_array[i++] =	hvpfn++;
> +			while (hvpfns_to_add--) {
> +				size = min(HV_HYP_PAGE_SIZE - offset_in_hvpg,
> +					   (unsigned long)length);
> +				dma = storvsc_dma_map(&dev->device, pfn_to_page(hvpfn++),
> +						      offset_in_hvpg, size,
> +						      scmnd->sc_data_direction);
> +				if (dma_mapping_error(&dev->device, dma)) {
> +					ret = -ENOMEM;

The typical error from dma_map_page() will be running out of
bounce buffer memory.   This is a transient condition that should be
retried at the higher levels.  So make sure to return an error code
that indicates the I/O should be resubmitted.

> +					goto free_dma_range;
> +				}
> +
> +				if (offset_in_hvpg) {
> +					payload->range.offset = dma & ~HV_HYP_PAGE_MASK;
> +					offset_in_hvpg = 0;
> +				}

I'm not clear on why payload->range.offset needs to be set again.
Even after the dma mapping is done, doesn't the offset in the first
page have to be the same?  If it wasn't the same, Hyper-V wouldn't
be able to process the PFN list correctly.  In fact, couldn't the above
code just always set offset_in_hvpg = 0?

> +
> +				cmd_request->dma_range[i].dma = dma;
> +				cmd_request->dma_range[i].mapping_size = size;
> +				payload->range.pfn_array[i++] = dma >> HV_HYP_PAGE_SHIFT;
> +				length -= size;
> +			}
>  		}
> +		cmd_request->hvpg_count = hvpg_count;

This line just saves the size of the dma_range array.  Could
it be moved up with the code that allocates the dma_range
array?  To me, it would make more sense to have all that
code together in one place.

>  	}

The whole approach here is to do dma remapping on each individual page
of the I/O buffer.  But wouldn't it be possible to use dma_map_sg() to map
each scatterlist entry as a unit?  Each scatterlist entry describes a range of
physically contiguous memory.  After dma_map_sg(), the resulting dma
address must also refer to a physically contiguous range in the swiotlb
bounce buffer memory.   So at the top of the "for" loop over the scatterlist
entries, do dma_map_sg() if we're in an isolated VM.  Then compute the
hvpfn value based on the dma address instead of sg_page().  But everything
else is the same, and the inner loop for populating the pfn_array is unmodified.
Furthermore, the dma_range array that you've added is not needed, since
scatterlist entries already have a dma_address field for saving the mapped
address, and dma_unmap_sg() uses that field.

One thing:  There's a maximum swiotlb mapping size, which I think works
out to be 256 Kbytes.  See swiotlb_max_mapping_size().  We need to make
sure that we don't get a scatterlist entry bigger than this size.  But I think
this already happens because you set the device->dma_mask field in
Patch 11 of this series.  __scsi_init_queue checks for this setting and
sets max_sectors to limit transfers to the max mapping size.

> 
>  	cmd_request->payload = payload;
> @@ -1860,13 +1911,20 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd)
>  	put_cpu();
> 
>  	if (ret == -EAGAIN) {
> -		if (payload_sz > sizeof(cmd_request->mpb))
> -			kfree(payload);
>  		/* no more space */
> -		return SCSI_MLQUEUE_DEVICE_BUSY;
> +		ret = SCSI_MLQUEUE_DEVICE_BUSY;
> +		goto free_dma_range;
>  	}
> 
>  	return 0;
> +
> +free_dma_range:
> +	kfree(cmd_request->dma_range);
> +
> +free_payload:
> +	if (payload_sz > sizeof(cmd_request->mpb))
> +		kfree(payload);
> +	return ret;
>  }
> 
>  static struct scsi_host_template scsi_driver = {
> --
> 2.25.1


  reply	other threads:[~2021-08-19 18:17 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-09 17:56 [PATCH V3 00/13] x86/Hyper-V: Add Hyper-V Isolation VM support Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 01/13] x86/HV: Initialize GHCB page in Isolation VM Tianyu Lan
2021-08-10 10:56   ` Wei Liu
2021-08-10 12:17     ` Tianyu Lan
2021-08-12 19:14   ` Michael Kelley
2021-08-13 15:46     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 02/13] x86/HV: Initialize shared memory boundary in the " Tianyu Lan
2021-08-12 19:18   ` Michael Kelley
2021-08-14 13:32     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 03/13] x86/HV: Add new hvcall guest address host visibility support Tianyu Lan
2021-08-09 22:12   ` Dave Hansen
2021-08-10 13:09     ` Tianyu Lan
2021-08-10 11:03   ` Wei Liu
2021-08-10 12:25     ` Tianyu Lan
2021-08-12 19:36   ` Michael Kelley
2021-08-12 21:10   ` Michael Kelley
2021-08-09 17:56 ` [PATCH V3 04/13] HV: Mark vmbus ring buffer visible to host in Isolation VM Tianyu Lan
2021-08-12 22:20   ` Michael Kelley
2021-08-15 15:21     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 05/13] HV: Add Write/Read MSR registers via ghcb page Tianyu Lan
2021-08-13 19:31   ` Michael Kelley
2021-08-13 20:26     ` Michael Kelley
2021-08-24  8:45   ` Christoph Hellwig
2021-08-09 17:56 ` [PATCH V3 06/13] HV: Add ghcb hvcall support for SNP VM Tianyu Lan
2021-08-13 20:42   ` Michael Kelley
2021-08-09 17:56 ` [PATCH V3 07/13] HV/Vmbus: Add SNP support for VMbus channel initiate message Tianyu Lan
2021-08-13 21:28   ` Michael Kelley
2021-08-09 17:56 ` [PATCH V3 08/13] HV/Vmbus: Initialize VMbus ring buffer for Isolation VM Tianyu Lan
2021-08-16 17:28   ` Michael Kelley
2021-08-17 15:36     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 09/13] DMA: Add dma_map_decrypted/dma_unmap_encrypted() function Tianyu Lan
2021-08-12 12:26   ` Christoph Hellwig
2021-08-12 15:38     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 10/13] x86/Swiotlb: Add Swiotlb bounce buffer remap function for HV IVM Tianyu Lan
2021-08-12 12:27   ` Christoph Hellwig
2021-08-13 17:58     ` Tianyu Lan
2021-08-16 14:50       ` Tianyu Lan
2021-08-19  8:49         ` Christoph Hellwig
2021-08-19  9:59           ` Tianyu Lan
2021-08-19 10:02             ` Christoph Hellwig
2021-08-19 10:03               ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 11/13] HV/IOMMU: Enable swiotlb bounce buffer for Isolation VM Tianyu Lan
2021-08-19 18:11   ` Michael Kelley
2021-08-20  4:13     ` hch
2021-08-20  9:32     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 12/13] HV/Netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
2021-08-19 18:14   ` Michael Kelley
2021-08-20  4:21     ` hch
2021-08-20 13:11       ` Tianyu Lan
2021-08-20 13:30       ` Tom Lendacky
2021-08-20 18:20     ` Tianyu Lan
2021-08-09 17:56 ` [PATCH V3 13/13] HV/Storvsc: Add Isolation VM support for storvsc driver Tianyu Lan
2021-08-19 18:17   ` Michael Kelley [this message]
2021-08-20  4:32     ` hch
2021-08-20 15:40       ` Michael Kelley
2021-08-24  8:49         ` min_align_mask " hch
2021-08-20 16:01       ` Tianyu Lan
2021-08-20 15:20     ` Tianyu Lan
2021-08-20 15:37       ` Tianyu Lan
2021-08-20 16:08       ` Michael Kelley
2021-08-20 18:04         ` Tianyu Lan
2021-08-20 19:22           ` Michael Kelley
2021-08-24  8:46           ` hch
2021-08-16 14:55 ` [PATCH V3 00/13] x86/Hyper-V: Add Hyper-V Isolation VM support Michael Kelley

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MWHPR21MB1593EEF30FFD5C60ED744985D7C09@MWHPR21MB1593.namprd21.prod.outlook.com \
    --to=mikelley@microsoft.com \
    --cc=Tianyu.Lan@microsoft.com \
    --cc=akpm@linux-foundation.org \
    --cc=aneesh.kumar@linux.ibm.com \
    --cc=ardb@kernel.org \
    --cc=arnd@arndb.de \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=brijesh.singh@amd.com \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=decui@microsoft.com \
    --cc=haiyangz@microsoft.com \
    --cc=hannes@cmpxchg.org \
    --cc=hch@lst.de \
    --cc=hpa@zytor.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jejb@linux.ibm.com \
    --cc=jgross@suse.com \
    --cc=joro@8bytes.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=konrad.wilk@oracle.com \
    --cc=krish.sadhukhan@oracle.com \
    --cc=kuba@kernel.org \
    --cc=kys@microsoft.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=ltykernel@gmail.com \
    --cc=luto@kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=martin.b.radev@gmail.com \
    --cc=martin.petersen@oracle.com \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=parri.andrea@gmail.com \
    --cc=peterz@infradead.org \
    --cc=pgonda@google.com \
    --cc=rientjes@google.com \
    --cc=robin.murphy@arm.com \
    --cc=rppt@kernel.org \
    --cc=saravanand@fb.com \
    --cc=sfr@canb.auug.org.au \
    --cc=sstabellini@kernel.org \
    --cc=sthemmin@microsoft.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=tj@kernel.org \
    --cc=vkuznets@redhat.com \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).