All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael Kelley (LINUX)" <mikelley@microsoft.com>
To: Tianyu Lan <ltykernel@gmail.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"hpa@zytor.com" <hpa@zytor.com>,
	"luto@kernel.org" <luto@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"jgross@suse.com" <jgross@suse.com>,
	"sstabellini@kernel.org" <sstabellini@kernel.org>,
	"boris.ostrovsky@oracle.com" <boris.ostrovsky@oracle.com>,
	KY Srinivasan <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	"wei.liu@kernel.org" <wei.liu@kernel.org>,
	Dexuan Cui <decui@microsoft.com>,
	"joro@8bytes.org" <joro@8bytes.org>,
	"will@kernel.org" <will@kernel.org>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"kuba@kernel.org" <kuba@kernel.org>,
	"jejb@linux.ibm.com" <jejb@linux.ibm.com>,
	"martin.petersen@oracle.com" <martin.petersen@oracle.com>,
	"hch@lst.de" <hch@lst.de>,
	"m.szyprowski@samsung.com" <m.szyprowski@samsung.com>,
	"robin.murphy@arm.com" <robin.murphy@arm.com>,
	Tianyu Lan <Tianyu.Lan@microsoft.com>,
	"thomas.lendacky@amd.com" <thomas.lendacky@amd.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	"linux-hyperv@vger.kernel.org" <linux-hyperv@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	vkuznets <vkuznets@redhat.com>,
	"brijesh.singh@amd.com" <brijesh.singh@amd.com>,
	"konrad.wilk@oracle.com" <konrad.wilk@oracle.com>,
	"parri.andrea@gmail.com" <parri.andrea@gmail.com>,
	"dave.hansen@intel.com" <dave.hansen@intel.com>
Subject: RE: [PATCH V2 5/6] net: netvsc: Add Isolation VM support for netvsc driver
Date: Tue, 23 Nov 2021 17:55:51 +0000	[thread overview]
Message-ID: <MWHPR21MB1593FF92E42C1FD3C2755A51D7609@MWHPR21MB1593.namprd21.prod.outlook.com> (raw)
In-Reply-To: <20211123143039.331929-6-ltykernel@gmail.com>

From: Tianyu Lan <ltykernel@gmail.com> Sent: Tuesday, November 23, 2021 6:31 AM
> 
> In an Isolation VM, all memory shared with the host needs to be marked
> visible to the host via hvcall. vmbus_establish_gpadl() has already done
> this for the netvsc rx/tx ring buffers. The page buffers used by
> vmbus_sendpacket_pagebuffer() still need to be handled. Use the DMA API
> to map/unmap this memory when sending/receiving packets; the Hyper-V
> swiotlb bounce buffer DMA address will be returned. The swiotlb bounce
> buffer has been marked visible to the host during boot up.
> 
> Allocate the rx/tx ring buffers via dma_alloc_noncontiguous() in an
> Isolation VM. After calling vmbus_establish_gpadl(), which marks these
> pages visible to the host, map these pages into the unencrypted address
> space via dma_vmap_noncontiguous().
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  drivers/net/hyperv/hyperv_net.h   |   5 +
>  drivers/net/hyperv/netvsc.c       | 192 +++++++++++++++++++++++++++---
>  drivers/net/hyperv/rndis_filter.c |   2 +
>  include/linux/hyperv.h            |   6 +
>  4 files changed, 190 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 315278a7cf88..31c77a00d01e 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -164,6 +164,7 @@ struct hv_netvsc_packet {
>  	u32 total_bytes;
>  	u32 send_buf_index;
>  	u32 total_data_buflen;
> +	struct hv_dma_range *dma_range;
>  };
> 
>  #define NETVSC_HASH_KEYLEN 40
> @@ -1074,6 +1075,7 @@ struct netvsc_device {
> 
>  	/* Receive buffer allocated by us but manages by NetVSP */
>  	void *recv_buf;
> +	struct sg_table *recv_sgt;
>  	u32 recv_buf_size; /* allocated bytes */
>  	struct vmbus_gpadl recv_buf_gpadl_handle;
>  	u32 recv_section_cnt;
> @@ -1082,6 +1084,7 @@ struct netvsc_device {
> 
>  	/* Send buffer allocated by us */
>  	void *send_buf;
> +	struct sg_table *send_sgt;
>  	u32 send_buf_size;
>  	struct vmbus_gpadl send_buf_gpadl_handle;
>  	u32 send_section_cnt;
> @@ -1731,4 +1734,6 @@ struct rndis_message {
>  #define RETRY_US_HI	10000
>  #define RETRY_MAX	2000	/* >10 sec */
> 
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet);
>  #endif /* _HYPERV_NET_H */
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 396bc1c204e6..9cdc71930830 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -20,6 +20,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/prefetch.h>
> +#include <linux/gfp.h>
> 
>  #include <asm/sync_bitops.h>
>  #include <asm/mshyperv.h>
> @@ -146,15 +147,39 @@ static struct netvsc_device *alloc_net_device(void)
>  	return net_device;
>  }
> 
> +static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
> +{
> +	struct vmbus_channel *primary = channel->primary_channel;
> +
> +	return primary ? primary->device_obj : channel->device_obj;
> +}
> +
>  static void free_netvsc_device(struct rcu_head *head)
>  {
>  	struct netvsc_device *nvdev
>  		= container_of(head, struct netvsc_device, rcu);
> +	struct hv_device *dev =
> +		netvsc_channel_to_device(nvdev->chan_table[0].channel);
>  	int i;
> 
>  	kfree(nvdev->extension);
> -	vfree(nvdev->recv_buf);
> -	vfree(nvdev->send_buf);
> +
> +	if (nvdev->recv_sgt) {
> +		dma_vunmap_noncontiguous(&dev->device, nvdev->recv_buf);
> +		dma_free_noncontiguous(&dev->device, nvdev->recv_buf_size,
> +				       nvdev->recv_sgt, DMA_FROM_DEVICE);
> +	} else {
> +		vfree(nvdev->recv_buf);
> +	}
> +
> +	if (nvdev->send_sgt) {
> +		dma_vunmap_noncontiguous(&dev->device, nvdev->send_buf);
> +		dma_free_noncontiguous(&dev->device, nvdev->send_buf_size,
> +				       nvdev->send_sgt, DMA_TO_DEVICE);
> +	} else {
> +		vfree(nvdev->send_buf);
> +	}
> +
>  	kfree(nvdev->send_section_map);
> 
>  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
> @@ -348,7 +373,21 @@ static int netvsc_init_buf(struct hv_device *device,
>  		buf_size = min_t(unsigned int, buf_size,
>  				 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
> 
> -	net_device->recv_buf = vzalloc(buf_size);
> +	if (hv_isolation_type_snp()) {
> +		net_device->recv_sgt =
> +			dma_alloc_noncontiguous(&device->device, buf_size,
> +						DMA_FROM_DEVICE, GFP_KERNEL, 0);
> +		if (!net_device->recv_sgt) {
> +			pr_err("Fail to allocate recv buffer buf_size %d.\n.", buf_size);
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		net_device->recv_buf = (void *)net_device->recv_sgt->sgl->dma_address;

Use the sg_dma_address() macro instead of accessing sgl->dma_address directly.

> +	} else {
> +		net_device->recv_buf = vzalloc(buf_size);
> +	}
> +
>  	if (!net_device->recv_buf) {
>  		netdev_err(ndev,
>  			   "unable to allocate receive buffer of size %u\n",
> @@ -357,8 +396,6 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> -	net_device->recv_buf_size = buf_size;
> -
>  	/*
>  	 * Establish the gpadl handle for this buffer on this
>  	 * channel.  Note: This call uses the vmbus connection rather
> @@ -373,6 +410,19 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> +	if (net_device->recv_sgt) {
> +		net_device->recv_buf =
> +			dma_vmap_noncontiguous(&device->device, buf_size,
> +					       net_device->recv_sgt);
> +		if (!net_device->recv_buf) {
> +			pr_err("Fail to vmap recv buffer.\n");
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +	}
> +
> +	net_device->recv_buf_size = buf_size;
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -454,14 +504,27 @@ static int netvsc_init_buf(struct hv_device *device,
>  	buf_size = device_info->send_sections * device_info->send_section_size;
>  	buf_size = round_up(buf_size, PAGE_SIZE);
> 
> -	net_device->send_buf = vzalloc(buf_size);
> +	if (hv_isolation_type_snp()) {
> +		net_device->send_sgt =
> +			dma_alloc_noncontiguous(&device->device, buf_size,
> +						DMA_TO_DEVICE, GFP_KERNEL, 0);
> +		if (!net_device->send_sgt) {
> +			pr_err("Fail to allocate send buffer buf_size %d.\n.", buf_size);
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		net_device->send_buf = (void *)net_device->send_sgt->sgl->dma_address;

Use the sg_dma_address() macro instead of accessing sgl->dma_address directly.

> +	} else {
> +		net_device->send_buf = vzalloc(buf_size);
> +	}
> +
>  	if (!net_device->send_buf) {
>  		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
>  			   buf_size);
>  		ret = -ENOMEM;
>  		goto cleanup;
>  	}
> -	net_device->send_buf_size = buf_size;
> 
>  	/* Establish the gpadl handle for this buffer on this
>  	 * channel.  Note: This call uses the vmbus connection rather
> @@ -476,6 +539,19 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> +	if (net_device->send_sgt) {
> +		net_device->send_buf =
> +			dma_vmap_noncontiguous(&device->device, buf_size,
> +					       net_device->send_sgt);
> +		if (!net_device->send_buf) {
> +			pr_err("Fail to vmap send buffer.\n");
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +	}
> +
> +	net_device->send_buf_size = buf_size;
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -766,7 +842,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
> 
>  	/* Notify the layer above us */
>  	if (likely(skb)) {
> -		const struct hv_netvsc_packet *packet
> +		struct hv_netvsc_packet *packet
>  			= (struct hv_netvsc_packet *)skb->cb;
>  		u32 send_index = packet->send_buf_index;
>  		struct netvsc_stats *tx_stats;
> @@ -782,6 +858,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
>  		tx_stats->bytes += packet->total_bytes;
>  		u64_stats_update_end(&tx_stats->syncp);
> 
> +		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  		napi_consume_skb(skb, budget);
>  	}
> 
> @@ -946,6 +1023,87 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
>  		memset(dest, 0, padding);
>  }
> 
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet)
> +{
> +	u32 page_count = packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	int i;
> +
> +	if (!hv_is_isolation_supported())
> +		return;
> +
> +	if (!packet->dma_range)
> +		return;
> +
> +	for (i = 0; i < page_count; i++)
> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
> +				 packet->dma_range[i].mapping_size,
> +				 DMA_TO_DEVICE);
> +
> +	kfree(packet->dma_range);
> +}
> +
> +/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
> + * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
> + * VM.
> + *
> + * In isolation VM, netvsc send buffer has been marked visible to
> + * host and so the data copied to send buffer doesn't need to use
> + * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
> + * may not be copied to send buffer and so these pages need to be
> + * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do
> + * that. The pfns in the struct hv_page_buffer need to be converted
> + * to bounce buffer's pfn. The loop here is necessary because the
> + * entries in the page buffer array are not necessarily full
> + * pages of data.  Each entry in the array has a separate offset and
> + * len that may be non-zero, even for entries in the middle of the
> + * array.  And the entries are not physically contiguous.  So each
> + * entry must be individually mapped rather than as a contiguous unit.
> + * So not use dma_map_sg() here.
> + */
> +static int netvsc_dma_map(struct hv_device *hv_dev,
> +			  struct hv_netvsc_packet *packet,
> +			  struct hv_page_buffer *pb)
> +{
> +	u32 page_count =  packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	dma_addr_t dma;
> +	int i;
> +
> +	if (!hv_is_isolation_supported())
> +		return 0;
> +
> +	packet->dma_range = kcalloc(page_count,
> +				    sizeof(*packet->dma_range),
> +				    GFP_KERNEL);
> +	if (!packet->dma_range)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < page_count; i++) {
> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
> +					 + pb[i].offset);
> +		u32 len = pb[i].len;
> +
> +		dma = dma_map_single(&hv_dev->device, src, len,
> +				     DMA_TO_DEVICE);
> +		if (dma_mapping_error(&hv_dev->device, dma)) {
> +			kfree(packet->dma_range);
> +			return -ENOMEM;
> +		}
> +
> +		packet->dma_range[i].dma = dma;
> +		packet->dma_range[i].mapping_size = len;
> +		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
> +		pb[i].offset = offset_in_hvpage(dma);
> +		pb[i].len = len;

As noted in comments on an earlier version of this patch, the
pb[i].len and .offset fields should not be changed by doing
dma_map_single().  So there's no need to set them again here.  Adding
a comment to that effect might be good.

> +	}
> +
> +	return 0;
> +}
> +
>  static inline int netvsc_send_pkt(
>  	struct hv_device *device,
>  	struct hv_netvsc_packet *packet,
> @@ -986,14 +1144,24 @@ static inline int netvsc_send_pkt(
> 
>  	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
> 
> +	packet->dma_range = NULL;
>  	if (packet->page_buf_cnt) {
>  		if (packet->cp_partial)
>  			pb += packet->rmsg_pgcnt;
> 
> +		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
> +		if (ret) {
> +			ret = -EAGAIN;
> +			goto exit;
> +		}
> +
>  		ret = vmbus_sendpacket_pagebuffer(out_channel,
>  						  pb, packet->page_buf_cnt,
>  						  &nvmsg, sizeof(nvmsg),
>  						  req_id);
> +
> +		if (ret)
> +			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  	} else {
>  		ret = vmbus_sendpacket(out_channel,
>  				       &nvmsg, sizeof(nvmsg),
> @@ -1001,6 +1169,7 @@ static inline int netvsc_send_pkt(
>  				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
>  	}
> 
> +exit:
>  	if (ret == 0) {
>  		atomic_inc_return(&nvchan->queue_sends);
> 
> @@ -1515,13 +1684,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
>  	return 0;
>  }
> 
> -static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
> -{
> -	struct vmbus_channel *primary = channel->primary_channel;
> -
> -	return primary ? primary->device_obj : channel->device_obj;
> -}
> -
>  /* Network processing softirq
>   * Process data in incoming ring buffer from host
>   * Stops when ring is empty or budget is met or exceeded.
> diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
> index f6c9c2a670f9..448fcc325ed7 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
>  			}
>  		}
> 
> +		netvsc_dma_unmap(((struct net_device_context *)
> +			netdev_priv(ndev))->device_ctx, &request->pkt);
>  		complete(&request->wait_event);
>  	} else {
>  		netdev_err(ndev,
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 4d44fb3b3f1c..8882e46d1070 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -25,6 +25,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/reciprocal_div.h>
>  #include <asm/hyperv-tlfs.h>
> +#include <linux/dma-map-ops.h>
> 
>  #define MAX_PAGE_BUFFER_COUNT				32
>  #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
> @@ -1583,6 +1584,11 @@ struct hyperv_service_callback {
>  	void (*callback)(void *context);
>  };
> 
> +struct hv_dma_range {
> +	dma_addr_t dma;
> +	u32 mapping_size;
> +};
> +
>  #define MAX_SRV_VER	0x7ffffff
>  extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
>  				const int *fw_version, int fw_vercnt,
> --
> 2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: "Michael Kelley \(LINUX\) via iommu" <iommu@lists.linux-foundation.org>
To: Tianyu Lan <ltykernel@gmail.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>,
	"x86@kernel.org" <x86@kernel.org>,
	"hpa@zytor.com" <hpa@zytor.com>,
	"luto@kernel.org" <luto@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"jgross@suse.com" <jgross@suse.com>,
	"sstabellini@kernel.org" <sstabellini@kernel.org>,
	"boris.ostrovsky@oracle.com" <boris.ostrovsky@oracle.com>,
	KY Srinivasan <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Stephen Hemminger <sthemmin@microsoft.com>,
	"wei.liu@kernel.org" <wei.liu@kernel.org>,
	Dexuan Cui <decui@microsoft.com>,
	"joro@8bytes.org" <joro@8bytes.org>,
	"will@kernel.org" <will@kernel.org>,
	"davem@davemloft.net" <davem@davemloft.net>,
	"kuba@kernel.org" <kuba@kernel.org>,
	"jejb@linux.ibm.com" <jejb@linux.ibm.com>,
	"martin.petersen@oracle.com" <martin.petersen@oracle.com>,
	"hch@lst.de" <hch@lst.de>,
	"m.szyprowski@samsung.com" <m.szyprowski@samsung.com>,
	"robin.murphy@arm.com" <robin.murphy@arm.com>,
	Tianyu Lan <Tianyu.Lan@microsoft.com>,
	"thomas.lendacky@amd.com" <thomas.lendacky@amd.com>,
	"xen-devel@lists.xenproject.org" <xen-devel@lists.xenproject.org>
Cc: "parri.andrea@gmail.com" <parri.andrea@gmail.com>,
	"linux-hyperv@vger.kernel.org" <linux-hyperv@vger.kernel.org>,
	"brijesh.singh@amd.com" <brijesh.singh@amd.com>,
	"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
	"konrad.wilk@oracle.com" <konrad.wilk@oracle.com>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"dave.hansen@intel.com" <dave.hansen@intel.com>,
	"iommu@lists.linux-foundation.org"
	<iommu@lists.linux-foundation.org>,
	vkuznets <vkuznets@redhat.com>
Subject: RE: [PATCH V2 5/6] net: netvsc: Add Isolation VM support for netvsc driver
Date: Tue, 23 Nov 2021 17:55:51 +0000	[thread overview]
Message-ID: <MWHPR21MB1593FF92E42C1FD3C2755A51D7609@MWHPR21MB1593.namprd21.prod.outlook.com> (raw)
In-Reply-To: <20211123143039.331929-6-ltykernel@gmail.com>

From: Tianyu Lan <ltykernel@gmail.com> Sent: Tuesday, November 23, 2021 6:31 AM
> 
> In an Isolation VM, all memory shared with the host needs to be marked
> visible to the host via hvcall. vmbus_establish_gpadl() has already done
> this for the netvsc rx/tx ring buffers. The page buffers used by
> vmbus_sendpacket_pagebuffer() still need to be handled. Use the DMA API
> to map/unmap this memory when sending/receiving packets; the Hyper-V
> swiotlb bounce buffer DMA address will be returned. The swiotlb bounce
> buffer has been marked visible to the host during boot up.
> 
> Allocate the rx/tx ring buffers via dma_alloc_noncontiguous() in an
> Isolation VM. After calling vmbus_establish_gpadl(), which marks these
> pages visible to the host, map these pages into the unencrypted address
> space via dma_vmap_noncontiguous().
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> ---
>  drivers/net/hyperv/hyperv_net.h   |   5 +
>  drivers/net/hyperv/netvsc.c       | 192 +++++++++++++++++++++++++++---
>  drivers/net/hyperv/rndis_filter.c |   2 +
>  include/linux/hyperv.h            |   6 +
>  4 files changed, 190 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
> index 315278a7cf88..31c77a00d01e 100644
> --- a/drivers/net/hyperv/hyperv_net.h
> +++ b/drivers/net/hyperv/hyperv_net.h
> @@ -164,6 +164,7 @@ struct hv_netvsc_packet {
>  	u32 total_bytes;
>  	u32 send_buf_index;
>  	u32 total_data_buflen;
> +	struct hv_dma_range *dma_range;
>  };
> 
>  #define NETVSC_HASH_KEYLEN 40
> @@ -1074,6 +1075,7 @@ struct netvsc_device {
> 
>  	/* Receive buffer allocated by us but manages by NetVSP */
>  	void *recv_buf;
> +	struct sg_table *recv_sgt;
>  	u32 recv_buf_size; /* allocated bytes */
>  	struct vmbus_gpadl recv_buf_gpadl_handle;
>  	u32 recv_section_cnt;
> @@ -1082,6 +1084,7 @@ struct netvsc_device {
> 
>  	/* Send buffer allocated by us */
>  	void *send_buf;
> +	struct sg_table *send_sgt;
>  	u32 send_buf_size;
>  	struct vmbus_gpadl send_buf_gpadl_handle;
>  	u32 send_section_cnt;
> @@ -1731,4 +1734,6 @@ struct rndis_message {
>  #define RETRY_US_HI	10000
>  #define RETRY_MAX	2000	/* >10 sec */
> 
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet);
>  #endif /* _HYPERV_NET_H */
> diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
> index 396bc1c204e6..9cdc71930830 100644
> --- a/drivers/net/hyperv/netvsc.c
> +++ b/drivers/net/hyperv/netvsc.c
> @@ -20,6 +20,7 @@
>  #include <linux/vmalloc.h>
>  #include <linux/rtnetlink.h>
>  #include <linux/prefetch.h>
> +#include <linux/gfp.h>
> 
>  #include <asm/sync_bitops.h>
>  #include <asm/mshyperv.h>
> @@ -146,15 +147,39 @@ static struct netvsc_device *alloc_net_device(void)
>  	return net_device;
>  }
> 
> +static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
> +{
> +	struct vmbus_channel *primary = channel->primary_channel;
> +
> +	return primary ? primary->device_obj : channel->device_obj;
> +}
> +
>  static void free_netvsc_device(struct rcu_head *head)
>  {
>  	struct netvsc_device *nvdev
>  		= container_of(head, struct netvsc_device, rcu);
> +	struct hv_device *dev =
> +		netvsc_channel_to_device(nvdev->chan_table[0].channel);
>  	int i;
> 
>  	kfree(nvdev->extension);
> -	vfree(nvdev->recv_buf);
> -	vfree(nvdev->send_buf);
> +
> +	if (nvdev->recv_sgt) {
> +		dma_vunmap_noncontiguous(&dev->device, nvdev->recv_buf);
> +		dma_free_noncontiguous(&dev->device, nvdev->recv_buf_size,
> +				       nvdev->recv_sgt, DMA_FROM_DEVICE);
> +	} else {
> +		vfree(nvdev->recv_buf);
> +	}
> +
> +	if (nvdev->send_sgt) {
> +		dma_vunmap_noncontiguous(&dev->device, nvdev->send_buf);
> +		dma_free_noncontiguous(&dev->device, nvdev->send_buf_size,
> +				       nvdev->send_sgt, DMA_TO_DEVICE);
> +	} else {
> +		vfree(nvdev->send_buf);
> +	}
> +
>  	kfree(nvdev->send_section_map);
> 
>  	for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
> @@ -348,7 +373,21 @@ static int netvsc_init_buf(struct hv_device *device,
>  		buf_size = min_t(unsigned int, buf_size,
>  				 NETVSC_RECEIVE_BUFFER_SIZE_LEGACY);
> 
> -	net_device->recv_buf = vzalloc(buf_size);
> +	if (hv_isolation_type_snp()) {
> +		net_device->recv_sgt =
> +			dma_alloc_noncontiguous(&device->device, buf_size,
> +						DMA_FROM_DEVICE, GFP_KERNEL, 0);
> +		if (!net_device->recv_sgt) {
> +			pr_err("Fail to allocate recv buffer buf_size %d.\n.", buf_size);
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		net_device->recv_buf = (void *)net_device->recv_sgt->sgl->dma_address;

Use the sg_dma_address() macro instead of accessing sgl->dma_address directly.

> +	} else {
> +		net_device->recv_buf = vzalloc(buf_size);
> +	}
> +
>  	if (!net_device->recv_buf) {
>  		netdev_err(ndev,
>  			   "unable to allocate receive buffer of size %u\n",
> @@ -357,8 +396,6 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> -	net_device->recv_buf_size = buf_size;
> -
>  	/*
>  	 * Establish the gpadl handle for this buffer on this
>  	 * channel.  Note: This call uses the vmbus connection rather
> @@ -373,6 +410,19 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> +	if (net_device->recv_sgt) {
> +		net_device->recv_buf =
> +			dma_vmap_noncontiguous(&device->device, buf_size,
> +					       net_device->recv_sgt);
> +		if (!net_device->recv_buf) {
> +			pr_err("Fail to vmap recv buffer.\n");
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +	}
> +
> +	net_device->recv_buf_size = buf_size;
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -454,14 +504,27 @@ static int netvsc_init_buf(struct hv_device *device,
>  	buf_size = device_info->send_sections * device_info->send_section_size;
>  	buf_size = round_up(buf_size, PAGE_SIZE);
> 
> -	net_device->send_buf = vzalloc(buf_size);
> +	if (hv_isolation_type_snp()) {
> +		net_device->send_sgt =
> +			dma_alloc_noncontiguous(&device->device, buf_size,
> +						DMA_TO_DEVICE, GFP_KERNEL, 0);
> +		if (!net_device->send_sgt) {
> +			pr_err("Fail to allocate send buffer buf_size %d.\n.", buf_size);
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +
> +		net_device->send_buf = (void *)net_device->send_sgt->sgl->dma_address;

Use the sg_dma_address() macro instead of accessing sgl->dma_address directly.

> +	} else {
> +		net_device->send_buf = vzalloc(buf_size);
> +	}
> +
>  	if (!net_device->send_buf) {
>  		netdev_err(ndev, "unable to allocate send buffer of size %u\n",
>  			   buf_size);
>  		ret = -ENOMEM;
>  		goto cleanup;
>  	}
> -	net_device->send_buf_size = buf_size;
> 
>  	/* Establish the gpadl handle for this buffer on this
>  	 * channel.  Note: This call uses the vmbus connection rather
> @@ -476,6 +539,19 @@ static int netvsc_init_buf(struct hv_device *device,
>  		goto cleanup;
>  	}
> 
> +	if (net_device->send_sgt) {
> +		net_device->send_buf =
> +			dma_vmap_noncontiguous(&device->device, buf_size,
> +					       net_device->send_sgt);
> +		if (!net_device->send_buf) {
> +			pr_err("Fail to vmap send buffer.\n");
> +			ret = -ENOMEM;
> +			goto cleanup;
> +		}
> +	}
> +
> +	net_device->send_buf_size = buf_size;
> +
>  	/* Notify the NetVsp of the gpadl handle */
>  	init_packet = &net_device->channel_init_pkt;
>  	memset(init_packet, 0, sizeof(struct nvsp_message));
> @@ -766,7 +842,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
> 
>  	/* Notify the layer above us */
>  	if (likely(skb)) {
> -		const struct hv_netvsc_packet *packet
> +		struct hv_netvsc_packet *packet
>  			= (struct hv_netvsc_packet *)skb->cb;
>  		u32 send_index = packet->send_buf_index;
>  		struct netvsc_stats *tx_stats;
> @@ -782,6 +858,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
>  		tx_stats->bytes += packet->total_bytes;
>  		u64_stats_update_end(&tx_stats->syncp);
> 
> +		netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  		napi_consume_skb(skb, budget);
>  	}
> 
> @@ -946,6 +1023,87 @@ static void netvsc_copy_to_send_buf(struct netvsc_device *net_device,
>  		memset(dest, 0, padding);
>  }
> 
> +void netvsc_dma_unmap(struct hv_device *hv_dev,
> +		      struct hv_netvsc_packet *packet)
> +{
> +	u32 page_count = packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	int i;
> +
> +	if (!hv_is_isolation_supported())
> +		return;
> +
> +	if (!packet->dma_range)
> +		return;
> +
> +	for (i = 0; i < page_count; i++)
> +		dma_unmap_single(&hv_dev->device, packet->dma_range[i].dma,
> +				 packet->dma_range[i].mapping_size,
> +				 DMA_TO_DEVICE);
> +
> +	kfree(packet->dma_range);
> +}
> +
> +/* netvsc_dma_map - Map swiotlb bounce buffer with data page of
> + * packet sent by vmbus_sendpacket_pagebuffer() in the Isolation
> + * VM.
> + *
> + * In isolation VM, netvsc send buffer has been marked visible to
> + * host and so the data copied to send buffer doesn't need to use
> + * bounce buffer. The data pages handled by vmbus_sendpacket_pagebuffer()
> + * may not be copied to send buffer and so these pages need to be
> + * mapped with swiotlb bounce buffer. netvsc_dma_map() is to do
> + * that. The pfns in the struct hv_page_buffer need to be converted
> + * to bounce buffer's pfn. The loop here is necessary because the
> + * entries in the page buffer array are not necessarily full
> + * pages of data.  Each entry in the array has a separate offset and
> + * len that may be non-zero, even for entries in the middle of the
> + * array.  And the entries are not physically contiguous.  So each
> + * entry must be individually mapped rather than as a contiguous unit.
> + * So not use dma_map_sg() here.
> + */
> +static int netvsc_dma_map(struct hv_device *hv_dev,
> +			  struct hv_netvsc_packet *packet,
> +			  struct hv_page_buffer *pb)
> +{
> +	u32 page_count =  packet->cp_partial ?
> +		packet->page_buf_cnt - packet->rmsg_pgcnt :
> +		packet->page_buf_cnt;
> +	dma_addr_t dma;
> +	int i;
> +
> +	if (!hv_is_isolation_supported())
> +		return 0;
> +
> +	packet->dma_range = kcalloc(page_count,
> +				    sizeof(*packet->dma_range),
> +				    GFP_KERNEL);
> +	if (!packet->dma_range)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < page_count; i++) {
> +		char *src = phys_to_virt((pb[i].pfn << HV_HYP_PAGE_SHIFT)
> +					 + pb[i].offset);
> +		u32 len = pb[i].len;
> +
> +		dma = dma_map_single(&hv_dev->device, src, len,
> +				     DMA_TO_DEVICE);
> +		if (dma_mapping_error(&hv_dev->device, dma)) {
> +			kfree(packet->dma_range);
> +			return -ENOMEM;
> +		}
> +
> +		packet->dma_range[i].dma = dma;
> +		packet->dma_range[i].mapping_size = len;
> +		pb[i].pfn = dma >> HV_HYP_PAGE_SHIFT;
> +		pb[i].offset = offset_in_hvpage(dma);
> +		pb[i].len = len;

As noted in comments on an earlier version of this patch, the
pb[i].len and .offset fields should not be changed by doing
dma_map_single().  So there's no need to set them again here.  Adding
a comment to that effect might be good.

> +	}
> +
> +	return 0;
> +}
> +
>  static inline int netvsc_send_pkt(
>  	struct hv_device *device,
>  	struct hv_netvsc_packet *packet,
> @@ -986,14 +1144,24 @@ static inline int netvsc_send_pkt(
> 
>  	trace_nvsp_send_pkt(ndev, out_channel, rpkt);
> 
> +	packet->dma_range = NULL;
>  	if (packet->page_buf_cnt) {
>  		if (packet->cp_partial)
>  			pb += packet->rmsg_pgcnt;
> 
> +		ret = netvsc_dma_map(ndev_ctx->device_ctx, packet, pb);
> +		if (ret) {
> +			ret = -EAGAIN;
> +			goto exit;
> +		}
> +
>  		ret = vmbus_sendpacket_pagebuffer(out_channel,
>  						  pb, packet->page_buf_cnt,
>  						  &nvmsg, sizeof(nvmsg),
>  						  req_id);
> +
> +		if (ret)
> +			netvsc_dma_unmap(ndev_ctx->device_ctx, packet);
>  	} else {
>  		ret = vmbus_sendpacket(out_channel,
>  				       &nvmsg, sizeof(nvmsg),
> @@ -1001,6 +1169,7 @@ static inline int netvsc_send_pkt(
>  				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
>  	}
> 
> +exit:
>  	if (ret == 0) {
>  		atomic_inc_return(&nvchan->queue_sends);
> 
> @@ -1515,13 +1684,6 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
>  	return 0;
>  }
> 
> -static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
> -{
> -	struct vmbus_channel *primary = channel->primary_channel;
> -
> -	return primary ? primary->device_obj : channel->device_obj;
> -}
> -
>  /* Network processing softirq
>   * Process data in incoming ring buffer from host
>   * Stops when ring is empty or budget is met or exceeded.
> diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
> index f6c9c2a670f9..448fcc325ed7 100644
> --- a/drivers/net/hyperv/rndis_filter.c
> +++ b/drivers/net/hyperv/rndis_filter.c
> @@ -361,6 +361,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
>  			}
>  		}
> 
> +		netvsc_dma_unmap(((struct net_device_context *)
> +			netdev_priv(ndev))->device_ctx, &request->pkt);
>  		complete(&request->wait_event);
>  	} else {
>  		netdev_err(ndev,
> diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
> index 4d44fb3b3f1c..8882e46d1070 100644
> --- a/include/linux/hyperv.h
> +++ b/include/linux/hyperv.h
> @@ -25,6 +25,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/reciprocal_div.h>
>  #include <asm/hyperv-tlfs.h>
> +#include <linux/dma-map-ops.h>
> 
>  #define MAX_PAGE_BUFFER_COUNT				32
>  #define MAX_MULTIPAGE_BUFFER_COUNT			32 /* 128K */
> @@ -1583,6 +1584,11 @@ struct hyperv_service_callback {
>  	void (*callback)(void *context);
>  };
> 
> +struct hv_dma_range {
> +	dma_addr_t dma;
> +	u32 mapping_size;
> +};
> +
>  #define MAX_SRV_VER	0x7ffffff
>  extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, u32 buflen,
>  				const int *fw_version, int fw_vercnt,
> --
> 2.25.1

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  reply	other threads:[~2021-11-23 17:55 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-23 14:30 [PATCH V2 0/6] x86/Hyper-V: Add Hyper-V Isolation VM support(Second part) Tianyu Lan
2021-11-23 14:30 ` Tianyu Lan
2021-11-23 14:30 ` [PATCH V2 1/6] Swiotlb: Add Swiotlb bounce buffer remap function for HV IVM Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan
2021-11-23 17:15   ` Michael Kelley (LINUX)
2021-11-23 17:15     ` Michael Kelley (LINUX) via iommu
2021-11-24 14:07     ` Tianyu Lan
2021-11-24 14:07       ` Tianyu Lan
2021-11-23 14:30 ` [PATCH V2 2/6] dma-mapping: Add vmap/vunmap_noncontiguous() callback in dma ops Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan
2021-11-23 14:30 ` [PATCH V2 3/6] x86/hyper-v: Add hyperv Isolation VM check in the cc_platform_has() Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan
2021-11-23 14:30 ` [PATCH V2 4/6] hyperv/IOMMU: Enable swiotlb bounce buffer for Isolation VM Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan
2021-11-23 17:44   ` Michael Kelley (LINUX)
2021-11-23 17:44     ` Michael Kelley (LINUX) via iommu
2021-11-23 14:30 ` [PATCH V2 5/6] net: netvsc: Add Isolation VM support for netvsc driver Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan
2021-11-23 17:55   ` Michael Kelley (LINUX) [this message]
2021-11-23 17:55     ` Michael Kelley (LINUX) via iommu
2021-11-24 17:03   ` Michael Kelley (LINUX)
2021-11-24 17:03     ` Michael Kelley (LINUX) via iommu
2021-11-25 21:58     ` Haiyang Zhang via iommu
2021-11-25 21:58       ` Haiyang Zhang
2021-11-23 14:30 ` [PATCH V2 6/6] scsi: storvsc: Add Isolation VM support for storvsc driver Tianyu Lan
2021-11-23 14:30   ` Tianyu Lan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=MWHPR21MB1593FF92E42C1FD3C2755A51D7609@MWHPR21MB1593.namprd21.prod.outlook.com \
    --to=mikelley@microsoft.com \
    --cc=Tianyu.Lan@microsoft.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=brijesh.singh@amd.com \
    --cc=dave.hansen@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=decui@microsoft.com \
    --cc=haiyangz@microsoft.com \
    --cc=hch@lst.de \
    --cc=hpa@zytor.com \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jejb@linux.ibm.com \
    --cc=jgross@suse.com \
    --cc=joro@8bytes.org \
    --cc=konrad.wilk@oracle.com \
    --cc=kuba@kernel.org \
    --cc=kys@microsoft.com \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=ltykernel@gmail.com \
    --cc=luto@kernel.org \
    --cc=m.szyprowski@samsung.com \
    --cc=martin.petersen@oracle.com \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=parri.andrea@gmail.com \
    --cc=peterz@infradead.org \
    --cc=robin.murphy@arm.com \
    --cc=sstabellini@kernel.org \
    --cc=sthemmin@microsoft.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=vkuznets@redhat.com \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.