linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch
@ 2018-09-21 18:20 Haiyang Zhang
  2018-09-21 18:20 ` [PATCH net-next,v2,1/3] hv_netvsc: Add support for " Haiyang Zhang
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 18:20 UTC (permalink / raw)
  To: davem, netdev
  Cc: haiyangz, kys, sthemmin, olaf, vkuznets, devel, linux-kernel

From: Haiyang Zhang <haiyangz@microsoft.com>

This patch set adds support for the "LRO/RSC in the vSwitch" feature. It
reduces the per-packet processing overhead by coalescing multiple TCP
segments when possible. The feature is enabled by default on VMs running on
Windows Server 2019 and later.

The patch set also adds an ethtool command handler and updates the documentation.

Haiyang Zhang (3):
  hv_netvsc: Add support for LRO/RSC in the vSwitch
  hv_netvsc: Add handler for LRO setting change
  hv_netvsc: Update document for LRO/RSC support

 Documentation/networking/netvsc.txt |  9 +++
 drivers/net/hyperv/hyperv_net.h     | 51 ++++++++++++---
 drivers/net/hyperv/netvsc.c         | 18 ++++--
 drivers/net/hyperv/netvsc_drv.c     | 58 ++++++++++++-----
 drivers/net/hyperv/rndis_filter.c   | 97 +++++++++++++++++++++++++----
 5 files changed, 194 insertions(+), 39 deletions(-)

-- 
2.18.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH net-next,v2,1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
  2018-09-21 18:20 [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch Haiyang Zhang
@ 2018-09-21 18:20 ` Haiyang Zhang
  2018-09-21 18:36   ` [PATCH net-next, v2, 1/3] " Stephen Hemminger
  2018-09-21 18:20 ` [PATCH net-next,v2,2/3] hv_netvsc: Add handler for LRO setting change Haiyang Zhang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 18:20 UTC (permalink / raw)
  To: davem, netdev
  Cc: haiyangz, kys, sthemmin, olaf, vkuznets, devel, linux-kernel

From: Haiyang Zhang <haiyangz@microsoft.com>

"LRO/RSC in the vSwitch" is a feature available on Windows Server 2019
and later hosts. It reduces the per-packet processing overhead by
coalescing multiple TCP segments when possible. This patch adds netvsc
driver support for this feature.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   | 47 +++++++++++++---
 drivers/net/hyperv/netvsc.c       | 18 +++++--
 drivers/net/hyperv/netvsc_drv.c   | 28 +++++-----
 drivers/net/hyperv/rndis_filter.c | 90 ++++++++++++++++++++++++++-----
 4 files changed, 145 insertions(+), 38 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index a32ded5b4f41..7f1603dc8128 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -186,6 +186,7 @@ struct rndis_device {
 /* Interface */
 struct rndis_message;
 struct netvsc_device;
+struct netvsc_channel;
 struct net_device_context;
 
 extern u32 netvsc_ring_bytes;
@@ -203,10 +204,7 @@ void netvsc_linkstatus_callback(struct net_device *net,
 				struct rndis_message *resp);
 int netvsc_recv_callback(struct net_device *net,
 			 struct netvsc_device *nvdev,
-			 struct vmbus_channel *channel,
-			 void  *data, u32 len,
-			 const struct ndis_tcp_ip_checksum_info *csum_info,
-			 const struct ndis_pkt_8021q_info *vlan);
+			 struct netvsc_channel *nvchan);
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
@@ -222,7 +220,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
 			       const u8 *key);
 int rndis_filter_receive(struct net_device *ndev,
 			 struct netvsc_device *net_dev,
-			 struct vmbus_channel *channel,
+			 struct netvsc_channel *nvchan,
 			 void *data, u32 buflen);
 
 int rndis_filter_set_device_mac(struct netvsc_device *ndev,
@@ -524,6 +522,8 @@ struct nvsp_2_vsc_capability {
 			u64 ieee8021q:1;
 			u64 correlation_id:1;
 			u64 teaming:1;
+			u64 vsubnetid:1;
+			u64 rsc:1;
 		};
 	};
 } __packed;
@@ -826,7 +826,7 @@ struct nvsp_message {
 
 #define NETVSC_SUPPORTED_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | \
 				      NETIF_F_TSO | NETIF_F_IPV6_CSUM | \
-				      NETIF_F_TSO6)
+				      NETIF_F_TSO6 | NETIF_F_LRO)
 
 #define VRSS_SEND_TAB_SIZE 16  /* must be power of 2 */
 #define VRSS_CHANNEL_MAX 64
@@ -852,6 +852,18 @@ struct multi_recv_comp {
 	u32 next;	/* next entry for writing */
 };
 
+#define NVSP_RSC_MAX 562 /* Max #RSC frags in a vmbus xfer page pkt */
+
+struct nvsc_rsc {
+	const struct ndis_pkt_8021q_info *vlan;
+	const struct ndis_tcp_ip_checksum_info *csum_info;
+	u8 is_last; /* last RNDIS msg in a vmtransfer_page */
+	u32 cnt; /* #fragments in an RSC packet */
+	u32 pktlen; /* Full packet length */
+	void *data[NVSP_RSC_MAX];
+	u32 len[NVSP_RSC_MAX];
+};
+
 struct netvsc_stats {
 	u64 packets;
 	u64 bytes;
@@ -955,6 +967,7 @@ struct netvsc_channel {
 	struct multi_send_data msd;
 	struct multi_recv_comp mrc;
 	atomic_t queue_sends;
+	struct nvsc_rsc rsc;
 
 	struct netvsc_stats tx_stats;
 	struct netvsc_stats rx_stats;
@@ -1136,7 +1149,8 @@ struct rndis_oobd {
 /* Packet extension field contents associated with a Data message. */
 struct rndis_per_packet_info {
 	u32 size;
-	u32 type;
+	u32 type:31;
+	u32 internal:1;
 	u32 ppi_offset;
 };
 
@@ -1157,6 +1171,25 @@ enum ndis_per_pkt_info_type {
 	MAX_PER_PKT_INFO
 };
 
+enum rndis_per_pkt_info_interal_type {
+	RNDIS_PKTINFO_ID = 1,
+	/* Add more members here */
+
+	RNDIS_PKTINFO_MAX
+};
+
+#define RNDIS_PKTINFO_SUBALLOC BIT(0)
+#define RNDIS_PKTINFO_1ST_FRAG BIT(1)
+#define RNDIS_PKTINFO_LAST_FRAG BIT(2)
+
+#define RNDIS_PKTINFO_ID_V1 1
+
+struct rndis_pktinfo_id {
+	u8 ver;
+	u8 flag;
+	u16 pkt_id;
+};
+
 struct ndis_pkt_8021q_info {
 	union {
 		struct {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index fe01e141c8f8..922054c1d544 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -542,6 +542,9 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
 	}
 
+	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_61)
+		init_packet->msg.v2_msg.send_ndis_config.capability.rsc = 1;
+
 	trace_nvsp_send(ndev, init_packet);
 
 	ret = vmbus_sendpacket(device->channel, init_packet,
@@ -1111,11 +1114,12 @@ static void enq_receive_complete(struct net_device *ndev,
 
 static int netvsc_receive(struct net_device *ndev,
 			  struct netvsc_device *net_device,
-			  struct vmbus_channel *channel,
+			  struct netvsc_channel *nvchan,
 			  const struct vmpacket_descriptor *desc,
 			  const struct nvsp_message *nvsp)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
+	struct vmbus_channel *channel = nvchan->channel;
 	const struct vmtransfer_page_packet_header *vmxferpage_packet
 		= container_of(desc, const struct vmtransfer_page_packet_header, d);
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
@@ -1150,6 +1154,7 @@ static int netvsc_receive(struct net_device *ndev,
 		int ret;
 
 		if (unlikely(offset + buflen > net_device->recv_buf_size)) {
+			nvchan->rsc.cnt = 0;
 			status = NVSP_STAT_FAIL;
 			netif_err(net_device_ctx, rx_err, ndev,
 				  "Packet offset:%u + len:%u too big\n",
@@ -1160,11 +1165,13 @@ static int netvsc_receive(struct net_device *ndev,
 
 		data = recv_buf + offset;
 
+		nvchan->rsc.is_last = (i == count - 1);
+
 		trace_rndis_recv(ndev, q_idx, data);
 
 		/* Pass it to the upper layer */
 		ret = rndis_filter_receive(ndev, net_device,
-					   channel, data, buflen);
+					   nvchan, data, buflen);
 
 		if (unlikely(ret != NVSP_STAT_SUCCESS))
 			status = NVSP_STAT_FAIL;
@@ -1223,12 +1230,13 @@ static  void netvsc_receive_inband(struct net_device *ndev,
 }
 
 static int netvsc_process_raw_pkt(struct hv_device *device,
-				  struct vmbus_channel *channel,
+				  struct netvsc_channel *nvchan,
 				  struct netvsc_device *net_device,
 				  struct net_device *ndev,
 				  const struct vmpacket_descriptor *desc,
 				  int budget)
 {
+	struct vmbus_channel *channel = nvchan->channel;
 	const struct nvsp_message *nvmsg = hv_pkt_data(desc);
 
 	trace_nvsp_recv(ndev, channel, nvmsg);
@@ -1240,7 +1248,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
 		break;
 
 	case VM_PKT_DATA_USING_XFER_PAGES:
-		return netvsc_receive(ndev, net_device, channel,
+		return netvsc_receive(ndev, net_device, nvchan,
 				      desc, nvmsg);
 		break;
 
@@ -1284,7 +1292,7 @@ int netvsc_poll(struct napi_struct *napi, int budget)
 		nvchan->desc = hv_pkt_iter_first(channel);
 
 	while (nvchan->desc && work_done < budget) {
-		work_done += netvsc_process_raw_pkt(device, channel, net_device,
+		work_done += netvsc_process_raw_pkt(device, nvchan, net_device,
 						    ndev, nvchan->desc, budget);
 		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
 	}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 3af6d8d15233..f8c18370d9d1 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -744,14 +744,16 @@ void netvsc_linkstatus_callback(struct net_device *net,
 }
 
 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
-					     struct napi_struct *napi,
-					     const struct ndis_tcp_ip_checksum_info *csum_info,
-					     const struct ndis_pkt_8021q_info *vlan,
-					     void *data, u32 buflen)
+					     struct netvsc_channel *nvchan)
 {
+	struct napi_struct *napi = &nvchan->napi;
+	const struct ndis_pkt_8021q_info *vlan = nvchan->rsc.vlan;
+	const struct ndis_tcp_ip_checksum_info *csum_info =
+						nvchan->rsc.csum_info;
 	struct sk_buff *skb;
+	int i;
 
-	skb = napi_alloc_skb(napi, buflen);
+	skb = napi_alloc_skb(napi, nvchan->rsc.pktlen);
 	if (!skb)
 		return skb;
 
@@ -759,7 +761,8 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 	 * Copy to skb. This copy is needed here since the memory pointed by
 	 * hv_netvsc_packet cannot be deallocated
 	 */
-	skb_put_data(skb, data, buflen);
+	for (i = 0; i < nvchan->rsc.cnt; i++)
+		skb_put_data(skb, nvchan->rsc.data[i], nvchan->rsc.len[i]);
 
 	skb->protocol = eth_type_trans(skb, net);
 
@@ -792,14 +795,11 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
  */
 int netvsc_recv_callback(struct net_device *net,
 			 struct netvsc_device *net_device,
-			 struct vmbus_channel *channel,
-			 void  *data, u32 len,
-			 const struct ndis_tcp_ip_checksum_info *csum_info,
-			 const struct ndis_pkt_8021q_info *vlan)
+			 struct netvsc_channel *nvchan)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct vmbus_channel *channel = nvchan->channel;
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
-	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
 	struct sk_buff *skb;
 	struct netvsc_stats *rx_stats;
 
@@ -807,8 +807,8 @@ int netvsc_recv_callback(struct net_device *net,
 		return NVSP_STAT_FAIL;
 
 	/* Allocate a skb - TODO direct I/O to pages? */
-	skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
-				    csum_info, vlan, data, len);
+	skb = netvsc_alloc_recv_skb(net, nvchan);
+
 	if (unlikely(!skb)) {
 		++net_device_ctx->eth_stats.rx_no_memory;
 		rcu_read_unlock();
@@ -825,7 +825,7 @@ int netvsc_recv_callback(struct net_device *net,
 	rx_stats = &nvchan->rx_stats;
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
-	rx_stats->bytes += len;
+	rx_stats->bytes += nvchan->rsc.pktlen;
 
 	if (skb->pkt_type == PACKET_BROADCAST)
 		++rx_stats->broadcast;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 2a5209f23f29..f3ac66386297 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -342,7 +342,8 @@ static void rndis_filter_receive_response(struct net_device *ndev,
  * Get the Per-Packet-Info with the specified type
  * return NULL if not found.
  */
-static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
+static inline void *rndis_get_ppi(struct rndis_packet *rpkt,
+				  u32 type, u8 internal)
 {
 	struct rndis_per_packet_info *ppi;
 	int len;
@@ -355,7 +356,7 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
 	len = rpkt->per_pkt_info_len;
 
 	while (len > 0) {
-		if (ppi->type == type)
+		if (ppi->type == type && ppi->internal == internal)
 			return (void *)((ulong)ppi + ppi->ppi_offset);
 		len -= ppi->size;
 		ppi = (struct rndis_per_packet_info *)((ulong)ppi + ppi->size);
@@ -364,17 +365,41 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
 	return NULL;
 }
 
+static inline
+void rsc_add_data(struct netvsc_channel *nvchan,
+		  const struct ndis_pkt_8021q_info *vlan,
+		  const struct ndis_tcp_ip_checksum_info *csum_info,
+		  void *data, u32 len)
+{
+	u32 cnt = nvchan->rsc.cnt;
+
+	if (cnt) {
+		nvchan->rsc.pktlen += len;
+	} else {
+		nvchan->rsc.vlan = vlan;
+		nvchan->rsc.csum_info = csum_info;
+		nvchan->rsc.pktlen = len;
+	}
+
+	nvchan->rsc.data[cnt] = data;
+	nvchan->rsc.len[cnt] = len;
+	nvchan->rsc.cnt++;
+}
+
 static int rndis_filter_receive_data(struct net_device *ndev,
 				     struct netvsc_device *nvdev,
-				     struct vmbus_channel *channel,
+				     struct netvsc_channel *nvchan,
 				     struct rndis_message *msg,
 				     u32 data_buflen)
 {
 	struct rndis_packet *rndis_pkt = &msg->msg.pkt;
 	const struct ndis_tcp_ip_checksum_info *csum_info;
 	const struct ndis_pkt_8021q_info *vlan;
+	const struct rndis_pktinfo_id *pktinfo_id;
 	u32 data_offset;
 	void *data;
+	bool rsc_more = false;
+	int ret;
 
 	/* Remove the rndis header and pass it back up the stack */
 	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
@@ -393,25 +418,59 @@ static int rndis_filter_receive_data(struct net_device *ndev,
 		return NVSP_STAT_FAIL;
 	}
 
-	vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
+	vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO, 0);
+
+	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO, 0);
 
-	csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
+	pktinfo_id = rndis_get_ppi(rndis_pkt, RNDIS_PKTINFO_ID, 1);
 
 	data = (void *)msg + data_offset;
 
-	/*
-	 * Remove the rndis trailer padding from rndis packet message
+	/* Identify RSC frags, drop erroneous packets */
+	if (pktinfo_id && (pktinfo_id->flag & RNDIS_PKTINFO_SUBALLOC)) {
+		if (pktinfo_id->flag & RNDIS_PKTINFO_1ST_FRAG)
+			nvchan->rsc.cnt = 0;
+		else if (nvchan->rsc.cnt == 0)
+			goto drop;
+
+		rsc_more = true;
+
+		if (pktinfo_id->flag & RNDIS_PKTINFO_LAST_FRAG)
+			rsc_more = false;
+
+		if (rsc_more && nvchan->rsc.is_last)
+			goto drop;
+	} else {
+		nvchan->rsc.cnt = 0;
+	}
+
+	if (unlikely(nvchan->rsc.cnt >= NVSP_RSC_MAX))
+		goto drop;
+
+	/* Put data into per channel structure.
+	 * Also, remove the rndis trailer padding from rndis packet message
 	 * rndis_pkt->data_len tell us the real data length, we only copy
 	 * the data packet to the stack, without the rndis trailer padding
 	 */
-	return netvsc_recv_callback(ndev, nvdev, channel,
-				    data, rndis_pkt->data_len,
-				    csum_info, vlan);
+	rsc_add_data(nvchan, vlan, csum_info, data, rndis_pkt->data_len);
+
+	if (rsc_more)
+		return NVSP_STAT_SUCCESS;
+
+	ret = netvsc_recv_callback(ndev, nvdev, nvchan);
+	nvchan->rsc.cnt = 0;
+
+	return ret;
+
+drop:
+	/* Drop incomplete packet */
+	nvchan->rsc.cnt = 0;
+	return NVSP_STAT_FAIL;
 }
 
 int rndis_filter_receive(struct net_device *ndev,
 			 struct netvsc_device *net_dev,
-			 struct vmbus_channel *channel,
+			 struct netvsc_channel *nvchan,
 			 void *data, u32 buflen)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -422,7 +481,7 @@ int rndis_filter_receive(struct net_device *ndev,
 
 	switch (rndis_msg->ndis_msg_type) {
 	case RNDIS_MSG_PACKET:
-		return rndis_filter_receive_data(ndev, net_dev, channel,
+		return rndis_filter_receive_data(ndev, net_dev, nvchan,
 						 rndis_msg, buflen);
 	case RNDIS_MSG_INIT_C:
 	case RNDIS_MSG_QUERY_C:
@@ -1184,6 +1243,13 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 		}
 	}
 
+	if (hwcaps.rsc.ip4 && hwcaps.rsc.ip6) {
+		net->hw_features |= NETIF_F_LRO;
+
+		offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+		offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+	}
+
 	/* In case some hw_features disappeared we need to remove them from
 	 * net->features list as they're no longer supported.
 	 */
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next,v2,2/3] hv_netvsc: Add handler for LRO setting change
  2018-09-21 18:20 [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch Haiyang Zhang
  2018-09-21 18:20 ` [PATCH net-next,v2,1/3] hv_netvsc: Add support for " Haiyang Zhang
@ 2018-09-21 18:20 ` Haiyang Zhang
  2018-09-21 18:20 ` [PATCH net-next,v2,3/3] hv_netvsc: Update document for LRO/RSC support Haiyang Zhang
  2018-09-23  0:23 ` [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch David Miller
  3 siblings, 0 replies; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 18:20 UTC (permalink / raw)
  To: davem, netdev
  Cc: haiyangz, kys, sthemmin, olaf, vkuznets, devel, linux-kernel

From: Haiyang Zhang <haiyangz@microsoft.com>

This patch adds a handler for LRO setting changes, so that a user
can enable or disable the LRO feature with the ethtool command.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 drivers/net/hyperv/hyperv_net.h   |  4 ++++
 drivers/net/hyperv/netvsc_drv.c   | 30 ++++++++++++++++++++++++++++++
 drivers/net/hyperv/rndis_filter.c | 11 ++++++++---
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 7f1603dc8128..ef6f766f6389 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -185,6 +185,7 @@ struct rndis_device {
 
 /* Interface */
 struct rndis_message;
+struct ndis_offload_params;
 struct netvsc_device;
 struct netvsc_channel;
 struct net_device_context;
@@ -218,6 +219,9 @@ void rndis_filter_device_remove(struct hv_device *dev,
 				struct netvsc_device *nvdev);
 int rndis_filter_set_rss_param(struct rndis_device *rdev,
 			       const u8 *key);
+int rndis_filter_set_offload_params(struct net_device *ndev,
+				    struct netvsc_device *nvdev,
+				    struct ndis_offload_params *req_offloads);
 int rndis_filter_receive(struct net_device *ndev,
 			 struct netvsc_device *net_dev,
 			 struct netvsc_channel *nvchan,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index f8c18370d9d1..ec699741170b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1006,6 +1006,8 @@ static void netvsc_init_settings(struct net_device *dev)
 
 	ndc->speed = SPEED_UNKNOWN;
 	ndc->duplex = DUPLEX_FULL;
+
+	dev->features = NETIF_F_LRO;
 }
 
 static int netvsc_get_link_ksettings(struct net_device *dev,
@@ -1733,6 +1735,33 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 	return ret;
 }
 
+static int netvsc_set_features(struct net_device *ndev,
+			       netdev_features_t features)
+{
+	netdev_features_t change = features ^ ndev->features;
+	struct net_device_context *ndevctx = netdev_priv(ndev);
+	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
+	struct ndis_offload_params offloads;
+
+	if (!nvdev || nvdev->destroy)
+		return -ENODEV;
+
+	if (!(change & NETIF_F_LRO))
+		return 0;
+
+	memset(&offloads, 0, sizeof(struct ndis_offload_params));
+
+	if (features & NETIF_F_LRO) {
+		offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+		offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+	} else {
+		offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+		offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+	}
+
+	return rndis_filter_set_offload_params(ndev, nvdev, &offloads);
+}
+
 static u32 netvsc_get_msglevel(struct net_device *ndev)
 {
 	struct net_device_context *ndev_ctx = netdev_priv(ndev);
@@ -1776,6 +1805,7 @@ static const struct net_device_ops device_ops = {
 	.ndo_start_xmit =		netvsc_start_xmit,
 	.ndo_change_rx_flags =		netvsc_change_rx_flags,
 	.ndo_set_rx_mode =		netvsc_set_rx_mode,
+	.ndo_set_features =		netvsc_set_features,
 	.ndo_change_mtu =		netvsc_change_mtu,
 	.ndo_validate_addr =		eth_validate_addr,
 	.ndo_set_mac_address =		netvsc_set_mac_addr,
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index f3ac66386297..8b537a049c1e 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -716,7 +716,7 @@ int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
 	return ret;
 }
 
-static int
+int
 rndis_filter_set_offload_params(struct net_device *ndev,
 				struct netvsc_device *nvdev,
 				struct ndis_offload_params *req_offloads)
@@ -1246,8 +1246,13 @@ static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
 	if (hwcaps.rsc.ip4 && hwcaps.rsc.ip6) {
 		net->hw_features |= NETIF_F_LRO;
 
-		offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
-		offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+		if (net->features & NETIF_F_LRO) {
+			offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+			offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_ENABLED;
+		} else {
+			offloads.rsc_ip_v4 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+			offloads.rsc_ip_v6 = NDIS_OFFLOAD_PARAMETERS_RSC_DISABLED;
+		}
 	}
 
 	/* In case some hw_features disappeared we need to remove them from
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH net-next,v2,3/3] hv_netvsc: Update document for LRO/RSC support
  2018-09-21 18:20 [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch Haiyang Zhang
  2018-09-21 18:20 ` [PATCH net-next,v2,1/3] hv_netvsc: Add support for " Haiyang Zhang
  2018-09-21 18:20 ` [PATCH net-next,v2,2/3] hv_netvsc: Add handler for LRO setting change Haiyang Zhang
@ 2018-09-21 18:20 ` Haiyang Zhang
  2018-09-23  0:23 ` [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch David Miller
  3 siblings, 0 replies; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 18:20 UTC (permalink / raw)
  To: davem, netdev
  Cc: haiyangz, kys, sthemmin, olaf, vkuznets, devel, linux-kernel

From: Haiyang Zhang <haiyangz@microsoft.com>

Update the documentation for LRO/RSC support, including the command-line
usage for changing the setting.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
---
 Documentation/networking/netvsc.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
index 92f5b31392fa..3bfa635bbbd5 100644
--- a/Documentation/networking/netvsc.txt
+++ b/Documentation/networking/netvsc.txt
@@ -45,6 +45,15 @@ Features
   like packets and significantly reduces CPU usage under heavy Rx
   load.
 
+  Large Receive Offload (LRO), or Receive Side Coalescing (RSC)
+  -------------------------------------------------------------
+  The driver supports the "LRO/RSC in the vSwitch" feature. It reduces the
+  per-packet processing overhead by coalescing multiple TCP segments when
+  possible. The feature is enabled by default on VMs running on Windows
+  Server 2019 and later. It can be toggled with the ethtool command:
+	ethtool -K eth0 lro on
+	ethtool -K eth0 lro off
+
   SR-IOV support
   --------------
   Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
-- 
2.18.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
  2018-09-21 18:20 ` [PATCH net-next,v2,1/3] hv_netvsc: Add support for " Haiyang Zhang
@ 2018-09-21 18:36   ` Stephen Hemminger
  2018-09-21 18:51     ` Haiyang Zhang
  0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2018-09-21 18:36 UTC (permalink / raw)
  To: Haiyang Zhang
  Cc: haiyangz, davem, netdev, olaf, linux-kernel, devel, vkuznets

On Fri, 21 Sep 2018 18:20:35 +0000
Haiyang Zhang <haiyangz@linuxonhyperv.com> wrote:

Overall, this looks good. Some minor suggestions.

> +struct nvsc_rsc {
> +	const struct ndis_pkt_8021q_info *vlan;
> +	const struct ndis_tcp_ip_checksum_info *csum_info;
> +	u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> +	u32 cnt; /* #fragments in an RSC packet */
> +	u32 pktlen; /* Full packet length */
> +	void *data[NVSP_RSC_MAX];
> +	u32 len[NVSP_RSC_MAX];
> +};
> +

This new structure holds state on a per-channel basis.
Do you really need this state to be persistent across packets?

Could this be on stack or do you need it to handle split packets
arriving in different polls? Or is the stack space a problem?

Also, maybe data and length could be in one structure since they
are related.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
  2018-09-21 18:36   ` [PATCH net-next, v2, 1/3] " Stephen Hemminger
@ 2018-09-21 18:51     ` Haiyang Zhang
  2018-09-21 20:22       ` Stephen Hemminger
  0 siblings, 1 reply; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 18:51 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, olaf, linux-kernel, devel, vkuznets



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Friday, September 21, 2018 2:37 PM
> To: Haiyang Zhang <haiyangz@linuxonhyperv.com>
> Cc: Haiyang Zhang <haiyangz@microsoft.com>; davem@davemloft.net;
> netdev@vger.kernel.org; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> devel@linuxdriverproject.org; vkuznets <vkuznets@redhat.com>
> Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in
> the vSwitch
> 
> On Fri, 21 Sep 2018 18:20:35 +0000
> Haiyang Zhang <haiyangz@linuxonhyperv.com> wrote:
> 
> Overall, this looks good. Some minor suggestions.
> 
> > +struct nvsc_rsc {
> > +	const struct ndis_pkt_8021q_info *vlan;
> > +	const struct ndis_tcp_ip_checksum_info *csum_info;
> > +	u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> > +	u32 cnt; /* #fragments in an RSC packet */
> > +	u32 pktlen; /* Full packet length */
> > +	void *data[NVSP_RSC_MAX];
> > +	u32 len[NVSP_RSC_MAX];
> > +};
> > +
> 
> This new state structure is state on a per-channel basis.
> Do you really need this to be persistent across packets?
> 
> Could this be on stack or do you need it to handle split packets arriving in
> different polls? Or is the stack space a problem?
> 
> Also, maybe data and length could be in one structure since they are related.

The stack space is a problem. NVSP_RSC_MAX is 562, which is defined by the host.
Arrays of that size would be too large for the limited stack space.

struct nvsc_rsc includes the data, len, cnt, chksum, vlan for one RSC packet. They
are all related to construction of one SKB and its meta data. So I put them in
one structure.

Thanks,
- Haiyang


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
  2018-09-21 18:51     ` Haiyang Zhang
@ 2018-09-21 20:22       ` Stephen Hemminger
  2018-09-21 20:29         ` Haiyang Zhang
  0 siblings, 1 reply; 9+ messages in thread
From: Stephen Hemminger @ 2018-09-21 20:22 UTC (permalink / raw)
  To: Haiyang Zhang; +Cc: davem, netdev, olaf, linux-kernel, devel, vkuznets

On Fri, 21 Sep 2018 18:51:54 +0000
Haiyang Zhang <haiyangz@microsoft.com> wrote:

> > -----Original Message-----
> > From: Stephen Hemminger <stephen@networkplumber.org>
> > Sent: Friday, September 21, 2018 2:37 PM
> > To: Haiyang Zhang <haiyangz@linuxonhyperv.com>
> > Cc: Haiyang Zhang <haiyangz@microsoft.com>; davem@davemloft.net;
> > netdev@vger.kernel.org; olaf@aepfle.de; linux-kernel@vger.kernel.org;
> > devel@linuxdriverproject.org; vkuznets <vkuznets@redhat.com>
> > Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in
> > the vSwitch
> > 
> > On Fri, 21 Sep 2018 18:20:35 +0000
> > Haiyang Zhang <haiyangz@linuxonhyperv.com> wrote:
> > 
> > Overall, this looks good. Some minor suggestions.
> >   
> > > +struct nvsc_rsc {
> > > +	const struct ndis_pkt_8021q_info *vlan;
> > > +	const struct ndis_tcp_ip_checksum_info *csum_info;
> > > +	u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> > > +	u32 cnt; /* #fragments in an RSC packet */
> > > +	u32 pktlen; /* Full packet length */
> > > +	void *data[NVSP_RSC_MAX];
> > > +	u32 len[NVSP_RSC_MAX];
> > > +};
> > > +  
> > 
> > This new state structure is state on a per-channel basis.
> > Do you really need this to be persistent across packets?
> > 
> > Could this be on stack or do you need it to handle split packets arriving in
> > different polls? Or is the stack space a problem?
> > 
> > Also, maybe data and length could be in one structure since they are related.  
> 
> The stack space is a problem. NVSP_RSC_MAX is 562, which is defined by host.
> It will be too large for limited stack space. 
> 
> struct nvsc_rsc includes the data, len, cnt, chksum, vlan for one RSC packet. They
> are all related to construction of one SKB and its meta data. So I put them in
> one structure.
> 
> Thanks,
> - Haiyang
> 

That makes sense. How big is sizeof(struct net_device) + netdev_priv now?
Need to make sure it doesn't become an order 2 (ie keep it less than 4K).

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in the vSwitch
  2018-09-21 20:22       ` Stephen Hemminger
@ 2018-09-21 20:29         ` Haiyang Zhang
  0 siblings, 0 replies; 9+ messages in thread
From: Haiyang Zhang @ 2018-09-21 20:29 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: davem, netdev, olaf, linux-kernel, devel, vkuznets



> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Friday, September 21, 2018 4:22 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: davem@davemloft.net; netdev@vger.kernel.org; olaf@aepfle.de; linux-
> kernel@vger.kernel.org; devel@linuxdriverproject.org; vkuznets
> <vkuznets@redhat.com>
> Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for LRO/RSC in
> the vSwitch
> 
> On Fri, 21 Sep 2018 18:51:54 +0000
> Haiyang Zhang <haiyangz@microsoft.com> wrote:
> 
> > > -----Original Message-----
> > > From: Stephen Hemminger <stephen@networkplumber.org>
> > > Sent: Friday, September 21, 2018 2:37 PM
> > > To: Haiyang Zhang <haiyangz@linuxonhyperv.com>
> > > Cc: Haiyang Zhang <haiyangz@microsoft.com>; davem@davemloft.net;
> > > netdev@vger.kernel.org; olaf@aepfle.de;
> > > linux-kernel@vger.kernel.org; devel@linuxdriverproject.org; vkuznets
> > > <vkuznets@redhat.com>
> > > Subject: Re: [PATCH net-next, v2, 1/3] hv_netvsc: Add support for
> > > LRO/RSC in the vSwitch
> > >
> > > On Fri, 21 Sep 2018 18:20:35 +0000
> > > Haiyang Zhang <haiyangz@linuxonhyperv.com> wrote:
> > >
> > > Overall, this looks good. Some minor suggestions.
> > >
> > > > +struct nvsc_rsc {
> > > > +	const struct ndis_pkt_8021q_info *vlan;
> > > > +	const struct ndis_tcp_ip_checksum_info *csum_info;
> > > > +	u8 is_last; /* last RNDIS msg in a vmtransfer_page */
> > > > +	u32 cnt; /* #fragments in an RSC packet */
> > > > +	u32 pktlen; /* Full packet length */
> > > > +	void *data[NVSP_RSC_MAX];
> > > > +	u32 len[NVSP_RSC_MAX];
> > > > +};
> > > > +
> > >
> > > This new state structure is state on a per-channel basis.
> > > Do you really need this to be persistent across packets?
> > >
> > > Could this be on stack or do you need it to handle split packets
> > > arriving in different polls? Or is the stack space a problem?
> > >
> > > Also, maybe data and length could be in one structure since they are
> related.
> >
> > The stack space is a problem. NVSP_RSC_MAX is 562, which is defined by host.
> > It will be too large for limited stack space.
> >
> > struct nvsc_rsc includes the data, len, cnt, chksum, vlan for one RSC
> > packet. They are all related to construction of one SKB and its meta
> > data. So I put them in one structure.
> >
> > Thanks,
> > - Haiyang
> >
> 
> That makes sense. How big is sizeof(struct net_device) + netdev_priv now?
> Need to make sure it doesn't become an order 2 (ie keep it less than 4K).

This patch does not change struct net_device or netdev_priv, so their size
stays the same. Thanks.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch
  2018-09-21 18:20 [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch Haiyang Zhang
                   ` (2 preceding siblings ...)
  2018-09-21 18:20 ` [PATCH net-next,v2,3/3] hv_netvsc: Update document for LRO/RSC support Haiyang Zhang
@ 2018-09-23  0:23 ` David Miller
  3 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-09-23  0:23 UTC (permalink / raw)
  To: haiyangz, haiyangz
  Cc: netdev, kys, sthemmin, olaf, vkuznets, devel, linux-kernel

From: Haiyang Zhang <haiyangz@linuxonhyperv.com>
Date: Fri, 21 Sep 2018 18:20:34 +0000

> From: Haiyang Zhang <haiyangz@microsoft.com>
> 
> The patch adds support for LRO/RSC in the vSwitch feature. It reduces
> the per packet processing overhead by coalescing multiple TCP segments
> when possible. The feature is enabled by default on VMs running on
> Windows Server 2019 and later.
> 
> The patch set also adds ethtool command handler and documents.

Series applied, thank you.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-09-23  0:24 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-21 18:20 [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch Haiyang Zhang
2018-09-21 18:20 ` [PATCH net-next,v2,1/3] hv_netvsc: Add support for " Haiyang Zhang
2018-09-21 18:36   ` [PATCH net-next, v2, 1/3] " Stephen Hemminger
2018-09-21 18:51     ` Haiyang Zhang
2018-09-21 20:22       ` Stephen Hemminger
2018-09-21 20:29         ` Haiyang Zhang
2018-09-21 18:20 ` [PATCH net-next,v2,2/3] hv_netvsc: Add handler for LRO setting change Haiyang Zhang
2018-09-21 18:20 ` [PATCH net-next,v2,3/3] hv_netvsc: Update document for LRO/RSC support Haiyang Zhang
2018-09-23  0:23 ` [PATCH net-next,v2,0/3] hv_netvsc: Support LRO/RSC in the vSwitch David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).