netdev.vger.kernel.org archive mirror
* [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
@ 2014-09-18  0:11 David L Stevens
  2014-09-18  4:09 ` Raghuram Kothakota
  2014-09-18  4:23 ` Raghuram Kothakota
  0 siblings, 2 replies; 13+ messages in thread
From: David L Stevens @ 2014-09-18  0:11 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

This patch upgrades the sunvnet driver to support VIO protocol version 1.6.
In particular, it adds per-port MTU negotiation, allowing MTUs other than
ETH_FRAME_LEN for ports using newer VIO protocol versions.

Signed-off-by: David L Stevens <david.stevens@oracle.com>
---
 arch/sparc/include/asm/vio.h       |   44 ++++++++++++++-
 arch/sparc/kernel/viohs.c          |   14 ++++-
 drivers/net/ethernet/sun/sunvnet.c |  104 +++++++++++++++++++++++++++++------
 drivers/net/ethernet/sun/sunvnet.h |    3 +
 4 files changed, 143 insertions(+), 22 deletions(-)

diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
index 5c0ebe7..107d4a4 100644
--- a/arch/sparc/include/asm/vio.h
+++ b/arch/sparc/include/asm/vio.h
@@ -65,6 +65,7 @@ struct vio_dring_register {
 	u16			options;
 #define VIO_TX_DRING		0x0001
 #define VIO_RX_DRING		0x0002
+#define VIO_RX_DRING_DATA	0x0004
 	u16			resv;
 	u32			num_cookies;
 	struct ldc_trans_cookie	cookies[0];
@@ -80,6 +81,8 @@ struct vio_dring_unregister {
 #define VIO_PKT_MODE		0x01 /* Packet based transfer	*/
 #define VIO_DESC_MODE		0x02 /* In-band descriptors	*/
 #define VIO_DRING_MODE		0x03 /* Descriptor rings	*/
+/* in vers >= 1.2, VIO_DRING_MODE is 0x04 and transfer mode is a bitmask */
+#define VIO_NEW_DRING_MODE	0x04
 
 struct vio_dring_data {
 	struct vio_msg_tag	tag;
@@ -209,10 +212,20 @@ struct vio_net_attr_info {
 	u8			addr_type;
 #define VNET_ADDR_ETHERMAC	0x01
 	u16			ack_freq;
-	u32			resv1;
+	u8			plnk_updt;
+#define PHYSLINK_UPDATE_NONE		0x00
+#define PHYSLINK_UPDATE_STATE		0x01
+#define PHYSLINK_UPDATE_STATE_ACK	0x02
+#define PHYSLINK_UPDATE_STATE_NACK	0x03
+	u8			options;
+	u16			resv1;
 	u64			addr;
 	u64			mtu;
-	u64			resv2[3];
+	u16			cflags;
+#define VNET_LSO_IPV4_CAPAB		0x0001
+	u16			ipv4_lso_maxlen;
+	u32			resv2;
+	u64			resv3[2];
 };
 
 #define VNET_NUM_MCAST		7
@@ -370,6 +383,33 @@ struct vio_driver_state {
 	struct vio_driver_ops	*ops;
 };
 
+static inline bool vio_version_before(struct vio_driver_state *vio,
+				      u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have < want;
+}
+
+static inline bool vio_version_after(struct vio_driver_state *vio,
+				      u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have > want;
+}
+
+static inline bool vio_version_after_eq(struct vio_driver_state *vio,
+					u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have >= want;
+}
+
 #define viodbg(TYPE, f, a...) \
 do {	if (vio->debug & VIO_DEBUG_##TYPE) \
 		printk(KERN_INFO "vio: ID[%lu] " f, \
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
index f8e7dd5..7ef081a 100644
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@@ -426,6 +426,13 @@ static int process_dreg_info(struct vio_driver_state *vio,
 	if (vio->dr_state & VIO_DR_STATE_RXREG)
 		goto send_nack;
 
+	/* v1.6 and higher, ACK with desired, supported mode, or NACK */
+	if (vio_version_after_eq(vio, 1, 6)) {
+		if (!(pkt->options & VIO_TX_DRING))
+			goto send_nack;
+		pkt->options = VIO_TX_DRING;
+	}
+
 	BUG_ON(vio->desc_buf);
 
 	vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
@@ -453,8 +460,11 @@ static int process_dreg_info(struct vio_driver_state *vio,
 	pkt->tag.stype = VIO_SUBTYPE_ACK;
 	pkt->dring_ident = ++dr->ident;
 
-	viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
-	       (unsigned long long) pkt->dring_ident);
+	viodbg(HS, "SEND DRING_REG ACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
 
 	len = (sizeof(*pkt) +
 	       (dr->ncookies * sizeof(struct ldc_trans_cookie)));
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 763cdfc..a9638c1 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -15,6 +15,7 @@
 #include <linux/ethtool.h>
 #include <linux/etherdevice.h>
 #include <linux/mutex.h>
+#include <linux/if_vlan.h>
 
 #include <asm/vio.h>
 #include <asm/ldc.h>
@@ -41,6 +42,7 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
 
 /* Ordered from largest major to lowest */
 static struct vio_version vnet_versions[] = {
+	{ .major = 1, .minor = 6 },
 	{ .major = 1, .minor = 0 },
 };
 
@@ -67,6 +69,7 @@ static int vnet_send_attr(struct vio_driver_state *vio)
 	struct vnet_port *port = to_vnet_port(vio);
 	struct net_device *dev = port->vp->dev;
 	struct vio_net_attr_info pkt;
+	int framelen = ETH_FRAME_LEN;
 	int i;
 
 	memset(&pkt, 0, sizeof(pkt));
@@ -74,19 +77,41 @@ static int vnet_send_attr(struct vio_driver_state *vio)
 	pkt.tag.stype = VIO_SUBTYPE_INFO;
 	pkt.tag.stype_env = VIO_ATTR_INFO;
 	pkt.tag.sid = vio_send_sid(vio);
-	pkt.xfer_mode = VIO_DRING_MODE;
+	if (vio_version_before(vio, 1, 2))
+		pkt.xfer_mode = VIO_DRING_MODE;
+	else
+		pkt.xfer_mode = VIO_NEW_DRING_MODE;
 	pkt.addr_type = VNET_ADDR_ETHERMAC;
 	pkt.ack_freq = 0;
 	for (i = 0; i < 6; i++)
 		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
-	pkt.mtu = ETH_FRAME_LEN;
+	if (vio_version_after(vio, 1, 3)) {
+		if (port->rmtu) {
+			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
+			pkt.mtu = port->rmtu;
+		} else {
+			port->rmtu = VNET_MAXPACKET;
+			pkt.mtu = port->rmtu;
+		}
+		if (vio_version_after_eq(vio, 1, 6))
+			pkt.options = VIO_TX_DRING;
+	} else if (vio_version_before(vio, 1, 3)) {
+		pkt.mtu = framelen;
+	} else { /* v1.3 */
+		pkt.mtu = framelen + VLAN_HLEN;
+	}
+
+	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;
+	pkt.cflags = 0;
 
 	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
-	       "ackfreq[%u] mtu[%llu]\n",
+	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
+	       "cflags[0x%04x] lso_max[%u]\n",
 	       pkt.xfer_mode, pkt.addr_type,
-	       (unsigned long long) pkt.addr,
-	       pkt.ack_freq,
-	       (unsigned long long) pkt.mtu);
+	       (unsigned long long)pkt.addr,
+	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
+	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);
+
 
 	return vio_ldc_send(vio, &pkt, sizeof(pkt));
 }
@@ -94,18 +119,52 @@ static int vnet_send_attr(struct vio_driver_state *vio)
 static int handle_attr_info(struct vio_driver_state *vio,
 			    struct vio_net_attr_info *pkt)
 {
-	viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] "
-	       "ackfreq[%u] mtu[%llu]\n",
+	struct vnet_port *port = to_vnet_port(vio);
+	u64	localmtu;
+	u8	xfer_mode;
+
+	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
+	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
+	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
 	       pkt->xfer_mode, pkt->addr_type,
-	       (unsigned long long) pkt->addr,
-	       pkt->ack_freq,
-	       (unsigned long long) pkt->mtu);
+	       (unsigned long long)pkt->addr,
+	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
+	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
+	       pkt->ipv4_lso_maxlen);
 
 	pkt->tag.sid = vio_send_sid(vio);
 
-	if (pkt->xfer_mode != VIO_DRING_MODE ||
+	xfer_mode = pkt->xfer_mode;
+	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
+	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
+		xfer_mode = VIO_NEW_DRING_MODE;
+
+	/* MTU negotiation:
+	 *	< v1.3 - ETH_FRAME_LEN exactly
+	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
+	 *			pkt->mtu for ACK
+	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
+	 */
+	if (vio_version_before(vio, 1, 3)) {
+		localmtu = ETH_FRAME_LEN;
+	} else if (vio_version_after(vio, 1, 3)) {
+		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
+		localmtu = min(pkt->mtu, localmtu);
+		pkt->mtu = localmtu;
+	} else { /* v1.3 */
+		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
+	}
+	port->rmtu = localmtu;
+
+	/* for version >= 1.6, ACK packet mode we support */
+	if (vio_version_after_eq(vio, 1, 6)) {
+		pkt->xfer_mode = VIO_NEW_DRING_MODE;
+		pkt->options = VIO_TX_DRING;
+	}
+
+	if (!(xfer_mode | VIO_NEW_DRING_MODE) ||
 	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
-	    pkt->mtu != ETH_FRAME_LEN) {
+	    pkt->mtu != localmtu) {
 		viodbg(HS, "SEND NET ATTR NACK\n");
 
 		pkt->tag.stype = VIO_SUBTYPE_NACK;
@@ -114,7 +173,14 @@ static int handle_attr_info(struct vio_driver_state *vio,
 
 		return -ECONNRESET;
 	} else {
-		viodbg(HS, "SEND NET ATTR ACK\n");
+		viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
+		       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
+		       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
+		       pkt->xfer_mode, pkt->addr_type,
+		       (unsigned long long)pkt->addr,
+		       pkt->ack_freq, pkt->plnk_updt, pkt->options,
+		       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
+		       pkt->ipv4_lso_maxlen);
 
 		pkt->tag.stype = VIO_SUBTYPE_ACK;
 
@@ -210,7 +276,7 @@ static int vnet_rx_one(struct vnet_port *port, unsigned int len,
 	int err;
 
 	err = -EMSGSIZE;
-	if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) {
+	if (unlikely(len < ETH_ZLEN || len > port->rmtu)) {
 		dev->stats.rx_length_errors++;
 		goto out_dropped;
 	}
@@ -555,8 +621,10 @@ static void vnet_event(void *arg, int event)
 		vio_link_state_change(vio, event);
 		spin_unlock_irqrestore(&vio->lock, flags);
 
-		if (event == LDC_EVENT_RESET)
+		if (event == LDC_EVENT_RESET) {
+			port->rmtu = 0;
 			vio_port_up(vio);
+		}
 		return;
 	}
 
@@ -1048,8 +1116,8 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
 	void *dring;
 
 	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
-		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
-		int map_len = (ETH_FRAME_LEN + 7) & ~7;
+		void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL);
+		int map_len = (VNET_MAXPACKET + 7) & ~7;
 
 		err = -ENOMEM;
 		if (!buf)
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index da49337..986e04b 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -11,6 +11,7 @@
  */
 #define VNET_TX_TIMEOUT			(5 * HZ)
 
+#define VNET_MAXPACKET			1518ULL /* ETH_FRAMELEN + VLAN_HDR */
 #define VNET_TX_RING_SIZE		512
 #define VNET_TX_WAKEUP_THRESH(dr)	((dr)->pending / 4)
 
@@ -44,6 +45,8 @@ struct vnet_port {
 	u32			stop_rx_idx;
 	bool			stop_rx;
 	bool			start_cons;
+
+	u64			rmtu;
 };
 
 static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
-- 
1.7.1


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18  0:11 [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6 David L Stevens
@ 2014-09-18  4:09 ` Raghuram Kothakota
  2014-09-18 13:03   ` David L Stevens
  2014-09-18  4:23 ` Raghuram Kothakota
  1 sibling, 1 reply; 13+ messages in thread
From: Raghuram Kothakota @ 2014-09-18  4:09 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev


On Sep 17, 2014, at 5:11 PM, David L Stevens <david.stevens@oracle.com> wrote:

> @@ -1048,8 +1116,8 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
> 	void *dring;
> 
> 	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
> -		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
> -		int map_len = (ETH_FRAME_LEN + 7) & ~7;
> +		void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL);


This patch doesn't change VNET_MAXPACKET to 64k, but patch 2/3 changes
it to 64k+. Always allocating buffers of size VNET_MAXPACKET can consume too much
memory for every port/LDC -- that would be more than 32MB.  You may want to allocate
buffers based on the MTU that is negotiated, so that this memory is used only when
such large packets are accepted by the peer.
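
Roughly the kind of thing I mean -- only an untested sketch of the allocation
loop, and it assumes port->rmtu already holds the negotiated value by the time
the buffers are (re)allocated, which the current handshake ordering may not
guarantee:

/* Untested sketch: size the per-port TX buffers from the negotiated MTU
 * instead of the worst-case VNET_MAXPACKET.  Reuses the existing
 * tx_bufs[] fields and LDC map flags; dring setup and error unwinding
 * are omitted.
 */
static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
{
	u64 buflen = port->rmtu ? port->rmtu : ETH_FRAME_LEN;
	int i, err;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		void *buf = kzalloc(buflen + 8, GFP_KERNEL);
		int map_len = (buflen + 7) & ~7;

		if (!buf)
			return -ENOMEM;

		err = ldc_map_single(port->vio.lp, buf, map_len,
				     port->tx_bufs[i].cookies, 2,
				     LDC_MAP_SHADOW | LDC_MAP_DIRECT |
				     LDC_MAP_RW);
		if (err < 0) {
			kfree(buf);
			return err;
		}
		port->tx_bufs[i].buf = buf;
		port->tx_bufs[i].ncookies = err;
	}
	return 0;
}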

-Raghuram




* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18  0:11 [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6 David L Stevens
  2014-09-18  4:09 ` Raghuram Kothakota
@ 2014-09-18  4:23 ` Raghuram Kothakota
  2014-09-18 13:21   ` David L Stevens
  1 sibling, 1 reply; 13+ messages in thread
From: Raghuram Kothakota @ 2014-09-18  4:23 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev


On Sep 17, 2014, at 5:11 PM, David L Stevens <david.stevens@oracle.com> wrote:

> diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
> index f8e7dd5..7ef081a 100644
> --- a/arch/sparc/kernel/viohs.c
> +++ b/arch/sparc/kernel/viohs.c
> @@ -426,6 +426,13 @@ static int process_dreg_info(struct vio_driver_state *vio,
> 	if (vio->dr_state & VIO_DR_STATE_RXREG)
> 		goto send_nack;
> 
> +	/* v1.6 and higher, ACK with desired, supported mode, or NACK */
> +	if (vio_version_after_eq(vio, 1, 6)) {
> +		if (!(pkt->options & VIO_TX_DRING))
> +			goto send_nack;
> +		pkt->options = VIO_TX_DRING;
> +	}
> +


I forgot to send this comment in my previous email. The above function is
common to all clients, which includes vdisk.  The vdisk today doesn't use any
version number above 1.2 or 1.3, so it probably won't be impacted, but this
special handling doesn't seem to belong in the common code.

-Raghuram



* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18  4:09 ` Raghuram Kothakota
@ 2014-09-18 13:03   ` David L Stevens
  2014-09-18 18:49     ` Raghuram Kothakota
  2014-09-23 16:24     ` David Miller
  0 siblings, 2 replies; 13+ messages in thread
From: David L Stevens @ 2014-09-18 13:03 UTC (permalink / raw)
  To: Raghuram Kothakota; +Cc: David Miller, netdev



On 09/18/2014 12:09 AM, Raghuram Kothakota wrote:

>> @@ -1048,8 +1116,8 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
>> 	void *dring;
>>
>> 	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
>> -		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
>> -		int map_len = (ETH_FRAME_LEN + 7) & ~7;
>> +		void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL);
> 
> 
> This patch doesn't change VNET_MAXPACKET to 64k, but patch 2/3 changes
> it to 64k+. Always allocating buffers of size VNET_MAXPACKET can consume too much
> memory for every port/LDC -- that would be more than 32MB.  You may want to allocate
> buffers based on the MTU that is negotiated, so that this memory is used only when
> such large packets are accepted by the peer.

I originally had code to dynamically allocate them after the MTU negotiation, but
that opens up a can of worms regarding stopping and freeing an active ring. I don't
believe the shutdown code addresses this adequately, either, and I think this is
worth addressing, but separately.

I convinced myself to do it this way because:
a) memory is cheap
b) I think most people will want to use large MTUs for performance; enough so
	that perhaps the bring-up MTU should be 64K too
c) future (actually current) TSO/GSO work will want large buffers even if the MTU
	is not changed

So, if this is actually too much memory, I was more inclined to reduce the ring
size rather than either add complicating code to handle active-ring reallocation
that would typically be run once per boot, or, as another alternative, add a
module parameter to specify the buffer size. TSO/GSO will need 64K to perform
well, regardless of the device MTU.

							+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18  4:23 ` Raghuram Kothakota
@ 2014-09-18 13:21   ` David L Stevens
  0 siblings, 0 replies; 13+ messages in thread
From: David L Stevens @ 2014-09-18 13:21 UTC (permalink / raw)
  To: Raghuram Kothakota; +Cc: David Miller, netdev



On 09/18/2014 12:23 AM, Raghuram Kothakota wrote:

>>
>> +	/* v1.6 and higher, ACK with desired, supported mode, or NACK */
>> +	if (vio_version_after_eq(vio, 1, 6)) {
>> +		if (!(pkt->options & VIO_TX_DRING))
>> +			goto send_nack;
>> +		pkt->options = VIO_TX_DRING;
>> +	}
>> +
> 
> 
> I forgot to send this comment in my previous email. The above function is
> common to all clients, which includes vdisk.  The vdisk today doesn't use any
> version number above 1.2 or 1.3, so it probably won't be impacted, but this
> special handling doesn't seem to belong in the common code.

Yes, this is updating the VIO protocol, which is used by both sunvnet and sunvdc.
If the vdisk uses a newer protocol version, it will need to do exactly what this
code does (and it already will, because this code is there).

Any protocol version differences in the vdisk-specific code will need to be part
of a vdisk version upgrade patch. The particular VIO version that is negotiated
is specified in the individual drivers and my patches update that for sunvnet only.
The sunvdc code is still negotiating and using 1.0 in the net-next kernel, so
none of these version checks change anything for the sunvdc driver.

							+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18 13:03   ` David L Stevens
@ 2014-09-18 18:49     ` Raghuram Kothakota
  2014-09-18 19:58       ` David L Stevens
  2014-09-23 16:24     ` David Miller
  1 sibling, 1 reply; 13+ messages in thread
From: Raghuram Kothakota @ 2014-09-18 18:49 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev


On Sep 18, 2014, at 6:03 AM, David L Stevens <david.stevens@oracle.com> wrote:

> 
> 
> On 09/18/2014 12:09 AM, Raghuram Kothakota wrote:
> 
>>> @@ -1048,8 +1116,8 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
>>> 	void *dring;
>>> 
>>> 	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
>>> -		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
>>> -		int map_len = (ETH_FRAME_LEN + 7) & ~7;
>>> +		void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL);
>> 
>> 
>> This patch doesn't change VNET_MAXPACKET to 64k, but patch 2/3 changes
>> it to 64k+. Always allocating buffers of size VNET_MAXPACKET can consume too much
>> memory for every port/LDC -- that would be more than 32MB.  You may want to allocate
>> buffers based on the MTU that is negotiated, so that this memory is used only when
>> such large packets are accepted by the peer.
> 
> I originally had code to dynamically allocate them after the MTU negotiation, but
> that opens up a can of worms regarding stopping and freeing an active ring. I don't
> believe the shutdown code addresses this adequately, either, and I think this is
> worth addressing, but separately.
> 

I am probably not as knowledgeable about sunvnet as you may be, but I would assume
the code is capable of handling a vport removal and should have sufficient
cleanup for that as well.

> I convinced myself to do it this way because:
> a) memory is cheap

In the virtualization world, we want resources to be used efficiently, and memory is
still a very important resource. My concern is mostly that this memory usage of
32+MB is on a per-LDC basis. LDoms today supports a max of 128 domains, but
from my experience actual deployments are on the order of 50 domains. This is
going up as the platforms get more and more powerful.  If there really are
that many peers, then the amount of memory consumed by one vnet instance
is 50 * 32+MB = 1.6GB+.  It's fine if this memory is really used, but it seems like this
will be useful only when the peer is another Linux guest with this version of vnet and
the MTU is also configured to use 64K.  The memory is being wasted for all other
peers that either don't support a 64K MTU or aren't configured to use it, and also for
the switch port, which obviously doesn't support a 64K MTU today.

Note, it's not just buffer space that is being consumed here, it is also LDC shared
memory space. Luckily the SHADOW_MAPPED shared memory space has
very few limitations; otherwise this could also impact other virtual devices.


> b) I think most people will want to use large MTUs for performance; enough so
> 	that perhaps the bring-up MTU should be 64K too


From my experience in SPARC world, most customers pushed us back for any
proposal to use Jumbo Frames. The customers who configured Jumbo frames,
mostly used 9K for performance of NFS etc.

> c) future (actually current) TSO/GSO work will want large buffers even if the MTU
> 	is not changed
> 

When we implemented TSO support, we evaluated the cost of the buffers vs
performance. We were able to limit TSO support to 8K (actually a bit less) and still
achieve high performance; for example, we are able to drive line rate on a 10G link
and guest-to-guest throughput on the order of 45+Gbps. So, my suggestion would be to
increase the parallelism of the code rather than depending on a large MTU.

> So, if this is actually too much memory, I was more inclined to reduce the ring
> size rather than either add complicating code to handle active-ring reallocation
> that would typically be run once per boot, or, as another alternative, add a
> module parameter to specify the buffer size. TSO/GSO will need 64K to perform
> well, regardless of the device MTU.
> 

Note, my experience shows that reducing the ring size can have a larger impact on
the standard-MTU case. I would assume the sunvnet code needs to be dynamic in any
case, to deal with ports being added and removed and ports going down and up, and
this is just one aspect of those operations.

-Raghuram

> 							+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18 18:49     ` Raghuram Kothakota
@ 2014-09-18 19:58       ` David L Stevens
  2014-09-22  4:40         ` David L Stevens
  0 siblings, 1 reply; 13+ messages in thread
From: David L Stevens @ 2014-09-18 19:58 UTC (permalink / raw)
  To: Raghuram Kothakota; +Cc: David Miller, netdev



On 09/18/2014 02:49 PM, Raghuram Kothakota wrote:

> 
> I am probably not as knowledgeable about sunvnet as you may be, but I would assume
> the code is capable of handling a vport removal and should have sufficient
> cleanup for that as well.

There is nothing in the code that I saw that checks whether pending descriptors are marked
as VIO_DESC_DONE before freeing the buffer space when the device is shut down. From that, I
assume a shutdown during active transmits could result in a fault or in the remote looking
at garbage. Adding a wait for that condition could take "forever" if the remote disappears
while processing receives, so it'd need a timeout, too, and it all needs to switch gracefully
to the new buffer set.

I didn't want to put MTU changes in the list of things to trigger that, as I believe they
also would, nor to complicate the MTU-relevant pieces I have with something that I think
ultimately should use fewer, larger buffers.
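
For what it's worth, the sort of drain step I'd want on shutdown is roughly the
sketch below -- a hypothetical helper, not part of this series; it uses the
existing asm/vio.h dring accessors, but the timeout value is arbitrary:

/* Hypothetical sketch (not in this series): before freeing the TX ring
 * buffers on shutdown, wait -- with a timeout in case the peer is gone --
 * for the remote to mark the pending descriptors VIO_DESC_DONE.
 */
static void vnet_port_drain_tx_ring(struct vnet_port *port)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	unsigned long deadline = jiffies + msecs_to_jiffies(2000);	/* arbitrary */

	while (dr->cons != dr->prod) {
		struct vio_net_desc *d = vio_dring_entry(dr, dr->cons);

		if (d->hdr.state == VIO_DESC_DONE ||
		    d->hdr.state == VIO_DESC_FREE) {
			d->hdr.state = VIO_DESC_FREE;
			dr->cons = vio_dring_next(dr, dr->cons);
			continue;
		}
		if (time_after(jiffies, deadline))
			break;			/* remote is gone; give up */
		usleep_range(1000, 2000);
	}
}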

> In the virtualization world, we want resources to be used efficiently, and memory is
> still a very important resource. My concern is mostly that this memory usage of
> 32+MB is on a per-LDC basis. LDoms today supports a max of 128 domains, but
> from my experience actual deployments are on the order of 50 domains. This is
> going up as the platforms get more and more powerful.  If there really are
> that many peers, then the amount of memory consumed by one vnet instance
> is 50 * 32+MB = 1.6GB+.  It's fine if this memory is really used, but it seems like this
> will be useful only when the peer is another Linux guest with this version of vnet and
> the MTU is also configured to use 64K.  The memory is being wasted for all other
> peers that either don't support a 64K MTU or aren't configured to use it, and also for
> the switch port, which obviously doesn't support a 64K MTU today.

Since the current code allows only 1500-byte MTUs, this patch set is useful for any
value larger than that-- on Solaris, up to 16000, and on Linux, as high as 64K.
I think dynamic buffer allocation should not be tied to these other items, but it
should be done. This isn't the end of sunvnet development, but the beginning. However,
if 64K is too large, what value is not? Any number we pick is arbitrary and with large
numbers of LDOMs may be "too many." Of course, the smaller it is, the smaller the
benefit, too.

I would prefer to reduce VNET_MAXPACKET to some lower value, or make it a module
parameter, rather than tie this patchset to dynamic allocation of buffers. But even the
16000 value Solaris supports would correspond to ~400MB in your example above --
still a large number for a single virtual interface.
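
The module-parameter variant would be trivial -- something like the sketch
below, where the name, default and permissions are made up for illustration,
and the attr negotiation and buffer allocation would clamp against it instead
of VNET_MAXPACKET:

/* Sketch only: replace the compile-time VNET_MAXPACKET cap with a module
 * parameter.  Name and default are made up; vnet_send_attr() and
 * handle_attr_info() would clamp pkt.mtu/port->rmtu against this value.
 */
static unsigned int vnet_maxpacket = 65535 + ETH_HLEN + VLAN_HLEN;
module_param(vnet_maxpacket, uint, 0444);
MODULE_PARM_DESC(vnet_maxpacket,
		 "Largest frame (and TX buffer size) to negotiate per port");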

>> b) I think most people will want to use large MTUs for performance; enough so
>> 	that perhaps the bring-up MTU should be 64K too
> 
> 
> From my experience in SPARC world, most customers pushed us back for any
> proposal to use Jumbo Frames. The customers who configured Jumbo frames,
> mostly used 9K for performance of NFS etc.

Customers have never been able to use jumbo frames on Linux LDOMs before, and I
expect an 8X improvement in throughput might affect their judgement on it. More so
if those large buffers and throughput improvements can be delivered with GSO/TSO by
default, with no MTU adjustments on the devices.

> When we implemented TSO support, we evaluated the cost of the buffers vs
> performance. We were able to limit TSO support to 8K (actually a bit less) and still
> achieve high performance; for example, we are able to drive line rate on a 10G link
> and guest-to-guest throughput on the order of 45+Gbps. So, my suggestion would be to
> increase the parallelism of the code rather than depending on a large MTU.

That's nice for Solaris. I see:

dlsl1 880 # ip link set mtu 8192 dev eth1
dlsl1 881 # netperf -H 10.0.0.2
TCP STREAM TEST to 10.0.0.2 : interval
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    3176.10
dlsl1 882 # ip link set mtu 65535 dev eth1
dlsl1 883 # netperf -H 10.0.0.2
TCP STREAM TEST to 10.0.0.2 : interval
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    7651.42

So, on Linux, throughput is more than doubled by supporting all the way to
64K-- something I expect to translate to TSO as well. Beyond that, it ought
to be whatever the admin wants-- not some arbitrary limit set in advance.
Since it is a virtual device, there is no physical constraint, as in Ethernet
signaling, to force it to 1500, or 9K. The only limit ought to be the IPv4
max packet size of 64K IP data + framing (and arguably not even that, if it's
using primarily or exclusively IPv6).

						+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18 19:58       ` David L Stevens
@ 2014-09-22  4:40         ` David L Stevens
  2014-09-22 16:40           ` Raghuram Kothakota
  0 siblings, 1 reply; 13+ messages in thread
From: David L Stevens @ 2014-09-22  4:40 UTC (permalink / raw)
  To: Raghuram Kothakota; +Cc: David Miller, netdev



> On 09/18/2014 02:49 PM, Raghuram Kothakota wrote:

>> In the virtualization world, we want resources to be used efficiently, and memory is
>> still a very important resource. My concern is mostly that this memory usage of
>> 32+MB is on a per-LDC basis. LDoms today supports a max of 128 domains, but
>> from my experience actual deployments are on the order of 50 domains. This is
>> going up as the platforms get more and more powerful.  If there really are
>> that many peers, then the amount of memory consumed by one vnet instance
>> is 50 * 32+MB = 1.6GB+.  It's fine if this memory is really used, but it seems like this
>> will be useful only when the peer is another Linux guest with this version of vnet and
>> the MTU is also configured to use 64K.  The memory is being wasted for all other
>> peers that either don't support a 64K MTU or aren't configured to use it, and also for
>> the switch port, which obviously doesn't support a 64K MTU today.

I think I have a solution for this -- I'm doing some experimenting, but it may be a few days.

However, fundamentally, the problem is that there are n^2-n links both ways, so 50 LDOMs on the same vswitch
will always be 2450X the resources of a single pair, and lead to scary aggregate numbers. Large installations
really need more vswitches with fewer LDOMs per switch, at least with the current code.

								+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-22  4:40         ` David L Stevens
@ 2014-09-22 16:40           ` Raghuram Kothakota
  0 siblings, 0 replies; 13+ messages in thread
From: Raghuram Kothakota @ 2014-09-22 16:40 UTC (permalink / raw)
  To: David L Stevens; +Cc: David Miller, netdev


On Sep 21, 2014, at 9:40 PM, David L Stevens <david.stevens@oracle.com> wrote:

> 
> 
>> On 09/18/2014 02:49 PM, Raghuram Kothakota wrote:
> 
>>> In the virtualization world, we want resources to be used efficiently, and memory is
>>> still a very important resource. My concern is mostly that this memory usage of
>>> 32+MB is on a per-LDC basis. LDoms today supports a max of 128 domains, but
>>> from my experience actual deployments are on the order of 50 domains. This is
>>> going up as the platforms get more and more powerful.  If there really are
>>> that many peers, then the amount of memory consumed by one vnet instance
>>> is 50 * 32+MB = 1.6GB+.  It's fine if this memory is really used, but it seems like this
>>> will be useful only when the peer is another Linux guest with this version of vnet and
>>> the MTU is also configured to use 64K.  The memory is being wasted for all other
>>> peers that either don't support a 64K MTU or aren't configured to use it, and also for
>>> the switch port, which obviously doesn't support a 64K MTU today.
> 
> I think I have a solution for this -- I'm doing some experimenting, but it may be a few days.
> 
> However, fundamentally, the problem is that there are n^2-n links both ways, so 50 LDOMs on the same vswitch
> will always be 2450X the resources of a single pair, and lead to scary aggregate numbers. Large installations
> really need more vswitches with fewer LDOMs per switch, at least with the current code.

My example is certainly an extreme case; we introduced an option to
disable these inter-vnet-links mainly due to this explosion of LDC usage.
We advise customers to disable the inter-vnet-links when they see the
need to create a large number of vnets in a given vswitch; typically that is
the case with management networks.
We are also looking at automatically disabling these links when we detect more
vnets (probably >16) in a given vswitch.

-Raghuram
> 
> 								+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-18 13:03   ` David L Stevens
  2014-09-18 18:49     ` Raghuram Kothakota
@ 2014-09-23 16:24     ` David Miller
  2014-09-23 16:49       ` David L Stevens
  1 sibling, 1 reply; 13+ messages in thread
From: David Miller @ 2014-09-23 16:24 UTC (permalink / raw)
  To: david.stevens; +Cc: Raghuram.Kothakota, netdev

From: David L Stevens <david.stevens@oracle.com>
Date: Thu, 18 Sep 2014 09:03:52 -0400

> So, if this is actually too much memory, I was more inclined to reduce the ring
> size rather than either add complicating code to handle active-ring reallocation
> that would typically be run once per boot, or, as another alternative, add a
> module parameter to specify the buffer size. TSO/GSO will need 64K to perform
> well, regardless of the device MTU.

The only reason we are having this discussion is because of how we
handle TX packets.

I think we really should directly map the SKBs in vnet_start_xmit()
instead of having these preallocate TX buffers.

The only thing to accommodate is the VNET_PACKET_SKIP, but that
shouldn't be hard at all.

And I am rather certain that an LDC map call will be cheaper than
copying the entire packet.

Then the MTU will have no material impact on per-vnet_port memory
costs, and bulk sending performance should also increase.
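
Roughly along these lines in the xmit path -- just a sketch of the idea, with a
made-up helper name; the exact LDC map flags, cookie accounting, and the unmap
when the peer ACKs the descriptor are glossed over and would need checking:

/* Sketch only: ldc-map the skb data for this descriptor instead of
 * copying it into a preallocated TX buffer.  The mapping is rounded
 * out to 8-byte boundaries; VNET_PACKET_SKIP headroom and error
 * handling are omitted.
 */
static int vnet_map_tx_skb(struct vnet_port *port, struct sk_buff *skb,
			   struct ldc_trans_cookie *cookies, int ncookies)
{
	unsigned long base = (unsigned long)skb->data & ~7UL;
	unsigned long len = ALIGN(skb->len + ((unsigned long)skb->data - base), 8);

	return ldc_map_single(port->vio.lp, (void *)base, len,
			      cookies, ncookies,
			      LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW);
}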

David, I know you've worked hard on this patch set, but I'm going to
defer on this series for now.  There are several implementation level
issues that are still seemingly up in the air.

I'm almost completely sold on your PMTU scheme, however if we do
direct mapping of SKBs in vnet_start_xmit() then the performance
characteristics with larger MTUs might be different.

Thanks.


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-23 16:24     ` David Miller
@ 2014-09-23 16:49       ` David L Stevens
  2014-09-23 18:44         ` David Miller
  0 siblings, 1 reply; 13+ messages in thread
From: David L Stevens @ 2014-09-23 16:49 UTC (permalink / raw)
  To: David Miller; +Cc: Raghuram.Kothakota, netdev



On 09/23/2014 12:24 PM, David Miller wrote:
> From: David L Stevens <david.stevens@oracle.com>
> Date: Thu, 18 Sep 2014 09:03:52 -0400
> 
>> So, if this is actually too much memory, I was more inclined to reduce the ring
>> size rather than either add complicating code to handle active-ring reallocation
>> that would typically be run once per boot, or, as another alternative, add a
>> module parameter to specify the buffer size. TSO/GSO will need 64K to perform
>> well, regardless of the device MTU.
> 
> The only reason we are having this discussion is because of how we
> handle TX packets.
> 
> I think we really should directly map the SKBs in vnet_start_xmit()
> instead of having these preallocate TX buffers.
> 
> The only thing to accommodate is the VNET_PACKET_SKIP, but that
> shouldn't be hard at all.
> 
> And I am rather certain that an LDC map call will be cheaper than
> copying the entire packet.
> 
> Then the MTU will have no material impact on per-vnet_port memory
> costs, and bulk sending performance should also increase.
> 

Actually, that's exactly what I've been working on for the last few
days. I hope to post this soon. Currently, I allow for misaligned
packets by reallocating the skbs with the proper alignment, skip and
length restrictions, so the code can handle either, but still copies
most of the time. Once I have all the kinks worked out there, I was
planning to possibly make *all* skb allocations on LDOMs and/or SPARC64 fit
those requirements, since they are compatible with the existing alignments
and would allow using the HV copy in any case.

> David, I know you've worked hard on this patch set, but I'm going to
> defer on this series for now.  There are several implementation level
> issues that are still seemingly up in the air.

	Yes, sorry if it wasn't clear in my response to Raghuram, but I
agree to the extent that we shouldn't attach large-buffer allocations to
something scaling at O(n^2), which is why I started on this other patch.

> I'm almost completely sold on your PMTU scheme, however if we do
> direct mapping of SKBs in vnet_start_xmit() then the performance
> characteristics with larger MTUs might be different.

	Yes; the good news is that without the fixed-size buffers,
memory use is only for the pending traffic, greatly improving scalability.
The bad news is that the allocs and frees will have a performance cost,
which I'm hoping will be balanced or bettered by removing the copy.
	Anyway, I'll repost when I have all this ready.

						+-DLS


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-23 16:49       ` David L Stevens
@ 2014-09-23 18:44         ` David Miller
  2014-09-24 14:43           ` David L Stevens
  0 siblings, 1 reply; 13+ messages in thread
From: David Miller @ 2014-09-23 18:44 UTC (permalink / raw)
  To: david.stevens; +Cc: Raghuram.Kothakota, netdev

From: David L Stevens <david.stevens@oracle.com>
Date: Tue, 23 Sep 2014 12:49:27 -0400

> Actually, that's exactly what I've been working on for the last few
> days. I hope to post this soon. Currently, I allow for misaligned
> packets by reallocating the skbs with the proper alignment, skip and
> length restrictions, so the code can handle either, but still copies
> most of the time. Once I have all the kinks worked out there, I was
> planning to possibly make *all* skb allocations on LDOMs and/or SPARC64 fit
> those requirements, since they are compatible with the existing alignments
> and would allow using the HV copy in any case.

You should be able to avoid the copy on TX almost all of the time.

If you do a skb_push(skb, VNET_PACKET_SKIP) (and initialize with some
garbage bytes) it ought to be aligned.

If not, we could tool around with the hard_header_ops to make
things come out the way we need it to.
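
i.e. something like this (sketch only; it assumes the skb actually has the
headroom and is not shared or cloned):

/* Sketch: reserve the VNET_PACKET_SKIP pad inside the skb itself so the
 * mapped data starts where the descriptor expects it.
 */
static bool vnet_skb_prepend_skip(struct sk_buff *skb)
{
	if (skb_headroom(skb) < VNET_PACKET_SKIP)
		return false;		/* caller would fall back to a copy */
	memset(skb_push(skb, VNET_PACKET_SKIP), 0, VNET_PACKET_SKIP);
	return true;
}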


* Re: [PATCHv6 net-next 1/3] sunvnet: upgrade to VIO protocol version 1.6
  2014-09-23 18:44         ` David Miller
@ 2014-09-24 14:43           ` David L Stevens
  0 siblings, 0 replies; 13+ messages in thread
From: David L Stevens @ 2014-09-24 14:43 UTC (permalink / raw)
  To: David Miller; +Cc: Raghuram.Kothakota, netdev



On 09/23/2014 02:44 PM, David Miller wrote:
> From: David L Stevens <david.stevens@oracle.com>
> Date: Tue, 23 Sep 2014 12:49:27 -0400
> 
>> Actually, that's exactly what I've been working on for the last few
>> days. I hope to post this soon. Currently, I allow for misaligned
>> packets by reallocating the skbs with the proper alignment, skip and
>> length restrictions, so the code can handle either, but still copies
>> most of the time. Once I have all the kinks worked out there, I was
>> planning to possibly make *all* skb allocations on LDOMs and/or SPARC64 fit
>> those requirements, since they are compatible with the existing alignments
>> and would allow using the HV copy in any case.
> 
> You should be able to avoid the copy on TX almost all of the time.
> 
> If you do a skb_push(skb, VNET_PACKET_SKIP) (and initialize with some
> garbage bytes) it ought to be aligned.

I can't touch the data buffer (head or tail) without getting a COW copy,
which is often also misaligned, but the code I have now maps the
existing head and tail as long as they are part of the skb (i.e., there is
headroom and tailroom to fit them), and with that I can avoid copies almost
all the time in TCP.

ICMP and ARP still copy usually, but aren't generally high-volume. I didn't
try out UDP yet.

Initial testing shows a ~25% reduction in throughput for the default MTU
(from ~1Gbps to ~750Mbps), but with a 64K MTU I get a ~25% increase in throughput --
from ~7.5Gbps with the original patches to 9.6Gbps with the no-copy approach
that maps, allocates, frees and unmaps on demand.

Of course, the reduction in throughput on the low end comes with eliminating the
static tx buffers, which allows scaling up the number of LDOMs per vswitch without
any penalty in memory, instead of the n^2 growth before.

If the current static buffer allocation is "good enough," despite its poor
scaling, then we might consider a hybrid where we essentially use the old
code for smaller packets, and direct mapping for larger ones. I have some
other ideas to experiment with, too.
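
The hybrid would look roughly like this inside vnet_start_xmit(), once the
producer index is chosen -- sketch only; the ETH_FRAME_LEN cutoff is a
placeholder, vnet_map_tx_skb() is the hypothetical helper sketched earlier in
the thread, and the cookie bookkeeping is glossed over:

	if (skb->len <= ETH_FRAME_LEN) {
		/* old path: copy into the preallocated, premapped buffer */
		void *tx_buf = port->tx_bufs[dr->prod].buf;

		skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP,
					  skb->len);
	} else {
		/* new path: ldc-map this skb's data on demand */
		err = vnet_map_tx_skb(port, skb,
				      port->tx_bufs[dr->prod].cookies, 2);
	}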

							+-DLS

