All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload
@ 2020-05-15 17:11 Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 1/3] cxgb4: improve credits recovery in TC-MQPRIO Tx path Rahul Lakkireddy
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Rahul Lakkireddy @ 2020-05-15 17:11 UTC (permalink / raw)
  To: netdev; +Cc: davem, nirranjan, vishal, dt

Patch 1 improves the Tx path's credit request and recovery mechanism
when running under heavy load.

Patch 2 adds ability to tune the burst buffer sizes of all traffic
classes to improve performance for <= 1500 MTU, under heavy load.

Patch 3 adds support to track EOTIDs and dump software queue
contexts used by TC-MQPRIO offload.

Thanks,
Rahul

Rahul Lakkireddy (3):
  cxgb4: improve credits recovery in TC-MQPRIO Tx path
  cxgb4: tune burst buffer size for TC-MQPRIO offload
  cxgb4: add EOTID tracking and software context dump

 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    |  30 ++--
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c    | 144 ++++++++++++++----
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |   3 +-
 .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c  |  17 +++
 .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h  |   1 +
 .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h    |   5 +
 drivers/net/ethernet/chelsio/cxgb4/sched.c    |   3 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c      |  40 +++--
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    |   8 +-
 9 files changed, 190 insertions(+), 61 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH net-next 1/3] cxgb4: improve credits recovery in TC-MQPRIO Tx path
  2020-05-15 17:11 [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload Rahul Lakkireddy
@ 2020-05-15 17:11 ` Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 2/3] cxgb4: tune burst buffer size for TC-MQPRIO offload Rahul Lakkireddy
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Rahul Lakkireddy @ 2020-05-15 17:11 UTC (permalink / raw)
  To: netdev; +Cc: davem, nirranjan, vishal, dt

Request credit update for every half credits consumed, including
the current request. Also, avoid re-trying to post packets when there
are no credits left. The credit update reply via interrupt will
eventually restore the credits and will invoke the Tx path again.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/sge.c | 40 +++++++++++++++---------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6516c45864b3..1359158652b7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2091,10 +2091,9 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
 	return flits + nsgl;
 }
 
-static inline void *write_eo_wr(struct adapter *adap,
-				struct sge_eosw_txq *eosw_txq,
-				struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
-				u32 hdr_len, u32 wrlen)
+static void *write_eo_wr(struct adapter *adap, struct sge_eosw_txq *eosw_txq,
+			 struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+			 u32 hdr_len, u32 wrlen)
 {
 	const struct skb_shared_info *ssi = skb_shinfo(skb);
 	struct cpl_tx_pkt_core *cpl;
@@ -2113,7 +2112,8 @@ static inline void *write_eo_wr(struct adapter *adap,
 	immd_len += hdr_len;
 
 	if (!eosw_txq->ncompl ||
-	    eosw_txq->last_compl >= adap->params.ofldq_wr_cred / 2) {
+	    (eosw_txq->last_compl + wrlen16) >=
+	    (adap->params.ofldq_wr_cred / 2)) {
 		compl = true;
 		eosw_txq->ncompl++;
 		eosw_txq->last_compl = 0;
@@ -2153,8 +2153,8 @@ static inline void *write_eo_wr(struct adapter *adap,
 	return cpl;
 }
 
-static void ethofld_hard_xmit(struct net_device *dev,
-			      struct sge_eosw_txq *eosw_txq)
+static int ethofld_hard_xmit(struct net_device *dev,
+			     struct sge_eosw_txq *eosw_txq)
 {
 	struct port_info *pi = netdev2pinfo(dev);
 	struct adapter *adap = netdev2adap(dev);
@@ -2167,8 +2167,8 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	bool skip_eotx_wr = false;
 	struct tx_sw_desc *d;
 	struct sk_buff *skb;
+	int left, ret = 0;
 	u8 flits, ndesc;
-	int left;
 
 	eohw_txq = &adap->sge.eohw_txq[eosw_txq->hwqid];
 	spin_lock(&eohw_txq->lock);
@@ -2198,11 +2198,19 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	wrlen = flits * 8;
 	wrlen16 = DIV_ROUND_UP(wrlen, 16);
 
-	/* If there are no CPL credits, then wait for credits
-	 * to come back and retry again
+	left = txq_avail(&eohw_txq->q) - ndesc;
+
+	/* If there are no descriptors left in hardware queues or no
+	 * CPL credits left in software queues, then wait for them
+	 * to come back and retry again. Note that we always request
+	 * for credits update via interrupt for every half credits
+	 * consumed. So, the interrupt will eventually restore the
+	 * credits and invoke the Tx path again.
 	 */
-	if (unlikely(wrlen16 > eosw_txq->cred))
+	if (unlikely(left < 0 || wrlen16 > eosw_txq->cred)) {
+		ret = -ENOMEM;
 		goto out_unlock;
+	}
 
 	if (unlikely(skip_eotx_wr)) {
 		start = (u64 *)wr;
@@ -2231,7 +2239,8 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	sgl = (u64 *)inline_tx_skb_header(skb, &eohw_txq->q, (void *)start,
 					  hdr_len);
 	if (data_len) {
-		if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, d->addr))) {
+		ret = cxgb4_map_skb(adap->pdev_dev, skb, d->addr);
+		if (unlikely(ret)) {
 			memset(d->addr, 0, sizeof(d->addr));
 			eohw_txq->mapping_err++;
 			goto out_unlock;
@@ -2277,12 +2286,13 @@ static void ethofld_hard_xmit(struct net_device *dev,
 
 out_unlock:
 	spin_unlock(&eohw_txq->lock);
+	return ret;
 }
 
 static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
 {
 	struct sk_buff *skb;
-	int pktcount;
+	int pktcount, ret;
 
 	switch (eosw_txq->state) {
 	case CXGB4_EO_STATE_ACTIVE:
@@ -2307,7 +2317,9 @@ static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
 			continue;
 		}
 
-		ethofld_hard_xmit(dev, eosw_txq);
+		ret = ethofld_hard_xmit(dev, eosw_txq);
+		if (ret)
+			break;
 	}
 }
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next 2/3] cxgb4: tune burst buffer size for TC-MQPRIO offload
  2020-05-15 17:11 [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 1/3] cxgb4: improve credits recovery in TC-MQPRIO Tx path Rahul Lakkireddy
@ 2020-05-15 17:11 ` Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 3/3] cxgb4: add EOTID tracking and software context dump Rahul Lakkireddy
  2020-05-15 17:54 ` [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: Rahul Lakkireddy @ 2020-05-15 17:11 UTC (permalink / raw)
  To: netdev; +Cc: davem, nirranjan, vishal, dt

For each traffic class, firmware handles up to 4 * MTU amount of data
per burst cycle. Under heavy load, this small buffer size is a
bottleneck when buffering large TSO packets in <= 1500 MTU case.
Increase the burst buffer size to 8 * MTU when supported.

Also, keep the driver's traffic class configuration API similar to
the firmware API counterpart.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h    | 30 ++++++++++---------
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  2 +-
 .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c  |  7 +++++
 drivers/net/ethernet/chelsio/cxgb4/sched.c    |  3 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c    |  8 +++--
 5 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 30d25a37fc3b..fc1405a8ed74 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1125,19 +1125,20 @@ struct adapter {
  * programmed with various parameters.
  */
 struct ch_sched_params {
-	s8   type;                     /* packet or flow */
+	u8   type;                     /* packet or flow */
 	union {
 		struct {
-			s8   level;    /* scheduler hierarchy level */
-			s8   mode;     /* per-class or per-flow */
-			s8   rateunit; /* bit or packet rate */
-			s8   ratemode; /* %port relative or kbps absolute */
-			s8   channel;  /* scheduler channel [0..N] */
-			s8   class;    /* scheduler class [0..N] */
-			s32  minrate;  /* minimum rate */
-			s32  maxrate;  /* maximum rate */
-			s16  weight;   /* percent weight */
-			s16  pktsize;  /* average packet size */
+			u8   level;    /* scheduler hierarchy level */
+			u8   mode;     /* per-class or per-flow */
+			u8   rateunit; /* bit or packet rate */
+			u8   ratemode; /* %port relative or kbps absolute */
+			u8   channel;  /* scheduler channel [0..N] */
+			u8   class;    /* scheduler class [0..N] */
+			u32  minrate;  /* minimum rate */
+			u32  maxrate;  /* maximum rate */
+			u16  weight;   /* percent weight */
+			u16  pktsize;  /* average packet size */
+			u16  burstsize;  /* burst buffer size */
 		} params;
 	} u;
 };
@@ -1952,9 +1953,10 @@ int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid,
 		   enum ctxt_type ctype, u32 *data);
 int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
 		      enum ctxt_type ctype, u32 *data);
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
-		    int rateunit, int ratemode, int channel, int class,
-		    int minrate, int maxrate, int weight, int pktsize);
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+		    u8 rateunit, u8 ratemode, u8 channel, u8 class,
+		    u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+		    u16 burstsize);
 void t4_sge_decode_idma_state(struct adapter *adapter, int state);
 void t4_idma_monitor_init(struct adapter *adapter,
 			  struct sge_idma_monitor_state *idma);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a70018f067aa..196451f8006f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3021,7 +3021,7 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
 			      SCHED_CLASS_RATEUNIT_BITS,
 			      SCHED_CLASS_RATEMODE_ABS,
 			      pi->tx_chan, class_id, 0,
-			      max_tx_rate * 1000, 0, pktsize);
+			      max_tx_rate * 1000, 0, pktsize, 0);
 	if (ret) {
 		dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
 			ret);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index e6af4906d674..56079c9937db 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -342,6 +342,13 @@ static int cxgb4_mqprio_alloc_tc(struct net_device *dev,
 		p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
 		p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);
 
+		/* Request larger burst buffer for smaller MTU, so
+		 * that hardware can work on more data per burst
+		 * cycle.
+		 */
+		if (dev->mtu <= ETH_DATA_LEN)
+			p.u.params.burstsize = 8 * dev->mtu;
+
 		e = cxgb4_sched_class_alloc(dev, &p);
 		if (!e) {
 			ret = -ENOMEM;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index cebe1412d960..fde93c50cfec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -57,7 +57,8 @@ static int t4_sched_class_fw_cmd(struct port_info *pi,
 				      p->u.params.ratemode,
 				      p->u.params.channel, e->idx,
 				      p->u.params.minrate, p->u.params.maxrate,
-				      p->u.params.weight, p->u.params.pktsize);
+				      p->u.params.weight, p->u.params.pktsize,
+				      p->u.params.burstsize);
 		break;
 	default:
 		err = -ENOTSUPP;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 2a3480fc1d91..1c8068c02728 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -10361,9 +10361,10 @@ int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
 	return ret;
 }
 
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
-		    int rateunit, int ratemode, int channel, int class,
-		    int minrate, int maxrate, int weight, int pktsize)
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+		    u8 rateunit, u8 ratemode, u8 channel, u8 class,
+		    u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+		    u16 burstsize)
 {
 	struct fw_sched_cmd cmd;
 
@@ -10385,6 +10386,7 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 	cmd.u.params.max = cpu_to_be32(maxrate);
 	cmd.u.params.weight = cpu_to_be16(weight);
 	cmd.u.params.pktsize = cpu_to_be16(pktsize);
+	cmd.u.params.burstsize = cpu_to_be16(burstsize);
 
 	return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
 			       NULL, 1);
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH net-next 3/3] cxgb4: add EOTID tracking and software context dump
  2020-05-15 17:11 [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 1/3] cxgb4: improve credits recovery in TC-MQPRIO Tx path Rahul Lakkireddy
  2020-05-15 17:11 ` [PATCH net-next 2/3] cxgb4: tune burst buffer size for TC-MQPRIO offload Rahul Lakkireddy
@ 2020-05-15 17:11 ` Rahul Lakkireddy
  2020-05-15 17:54 ` [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: Rahul Lakkireddy @ 2020-05-15 17:11 UTC (permalink / raw)
  To: netdev; +Cc: davem, nirranjan, vishal, dt

Rework and add support for dumping EOTID software context used by
TC-MQPRIO. Also track number of EOTIDs in use.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 .../ethernet/chelsio/cxgb4/cxgb4_debugfs.c    | 144 ++++++++++++++----
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |   1 +
 .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c  |  10 ++
 .../ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h  |   1 +
 .../net/ethernet/chelsio/cxgb4/cxgb4_uld.h    |   5 +
 5 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index ebed99f3d4cf..7818c392da50 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -49,6 +49,7 @@
 #include "cudbg_lib_common.h"
 #include "cudbg_entity.h"
 #include "cudbg_lib.h"
+#include "cxgb4_tc_mqprio.h"
 
 /* generic seq_file support for showing a table of size rows x width. */
 static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos)
@@ -2657,32 +2658,19 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)
 
 static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
-	int eth_entries, ctrl_entries, eo_entries = 0;
+	int eth_entries, ctrl_entries, eohw_entries = 0, eosw_entries = 0;
 	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
 	const struct sge_uld_txq_info *utxq_info;
 	const struct sge_uld_rxq_info *urxq_info;
+	struct cxgb4_tc_port_mqprio *port_mqprio;
 	struct adapter *adap = seq->private;
-	int i, n, r = (uintptr_t)v - 1;
+	int i, j, n, r = (uintptr_t)v - 1;
 	struct sge *s = &adap->sge;
 
 	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
-	if (adap->sge.eohw_txq)
-		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
-
-	mutex_lock(&uld_mutex);
-	if (s->uld_txq_info)
-		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
-			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);
-
-	if (s->uld_rxq_info) {
-		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
-			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
-			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
-		}
-	}
 
 	if (r)
 		seq_putc(seq, '\n');
@@ -2759,11 +2747,21 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-		goto unlock;
+		goto out;
 	}
 
 	r -= eth_entries;
-	if (r < eo_entries) {
+	if (!adap->tc_mqprio)
+		goto skip_mqprio;
+
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+	if (!refcount_read(&adap->tc_mqprio->refcnt)) {
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+		goto skip_mqprio;
+	}
+
+	eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
+	if (r < eohw_entries) {
 		int base_qset = r * 4;
 		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
 		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];
@@ -2808,10 +2806,71 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-		goto unlock;
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+		goto out;
+	}
+
+	r -= eohw_entries;
+	for (j = 0; j < adap->params.nports; j++) {
+		int entries;
+		u8 tc;
+
+		port_mqprio = &adap->tc_mqprio->port_mqprio[j];
+		entries = 0;
+		for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
+			entries += port_mqprio->mqprio.qopt.count[tc];
+
+		if (!entries)
+			continue;
+
+		eosw_entries = DIV_ROUND_UP(entries, 4);
+		if (r < eosw_entries) {
+			const struct sge_eosw_txq *tx;
+
+			n = min(4, entries - 4 * r);
+			tx = &port_mqprio->eosw_txq[4 * r];
+
+			S("QType:", "EOSW-TXQ");
+			S("Interface:",
+			  adap->port[j] ? adap->port[j]->name : "N/A");
+			T("EOTID:", hwtid);
+			T("HWQID:", hwqid);
+			T("State:", state);
+			T("Size:", ndesc);
+			T("In-Use:", inuse);
+			T("Credits:", cred);
+			T("Compl:", ncompl);
+			T("Last-Compl:", last_compl);
+			T("PIDX:", pidx);
+			T("Last-PIDX:", last_pidx);
+			T("CIDX:", cidx);
+			T("Last-CIDX:", last_cidx);
+			T("FLOWC-IDX:", flowc_idx);
+
+			mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+			goto out;
+		}
+
+		r -= eosw_entries;
+	}
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+
+skip_mqprio:
+	if (!is_uld(adap))
+		goto skip_uld;
+
+	mutex_lock(&uld_mutex);
+	if (s->uld_txq_info)
+		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
+			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);
+
+	if (s->uld_rxq_info) {
+		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
+			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
+			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
+		}
 	}
 
-	r -= eo_entries;
 	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
 		const struct sge_uld_txq *tx;
 
@@ -2994,6 +3053,9 @@ do { \
 	}
 
 	r -= uld_txq_entries[CXGB4_TX_CRYPTO];
+	mutex_unlock(&uld_mutex);
+
+skip_uld:
 	if (r < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &s->ctrlq[r * 4];
 
@@ -3008,7 +3070,7 @@ do { \
 		TL("TxQFull:", q.stops);
 		TL("TxQRestarts:", q.restarts);
 
-		goto unlock;
+		goto out;
 	}
 
 	r -= ctrl_entries;
@@ -3026,11 +3088,9 @@ do { \
 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
 			   s->counter_val[evtq->pktcnt_idx]);
 
-		goto unlock;
+		goto out;
 	}
 
-unlock:
-	mutex_unlock(&uld_mutex);
 #undef R
 #undef RL
 #undef T
@@ -3039,13 +3099,38 @@ do { \
 #undef R3
 #undef T3
 #undef S3
+out:
+	return 0;
+
+unlock:
+	mutex_unlock(&uld_mutex);
 	return 0;
 }
 
 static int sge_queue_entries(const struct adapter *adap)
 {
-	int tot_uld_entries = 0;
-	int i;
+	int i, tot_uld_entries = 0, eohw_entries = 0, eosw_entries = 0;
+
+	if (adap->tc_mqprio) {
+		struct cxgb4_tc_port_mqprio *port_mqprio;
+		u8 tc;
+
+		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+		if (adap->sge.eohw_txq)
+			eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
+
+		for (i = 0; i < adap->params.nports; i++) {
+			u32 entries = 0;
+
+			port_mqprio = &adap->tc_mqprio->port_mqprio[i];
+			for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
+				entries += port_mqprio->mqprio.qopt.count[tc];
+
+			if (entries)
+				eosw_entries += DIV_ROUND_UP(entries, 4);
+		}
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+	}
 
 	if (!is_uld(adap))
 		goto lld_only;
@@ -3062,8 +3147,7 @@ static int sge_queue_entries(const struct adapter *adap)
 
 lld_only:
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
-	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
-	       tot_uld_entries +
+	       eohw_entries + eosw_entries + tot_uld_entries +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
 
@@ -3244,6 +3328,10 @@ static int tid_info_show(struct seq_file *seq, void *v)
 	if (t->nhpftids)
 		seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base,
 			   t->hpftid_base + t->nhpftids - 1);
+	if (t->neotids)
+		seq_printf(seq, "EOTID range: %u..%u, in use: %u\n",
+			   t->eotid_base, t->eotid_base + t->neotids - 1,
+			   atomic_read(&t->eotids_in_use));
 	if (t->ntids)
 		seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n",
 			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A),
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 196451f8006f..d05c2371d8c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1579,6 +1579,7 @@ static int tid_init(struct tid_info *t)
 	atomic_set(&t->tids_in_use, 0);
 	atomic_set(&t->conns_in_use, 0);
 	atomic_set(&t->hash_tids_in_use, 0);
+	atomic_set(&t->eotids_in_use, 0);
 
 	/* Setup the free list for atid_tab and clear the stid bitmap. */
 	if (natids) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index 56079c9937db..ae7123a9de8e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -574,6 +574,7 @@ static void cxgb4_mqprio_disable_offload(struct net_device *dev)
 int cxgb4_setup_tc_mqprio(struct net_device *dev,
 			  struct tc_mqprio_qopt_offload *mqprio)
 {
+	struct adapter *adap = netdev2adap(dev);
 	bool needs_bring_up = false;
 	int ret;
 
@@ -581,6 +582,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
 	if (ret)
 		return ret;
 
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+
 	/* To configure tc params, the current allocated EOTIDs must
 	 * be freed up. However, they can't be freed up if there's
 	 * traffic running on the interface. So, ensure interface is
@@ -616,6 +619,7 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
 	if (needs_bring_up)
 		cxgb_open(dev);
 
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 	return ret;
 }
 
@@ -628,6 +632,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)
 	if (!adap->tc_mqprio || !adap->tc_mqprio->port_mqprio)
 		return;
 
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
 	for_each_port(adap, i) {
 		dev = adap->port[i];
 		if (!dev)
@@ -639,6 +644,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)
 
 		cxgb4_mqprio_disable_offload(dev);
 	}
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 }
 
 int cxgb4_init_tc_mqprio(struct adapter *adap)
@@ -660,6 +666,8 @@ int cxgb4_init_tc_mqprio(struct adapter *adap)
 		goto out_free_mqprio;
 	}
 
+	mutex_init(&tc_mqprio->mqprio_mutex);
+
 	tc_mqprio->port_mqprio = tc_port_mqprio;
 	for (i = 0; i < adap->params.nports; i++) {
 		port_mqprio = &tc_mqprio->port_mqprio[i];
@@ -694,6 +702,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
 	u8 i;
 
 	if (adap->tc_mqprio) {
+		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
 		if (adap->tc_mqprio->port_mqprio) {
 			for (i = 0; i < adap->params.nports; i++) {
 				struct net_device *dev = adap->port[i];
@@ -705,6 +714,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
 			}
 			kfree(adap->tc_mqprio->port_mqprio);
 		}
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 		kfree(adap->tc_mqprio);
 	}
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
index ff8794132b22..be96f1dc0372 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
@@ -33,6 +33,7 @@ struct cxgb4_tc_port_mqprio {
 
 struct cxgb4_tc_mqprio {
 	refcount_t refcnt; /* Refcount for adapter-wide resources */
+	struct mutex mqprio_mutex; /* Lock for accessing MQPRIO info */
 	struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index be831317520a..16796785eea3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -147,6 +147,9 @@ struct tid_info {
 	/* TIDs in the HASH */
 	atomic_t hash_tids_in_use;
 	atomic_t conns_in_use;
+	/* ETHOFLD TIDs used for rate limiting */
+	atomic_t eotids_in_use;
+
 	/* lock for setting/clearing filter bitmap */
 	spinlock_t ftid_lock;
 
@@ -221,12 +224,14 @@ static inline void cxgb4_alloc_eotid(struct tid_info *t, u32 eotid, void *data)
 {
 	set_bit(eotid, t->eotid_bmap);
 	t->eotid_tab[eotid].data = data;
+	atomic_inc(&t->eotids_in_use);
 }
 
 static inline void cxgb4_free_eotid(struct tid_info *t, u32 eotid)
 {
 	clear_bit(eotid, t->eotid_bmap);
 	t->eotid_tab[eotid].data = NULL;
+	atomic_dec(&t->eotids_in_use);
 }
 
 int cxgb4_alloc_atid(struct tid_info *t, void *data);
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload
  2020-05-15 17:11 [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload Rahul Lakkireddy
                   ` (2 preceding siblings ...)
  2020-05-15 17:11 ` [PATCH net-next 3/3] cxgb4: add EOTID tracking and software context dump Rahul Lakkireddy
@ 2020-05-15 17:54 ` David Miller
  3 siblings, 0 replies; 5+ messages in thread
From: David Miller @ 2020-05-15 17:54 UTC (permalink / raw)
  To: rahul.lakkireddy; +Cc: netdev, nirranjan, vishal, dt

From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 15 May 2020 22:41:02 +0530

> Patch 1 improves the Tx path's credit request and recovery mechanism
> when running under heavy load.
> 
> Patch 2 adds ability to tune the burst buffer sizes of all traffic
> classes to improve performance for <= 1500 MTU, under heavy load.
> 
> Patch 3 adds support to track EOTIDs and dump software queue
> contexts used by TC-MQPRIO offload.

Series applied, thank you.

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-05-15 17:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-15 17:11 [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload Rahul Lakkireddy
2020-05-15 17:11 ` [PATCH net-next 1/3] cxgb4: improve credits recovery in TC-MQPRIO Tx path Rahul Lakkireddy
2020-05-15 17:11 ` [PATCH net-next 2/3] cxgb4: tune burst buffer size for TC-MQPRIO offload Rahul Lakkireddy
2020-05-15 17:11 ` [PATCH net-next 3/3] cxgb4: add EOTID tracking and software context dump Rahul Lakkireddy
2020-05-15 17:54 ` [PATCH net-next 0/3] cxgb4: improve and tune TC-MQPRIO offload David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.