* [PATCH net-next] dpaa2-eth: Introduce TX congestion management
@ 2018-11-07 10:31 Ioana Ciocoi Radulescu
  2018-11-08  6:07 ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Ioana Ciocoi Radulescu @ 2018-11-07 10:31 UTC
  To: netdev, davem; +Cc: Ioana Ciornei

DPAA2 hardware can be configured to signal when a set of queues
become congested (i.e., they accumulate a number of frames/bytes
larger than a configurable threshold).

Add the firmware API to control the congestion settings and use it
to enable congestion management on the TX path.

DPAA2 ethernet devices use a set of TX queues to transmit frames and
another set of queues to receive confirmation that the transmission
was successful. Congestion is reached when the cumulated amount of
pending bytes on all frames of one type (either Tx or Tx conf)
is higher than the threshold.

When congestion is detected, all netdev Tx queues on the interface
are stopped until state changes back to uncongested.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
---
We chose this mechanism over BQL (to which it is conceptually
very similar) because a) we can take advantage of the hardware
offloading and b) BQL doesn't match well with our driver fastpath
(we process ingress (Rx or Tx conf) frames in batches of up to 16,
which in certain scenarios confuses the BQL adaptive algorithm,
resulting in too low values of the limit and low performance).
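
For reference, the standard BQL integration pattern looks roughly like
this (a minimal sketch; netdev_tx_sent_queue(), netdev_tx_completed_queue()
and netdev_get_tx_queue() are the real kernel APIs, while queue_id and the
completion counters are illustrative placeholders):

    /* In ndo_start_xmit(), account each frame handed to hardware */
    struct netdev_queue *txq = netdev_get_tx_queue(net_dev, queue_id);

    netdev_tx_sent_queue(txq, skb->len);

    /* On Tx completion, report what finished; BQL adapts its in-flight
     * byte limit and wakes the queue once utilization drops below it
     */
    netdev_tx_completed_queue(txq, completed_pkts, completed_bytes);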

 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   | 89 ++++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h   | 15 ++++
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   |  2 +
 drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h    | 23 ++++++
 drivers/net/ethernet/freescale/dpaa2/dpni.c        | 44 +++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpni.h        | 85 +++++++++++++++++++++
 6 files changed, 258 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 88f7acc..c51cd79 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -564,6 +564,18 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
 	dev_kfree_skb(skb);
 }
 
+bool dpaa2_eth_tx_congested(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+
+	dma_sync_single_for_cpu(dev, priv->cscn_dma, DPAA2_CSCN_SIZE * 2,
+				DMA_FROM_DEVICE);
+
+	/* Check the congestion state of both queue groups (Tx and Tx conf) */
+	return (dpaa2_cscn_state_congested(priv->cscn_mem) ||
+		dpaa2_cscn_state_congested(priv->cscn_mem + DPAA2_CSCN_SIZE));
+}
+
 static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
@@ -575,6 +587,12 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 	unsigned int needed_headroom;
 	int err, i;
 
+	/* If we're congested, stop the software queues; transmission
+	 * of the current skb happens regardless of congestion state
+	 */
+	if (dpaa2_eth_tx_congested(priv))
+		netif_tx_stop_all_queues(net_dev);
+
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
 
@@ -675,6 +693,10 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
 	percpu_extras->tx_conf_frames++;
 	percpu_extras->tx_conf_bytes += dpaa2_fd_get_len(fd);
 
+	/* Check congestion state and wake all queues if necessary */
+	if (netif_queue_stopped(priv->net_dev) && !dpaa2_eth_tx_congested(priv))
+		netif_tx_wake_all_queues(priv->net_dev);
+
 	/* Check frame errors in the FD field */
 	fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
 	free_tx_fd(priv, fd);
@@ -1858,6 +1880,65 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 	return 0;
 }
 
+static int set_tx_congestion(struct dpaa2_eth_priv *priv)
+{
+	struct dpni_cong_notif_cfg cfg = {0};
+	struct device *dev = priv->net_dev->dev.parent;
+	void *tmp;
+	int err;
+
+	/* We need two congestion state buffers, one for Tx queue group
+	 * and one for Tx confirmation queue group
+	 */
+	tmp = devm_kzalloc(dev, DPAA2_CSCN_SIZE * 2 + DPAA2_CSCN_ALIGN,
+			   GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	priv->cscn_mem = PTR_ALIGN(tmp, DPAA2_CSCN_ALIGN);
+	priv->cscn_dma = dma_map_single(dev, priv->cscn_mem,
+					DPAA2_CSCN_SIZE * 2, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, priv->cscn_dma)) {
+		dev_err(dev, "Error mapping CSCN memory area\n");
+		err = -ENOMEM;
+		goto free;
+	}
+
+	cfg.units = DPNI_CONGESTION_UNIT_BYTES;
+	cfg.threshold_entry = DPAA2_ETH_TXCONG_ENTRY_THRESH;
+	cfg.threshold_exit = DPAA2_ETH_TXCONG_EXIT_THRESH;
+	cfg.message_ctx = (u64)priv;
+	cfg.message_iova = priv->cscn_dma;
+	cfg.notification_mode = DPNI_CONG_OPT_WRITE_MEM_ON_ENTER |
+				DPNI_CONG_OPT_WRITE_MEM_ON_EXIT |
+				DPNI_CONG_OPT_COHERENT_WRITE;
+
+	err = dpni_set_congestion_notification(priv->mc_io, 0, priv->mc_token,
+					       DPNI_QUEUE_TX, 0, &cfg);
+	if (err) {
+		dev_err(dev, "dpni_set_congestion_notification(TX) failed\n");
+		goto unmap;
+	}
+
+	cfg.message_iova = priv->cscn_dma + DPAA2_CSCN_SIZE;
+	err = dpni_set_congestion_notification(priv->mc_io, 0, priv->mc_token,
+					       DPNI_QUEUE_TX_CONFIRM, 0, &cfg);
+	if (err) {
+		dev_err(dev, "dpni_set_congestion_notification(TX_CONF) failed\n");
+		goto unmap;
+	}
+
+	return 0;
+
+unmap:
+	dma_unmap_single(dev, priv->cscn_dma, DPAA2_CSCN_SIZE * 2,
+			 DMA_FROM_DEVICE);
+free:
+	devm_kfree(dev, tmp);
+
+	return err;
+}
+
 /* Configure the DPNI object this interface is associated with */
 static int setup_dpni(struct fsl_mc_device *ls_dev)
 {
@@ -1926,6 +2007,7 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
 
 static void free_dpni(struct dpaa2_eth_priv *priv)
 {
+	struct device *dev = priv->net_dev->dev.parent;
 	int err;
 
 	err = dpni_reset(priv->mc_io, 0, priv->mc_token);
@@ -1934,6 +2016,9 @@ static void free_dpni(struct dpaa2_eth_priv *priv)
 			    err);
 
 	dpni_close(priv->mc_io, 0, priv->mc_token);
+
+	dma_unmap_single(dev, priv->cscn_dma, DPAA2_CSCN_SIZE * 2,
+			 DMA_FROM_DEVICE);
 }
 
 static int setup_rx_flow(struct dpaa2_eth_priv *priv,
@@ -2347,6 +2432,10 @@ static int bind_dpni(struct dpaa2_eth_priv *priv)
 			return err;
 	}
 
+	err = set_tx_congestion(priv);
+	if (err)
+		return err;
+
 	err = dpni_get_qdid(priv->mc_io, 0, priv->mc_token,
 			    DPNI_QUEUE_TX, &priv->tx_qdid);
 	if (err) {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 452a8e9..f91133b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -40,6 +40,16 @@
  */
 #define DPAA2_ETH_TAILDROP_THRESH	(64 * 1024)
 
+/* Tx congestion entry and exit thresholds, in number of bytes.
+ * Allow a maximum of 512KB worth of pending egress frames.
+ * Congestion is reached when the cumulated amount of pending bytes
+ * on all frames of one type (either Tx or Tx conf) is higher
+ * than the threshold.
+ */
+#define DPAA2_ETH_TXCONG_ENTRY_THRESH	(512 * 1024)
+#define DPAA2_ETH_TXCONG_EXIT_THRESH	\
+	(DPAA2_ETH_TXCONG_ENTRY_THRESH * 7 / 8)
+
 /* Maximum number of Tx confirmation frames to be processed
  * in a single NAPI call
  */
@@ -352,6 +362,10 @@ struct dpaa2_eth_priv {
 	u64 rx_hash_fields;
 	struct dpaa2_eth_cls_rule *cls_rules;
 	u8 rx_cls_enabled;
+
+	/* Tx congestion status is written here by hardware */
+	void *cscn_mem;
+	dma_addr_t cscn_dma;
 };
 
 #define DPAA2_RXH_SUPPORTED	(RXH_L2DA | RXH_VLAN | RXH_L3_PROTO \
@@ -442,5 +456,6 @@ static int dpaa2_eth_queue_count(struct dpaa2_eth_priv *priv)
 int dpaa2_eth_set_hash(struct net_device *net_dev, u64 flags);
 int dpaa2_eth_cls_key_size(void);
 int dpaa2_eth_cls_fld_off(int prot, int field);
+bool dpaa2_eth_tx_congested(struct dpaa2_eth_priv *priv);
 
 #endif	/* __DPAA2_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 26bd5a2..8e1835c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -45,6 +45,7 @@ static char dpaa2_ethtool_extras[][ETH_GSTRING_LEN] = {
 	"[drv] dequeue portal busy",
 	"[drv] channel pull errors",
 	"[drv] cdan",
+	"[drv] tx congestion state",
 };
 
 #define DPAA2_ETH_NUM_EXTRA_STATS	ARRAY_SIZE(dpaa2_ethtool_extras)
@@ -222,6 +223,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 	*(data + i++) = portal_busy;
 	*(data + i++) = pull_err;
 	*(data + i++) = cdan;
+	*(data + i++) = dpaa2_eth_tx_congested(priv);
 }
 
 static int prep_eth_rule(struct ethhdr *eth_value, struct ethhdr *eth_mask,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
index 7b44d7d..26e0338 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -566,4 +566,27 @@ struct dpni_cmd_remove_fs_entry {
 	__le64 mask_iova;
 };
 
+#define DPNI_CONG_UNITS_SHIFT		4
+#define DPNI_CONG_UNITS_SIZE		2
+
+struct dpni_cmd_set_congestion_notification {
+	/* cmd word 0 */
+	u8 qtype;
+	u8 tc;
+	u8 pad[6];
+	/* cmd word 1 */
+	__le32 dest_id;
+	__le16 notification_mode;
+	u8 dest_priority;
+	/* from LSB: dest_type:4, units:2 */
+	u8 type_units;
+	/* cmd word 2 */
+	__le64 message_iova;
+	/* cmd word 3 */
+	__le64 message_ctx;
+	/* cmd word 4 */
+	__le32 threshold_entry;
+	__le32 threshold_exit;
+};
+
 #endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 220dfc8..046ae19 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1750,3 +1750,47 @@ int dpni_remove_fs_entry(struct fsl_mc_io *mc_io,
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpni_set_congestion_notification() - Set congestion notification config
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue - Rx, Tx and Tx confirm types are supported
+ * @tc_id:	Traffic class selection (0-7)
+ * @cfg:	Congestion notification configuration
+ *
+ * Return:	'0' on Success; error code otherwise.
+ */
+int dpni_set_congestion_notification(struct fsl_mc_io *mc_io,
+				     u32 cmd_flags,
+				     u16 token,
+				     enum dpni_queue_type qtype,
+				     u8 tc_id,
+				     const struct dpni_cong_notif_cfg *cfg)
+{
+	struct dpni_cmd_set_congestion_notification *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header =
+		mc_encode_cmd_header(DPNI_CMDID_SET_CONGESTION_NOTIFICATION,
+				     cmd_flags,
+				     token);
+	cmd_params = (struct dpni_cmd_set_congestion_notification *)cmd.params;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc_id;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->notification_mode = cpu_to_le16(cfg->notification_mode);
+	cmd_params->dest_priority = cfg->dest_cfg.priority;
+	dpni_set_field(cmd_params->type_units, DEST_TYPE,
+		       cfg->dest_cfg.dest_type);
+	dpni_set_field(cmd_params->type_units, CONG_UNITS, cfg->units);
+	cmd_params->message_iova = cpu_to_le64(cfg->message_iova);
+	cmd_params->message_ctx = cpu_to_le64(cfg->message_ctx);
+	cmd_params->threshold_entry = cpu_to_le32(cfg->threshold_entry);
+	cmd_params->threshold_exit = cpu_to_le32(cfg->threshold_exit);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index a521242..63d337c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -810,6 +810,91 @@ enum dpni_congestion_point {
 };
 
 /**
+ * struct dpni_dest_cfg - Structure representing DPNI destination parameters
+ * @dest_type:	Destination type
+ * @dest_id:	Either DPIO ID or DPCON ID, depending on the destination type
+ * @priority:	Priority selection within the DPIO or DPCON channel; valid
+ *		values are 0-1 or 0-7, depending on the number of priorities
+ *		in that channel; not relevant for 'DPNI_DEST_NONE' option
+ */
+struct dpni_dest_cfg {
+	enum dpni_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+/* DPNI congestion options */
+
+/**
+ * CSCN message is written to message_iova once entering a
+ * congestion state (see 'threshold_entry')
+ */
+#define DPNI_CONG_OPT_WRITE_MEM_ON_ENTER        0x00000001
+/**
+ * CSCN message is written to message_iova once exiting a
+ * congestion state (see 'threshold_exit')
+ */
+#define DPNI_CONG_OPT_WRITE_MEM_ON_EXIT         0x00000002
+/**
+ * CSCN write will attempt to allocate into a cache (coherent write);
+ * valid only if 'DPNI_CONG_OPT_WRITE_MEM_<X>' is selected
+ */
+#define DPNI_CONG_OPT_COHERENT_WRITE            0x00000004
+/**
+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' CSCN message is sent to
+ * DPIO/DPCON's WQ channel once entering a congestion state
+ * (see 'threshold_entry')
+ */
+#define DPNI_CONG_OPT_NOTIFY_DEST_ON_ENTER      0x00000008
+/**
+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' CSCN message is sent to
+ * DPIO/DPCON's WQ channel once exiting a congestion state
+ * (see 'threshold_exit')
+ */
+#define DPNI_CONG_OPT_NOTIFY_DEST_ON_EXIT       0x00000010
+/**
+ * if 'dest_cfg.dest_type != DPNI_DEST_NONE' when the CSCN is written to the
+ * sw-portal's DQRR, the DQRI interrupt is asserted immediately (if enabled)
+ */
+#define DPNI_CONG_OPT_INTR_COALESCING_DISABLED  0x00000020
+/**
+ * This congestion will trigger flow control or priority flow control.
+ * This will have effect only if flow control is enabled with
+ * dpni_set_link_cfg().
+ */
+#define DPNI_CONG_OPT_FLOW_CONTROL		0x00000040
+
+/**
+ * struct dpni_cong_notif_cfg - configuration for congestion notifications
+ * @units: Units type
+ * @threshold_entry: Above this threshold we enter a congestion state.
+ *		set it to '0' to disable it
+ * @threshold_exit: Below this threshold we exit the congestion state.
+ * @message_ctx: The context that will be part of the CSCN message
+ * @message_iova: I/O virtual address (must be in DMA-able memory),
+ *		must be 16B aligned; valid only if 'DPNI_CONG_OPT_WRITE_MEM_<X>'
+ *		is contained in 'options'
+ * @dest_cfg: CSCN can be send to either DPIO or DPCON WQ channel
+ * @notification_mode: Mask of available options; use 'DPNI_CONG_OPT_<X>' values
+ */
+struct dpni_cong_notif_cfg {
+	enum dpni_congestion_unit units;
+	u32 threshold_entry;
+	u32 threshold_exit;
+	u64 message_ctx;
+	u64 message_iova;
+	struct dpni_dest_cfg dest_cfg;
+	u16 notification_mode;
+};
+
+int dpni_set_congestion_notification(struct fsl_mc_io *mc_io,
+				     u32 cmd_flags,
+				     u16 token,
+				     enum dpni_queue_type qtype,
+				     u8 tc_id,
+				     const struct dpni_cong_notif_cfg *cfg);
+
+/**
  * struct dpni_taildrop - Structure representing the taildrop
  * @enable:	Indicates whether the taildrop is active or not.
  * @units:	Indicates the unit of THRESHOLD. Queue taildrop only supports
-- 
2.7.4

* Re: [PATCH net-next] dpaa2-eth: Introduce TX congestion management
  2018-11-07 10:31 [PATCH net-next] dpaa2-eth: Introduce TX congestion management Ioana Ciocoi Radulescu
@ 2018-11-08  6:07 ` David Miller
  2018-11-08 20:21   ` Ioana Ciocoi Radulescu
  0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2018-11-08  6:07 UTC
  To: ruxandra.radulescu; +Cc: netdev, ioana.ciornei

From: Ioana Ciocoi Radulescu <ruxandra.radulescu@nxp.com>
Date: Wed, 7 Nov 2018 10:31:16 +0000

> We chose this mechanism over BQL (to which it is conceptually
> very similar) because a) we can take advantage of the hardware
> offloading and b) BQL doesn't match well with our driver fastpath
> (we process ingress (Rx or Tx conf) frames in batches of up to 16,
> which in certain scenarios confuses the BQL adaptive algorithm,
> resulting in too low values of the limit and low performance).

First, this kind of explanation belongs in the commit message.

Second, you'll have to describe better what's wrong with BQL, which
is the ultimate standard mechanism for every single driver in the
kernel to deal with this issue.

Are you saying that if 15 TX frames are pending, no TX interrupt
will arrive at all?

There absolutely must be some timeout or similar interrupt that gets
sent in that kind of situation.  You cannot leave stale TX packets
on your ring unprocessed just because a non-multiple of 16 packets
were queued up and then TX activity stopped.

* RE: [PATCH net-next] dpaa2-eth: Introduce TX congestion management
  2018-11-08  6:07 ` David Miller
@ 2018-11-08 20:21   ` Ioana Ciocoi Radulescu
  2018-11-09  3:26     ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Ioana Ciocoi Radulescu @ 2018-11-08 20:21 UTC
  To: David Miller; +Cc: netdev, Ioana Ciornei

> -----Original Message-----
> From: David Miller <davem@davemloft.net>
> Sent: Thursday, November 8, 2018 8:08 AM
> To: Ioana Ciocoi Radulescu <ruxandra.radulescu@nxp.com>
> Cc: netdev@vger.kernel.org; Ioana Ciornei <ioana.ciornei@nxp.com>
> Subject: Re: [PATCH net-next] dpaa2-eth: Introduce TX congestion
> management
> 
> From: Ioana Ciocoi Radulescu <ruxandra.radulescu@nxp.com>
> Date: Wed, 7 Nov 2018 10:31:16 +0000
> 
> > We chose this mechanism over BQL (to which it is conceptually
> > very similar) because a) we can take advantage of the hardware
> > offloading and b) BQL doesn't match well with our driver fastpath
> > (we process ingress (Rx or Tx conf) frames in batches of up to 16,
> > which in certain scenarios confuses the BQL adaptive algorithm,
> > resulting in too low values of the limit and low performance).
> 
> First, this kind of explanation belongs in the commit message.
> 
> Second, you'll have to describe better what's wrong with BQL, which
> is the ultimate standard mechanism for every single driver in the
> kernel to deal with this issue.
> 
> Are you saying that if 15 TX frames are pending, no TX interrupt
> will arrive at all?

I meant up to 16, not exactly 16.

> 
> There absolutely must be some timeout or similar interrupt that gets
> sent in that kind of situation.  You cannot leave stale TX packets
> on your ring unprocessed just because a non-multiple of 16 packets
> were queued up and then TX activity stopped.

I'll try to detail a bit how our hardware works, since it's not the standard
ring-based architecture.

In our driver implementation, we have multiple ingress queues (for Rx frames
and Tx confirmation frames) grouped into core-affine channels. Each channel
may contain one or more queues of each type.

When queues inside a channel have available frames, the hardware triggers
a notification for us; we then issue a dequeue command for that channel,
in response to which hardware will write a number of frames (between 1 and
16) at a memory location of our choice. Frames obtained as a result of one
dequeue operation are all from the same queue, but consecutive dequeues
on the same channel may service different queues.
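
In sketch form, the servicing loop looks something like this (names such
as channel_pull() are illustrative placeholders for the description above,
not the actual driver API):

    /* Simplified channel servicing: each pull returns a batch of 1..16
     * frames, all from a single queue; consecutive pulls on the same
     * channel may service different queues (Rx or Tx conf).
     */
    static int channel_poll(struct dpaa2_eth_channel *ch, int budget)
    {
            int cleaned = 0, batch;

            do {
                    batch = channel_pull(ch);       /* 0..16 frames */
                    cleaned += batch;
            } while (batch && cleaned < budget);

            return cleaned;
    }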

So my initial attempt at implementing BQL called netdev_tx_completed_queue()
for each batch of Tx confirmation frames obtained from a single dequeue
operation. I don't fully understand the BQL algorithm yet, but I think it had
a problem with the fact that we never reported as completed more than
16 frames at a time.
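
So that first attempt amounted to reporting completions to BQL in tiny
increments (an illustrative sketch; batch_frames/batch_bytes are
hypothetical bookkeeping variables):

    /* After each dequeue of a Tx conf batch -- at most 16 frames,
     * so BQL never sees a completion report larger than that
     */
    netdev_tx_completed_queue(txq, batch_frames, batch_bytes);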

Consider a scenario where a DPAA2 platform does IP forwarding from port1
to port2; to keep things simple, let's say we're single core and have only
one Rx and one Tx confirmation queue per interface.

Without BQL, the egress interface can transmit up to 64 frames at a time
(one for each Rx frame processed by the ingress interface in its NAPI poll)
and then process all 64 confirmations (grouped in 4 dequeues) during its
own NAPI cycle.

With BQL support added, the number of consecutive transmitted frames is
never higher than 33; after that, BQL stops the netdev tx queue until
we mark some of those transmits as completed. This results in a performance
drop of over 30%.

Today I tried to further coalesce the confirmation frames such that I call
netdev_tx_completed_queue() only at the end of the NAPI poll, once for each
confirmation queue that was serviced during that NAPI. I need to do more
testing, but so far it performs *almost* on par with the non-BQL driver
version. But it does complicate the fastpath code and feels somewhat
unnatural.
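
In sketch form, the coalesced variant defers the BQL update (the per-queue
frames/bytes counters here are hypothetical; netdev_tx_completed_queue()
and netdev_get_tx_queue() are the real kernel APIs):

    /* During the NAPI poll, only accumulate per confirmation queue */
    fq->tx_conf_frames += batch_frames;
    fq->tx_conf_bytes += batch_bytes;

    /* At the end of the poll, one BQL update per serviced conf queue */
    txq = netdev_get_tx_queue(priv->net_dev, fq->flowid);
    netdev_tx_completed_queue(txq, fq->tx_conf_frames, fq->tx_conf_bytes);
    fq->tx_conf_frames = 0;
    fq->tx_conf_bytes = 0;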

So I would still prefer using the hardware mechanism, which I think fits
more smoothly with both our hardware and driver implementation, and adds
less overhead. But if you feel strongly about it I can drop this patch and
polish the BQL support until it looks decent enough (and hopefully doesn't
mess too much with our performance benchmarks).

Thanks,
Ioana

* Re: [PATCH net-next] dpaa2-eth: Introduce TX congestion management
  2018-11-08 20:21   ` Ioana Ciocoi Radulescu
@ 2018-11-09  3:26     ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2018-11-09  3:26 UTC
  To: ruxandra.radulescu; +Cc: netdev, ioana.ciornei

From: Ioana Ciocoi Radulescu <ruxandra.radulescu@nxp.com>
Date: Thu, 8 Nov 2018 20:21:15 +0000

> Today I tried to further coalesce the confirmation frames such that I call
> netdev_tx_completed_queue() only at the end of the NAPI poll, once for each
> confirmation queue that was serviced during that NAPI.

That sounds like exactly what you should do given your design description.

> I need to do more testing, but so far it performs *almost* on par
> with the non-BQL driver version. But it does complicate the fastpath
> code and feels somewhat unnatural.

Well, this is exactly what should happen with BQL and as a result you will
get much better TCP queue utilization and avoidance of bufferbloat.
