* [PATCH net-next 0/4] Add QPL mode for DQO descriptor format
@ 2023-08-02 21:33 Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 1/4] gve: Control path for DQO-QPL Rushil Gupta
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-02 21:33 UTC (permalink / raw)
  To: netdev, davem, kuba, willemb, edumazet, pabeni; +Cc: Rushil Gupta

GVE supports QPL ("queue-page-list") mode, where
all data is communicated through a set of pre-registered
pages. This series adds QPL support to the DQO
descriptor format.
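
For readers new to the driver: a queue-page-list is essentially an array
of pre-registered pages plus their DMA addresses. A simplified,
hypothetical sketch (field names are illustrative; see struct
gve_queue_page_list in gve.h for the real definition):

#include <linux/types.h>

/* Illustrative only -- a conceptual stand-in for struct gve_queue_page_list. */
struct demo_queue_page_list {
	u32 id;			/* QPL id the device refers to */
	u32 num_entries;	/* number of pre-registered pages */
	struct page **pages;	/* the pages themselves */
	dma_addr_t *page_buses;	/* DMA address of each page */
};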

Rushil Gupta (4):
  gve: Control path for DQO-QPL
  gve: Tx path for DQO-QPL
  gve: RX path for DQO-QPL
  gve: update gve.rst

 .../device_drivers/ethernet/google/gve.rst    |   9 +
 drivers/net/ethernet/google/gve/gve.h         | 112 ++++-
 drivers/net/ethernet/google/gve/gve_adminq.c  |  93 +++-
 drivers/net/ethernet/google/gve/gve_adminq.h  |  10 +
 drivers/net/ethernet/google/gve/gve_main.c    |  20 +-
 drivers/net/ethernet/google/gve/gve_rx_dqo.c  | 126 +++++-
 drivers/net/ethernet/google/gve/gve_tx_dqo.c  | 404 ++++++++++++++----
 7 files changed, 653 insertions(+), 121 deletions(-)

-- 
2.41.0.585.gd2178a4bd4-goog


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH net-next 1/4] gve: Control path for DQO-QPL
  2023-08-02 21:33 [PATCH net-next 0/4] Add QPL mode for DQO descriptor format Rushil Gupta
@ 2023-08-02 21:33 ` Rushil Gupta
  2023-08-03  1:56   ` Jesse Brandeburg
  2023-08-03  9:28   ` Simon Horman
  2023-08-02 21:33 ` [PATCH net-next 2/4] gve: Tx " Rushil Gupta
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-02 21:33 UTC (permalink / raw)
  To: netdev, davem, kuba, willemb, edumazet, pabeni
  Cc: Rushil Gupta, Praveen Kaligineedi, Bailey Forrest

Add checks, ABI changes and device options to support
QPL mode for DQO in addition to GQI. Also, use the
pages-per-qpl value supplied by the device option to
control the size of the "queue-page-list".
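
For illustration, a minimal sketch (not part of the patch) of how the
suggested pages-per-qpl values from the new DQO QPL device option are
applied, where a value of zero falls back to the driver defaults
(byte-order conversion omitted for brevity; names are hypothetical):

#define DQO_QPL_DEFAULT_TX_PAGES 512
#define DQO_QPL_DEFAULT_RX_PAGES 2048

/* Hypothetical stand-in for gve_device_option_dqo_qpl, already byte-swapped. */
struct demo_dqo_qpl_option {
	unsigned short tx_pages_per_qpl;
	unsigned short rx_pages_per_qpl;
};

void demo_apply_dqo_qpl_option(const struct demo_dqo_qpl_option *opt,
			       unsigned short *tx_pages,
			       unsigned short *rx_pages)
{
	*tx_pages = opt->tx_pages_per_qpl;
	*rx_pages = opt->rx_pages_per_qpl;

	/* A value of 0 means the device made no suggestion. */
	if (*tx_pages == 0)
		*tx_pages = DQO_QPL_DEFAULT_TX_PAGES;
	if (*rx_pages == 0)
		*rx_pages = DQO_QPL_DEFAULT_RX_PAGES;
}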

Signed-off-by: Rushil Gupta <rushilg@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Bailey Forrest <bcf@google.com>
---
 drivers/net/ethernet/google/gve/gve.h        | 20 ++++-
 drivers/net/ethernet/google/gve/gve_adminq.c | 93 +++++++++++++++++---
 drivers/net/ethernet/google/gve/gve_adminq.h | 10 +++
 drivers/net/ethernet/google/gve/gve_main.c   | 20 +++--
 4 files changed, 123 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 4b425bf71ede..517a63b60cb9 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -51,6 +51,12 @@
 
 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
 
+#define DQO_QPL_DEFAULT_TX_PAGES 512
+#define DQO_QPL_DEFAULT_RX_PAGES 2048
+
+/* Maximum TSO size supported on DQO */
+#define GVE_DQO_TX_MAX	0x3FFFF
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -531,6 +537,7 @@ enum gve_queue_format {
 	GVE_GQI_RDA_FORMAT		= 0x1,
 	GVE_GQI_QPL_FORMAT		= 0x2,
 	GVE_DQO_RDA_FORMAT		= 0x3,
+	GVE_DQO_QPL_FORMAT		= 0x4,
 };
 
 struct gve_priv {
@@ -550,7 +557,8 @@ struct gve_priv {
 	u16 num_event_counters;
 	u16 tx_desc_cnt; /* num desc per ring */
 	u16 rx_desc_cnt; /* num desc per ring */
-	u16 tx_pages_per_qpl; /* tx buffer length */
+	u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
+	u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
 	u16 rx_data_slot_cnt; /* rx buffer length */
 	u64 max_registered_pages;
 	u64 num_registered_pages; /* num pages registered with NIC */
@@ -808,11 +816,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
 	return (priv->num_ntfy_blks / 2) + queue_idx;
 }
 
+static inline bool gve_is_qpl(struct gve_priv *priv)
+{
+	return priv->queue_format == GVE_GQI_QPL_FORMAT ||
+		priv->queue_format == GVE_DQO_QPL_FORMAT;
+}
+
 /* Returns the number of tx queue page lists
  */
 static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
 {
-	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+	if (!gve_is_qpl(priv))
 		return 0;
 
 	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
@@ -832,7 +846,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
  */
 static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
 {
-	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+	if (!gve_is_qpl(priv))
 		return 0;
 
 	return priv->rx_cfg.num_queues;
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index 252974202a3f..a16e7cf21911 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
 			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
 			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
 			     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
-			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
+			     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
 {
 	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
 	u16 option_length = be16_to_cpu(option->option_length);
@@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
 		}
 		*dev_op_dqo_rda = (void *)(option + 1);
 		break;
+	case GVE_DEV_OPT_ID_DQO_QPL:
+		if (option_length < sizeof(**dev_op_dqo_qpl) ||
+		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
+			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
+				 "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
+				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
+				 option_length, req_feat_mask);
+			break;
+		}
+
+		if (option_length > sizeof(**dev_op_dqo_qpl)) {
+			dev_warn(&priv->pdev->dev,
+				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
+		}
+		*dev_op_dqo_qpl = (void *)(option + 1);
+		break;
 	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
 		if (option_length < sizeof(**dev_op_jumbo_frames) ||
 		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
 			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
 			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
 			   struct gve_device_option_dqo_rda **dev_op_dqo_rda,
-			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
+			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
+			   struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
 {
 	const int num_options = be16_to_cpu(descriptor->num_device_options);
 	struct gve_device_option *dev_opt;
@@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,
 
 		gve_parse_device_option(priv, descriptor, dev_opt,
 					dev_op_gqi_rda, dev_op_gqi_qpl,
-					dev_op_dqo_rda, dev_op_jumbo_frames);
+					dev_op_dqo_rda, dev_op_jumbo_frames,
+					dev_op_dqo_qpl);
 		dev_opt = next_opt;
 	}
 
@@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
 
 		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
 	} else {
+		u16 comp_ring_size = 0;
+		u32 qpl_id = 0;
+
+		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
+			comp_ring_size =
+				priv->options_dqo_rda.tx_comp_ring_entries;
+		} else {
+			qpl_id = tx->dqo.qpl->id;
+			comp_ring_size = priv->tx_desc_cnt;
+		}
+		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
 		cmd.create_tx_queue.tx_ring_size =
 			cpu_to_be16(priv->tx_desc_cnt);
 		cmd.create_tx_queue.tx_comp_ring_addr =
 			cpu_to_be64(tx->complq_bus_dqo);
 		cmd.create_tx_queue.tx_comp_ring_size =
-			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
+			cpu_to_be16(comp_ring_size);
 	}
 
 	return gve_adminq_issue_cmd(priv, &cmd);
@@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
 		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
 		cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
 	} else {
+		u16 rx_buff_ring_entries = 0;
+		u32 qpl_id = 0;
+
+		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
+			rx_buff_ring_entries =
+				priv->options_dqo_rda.rx_buff_ring_entries;
+		} else {
+			qpl_id = rx->dqo.qpl->id;
+			rx_buff_ring_entries = priv->rx_desc_cnt;
+		}
+		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
 		cmd.create_rx_queue.rx_ring_size =
 			cpu_to_be16(priv->rx_desc_cnt);
 		cmd.create_rx_queue.rx_desc_ring_addr =
@@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
 		cmd.create_rx_queue.packet_buffer_size =
 			cpu_to_be16(priv->data_buffer_size_dqo);
 		cmd.create_rx_queue.rx_buff_ring_size =
-			cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
+			cpu_to_be16(rx_buff_ring_entries);
 		cmd.create_rx_queue.enable_rsc =
 			!!(priv->dev->features & NETIF_F_LRO);
 	}
@@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
 		     const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
 {
 	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
+	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
+
+	if (priv->queue_format == GVE_DQO_QPL_FORMAT)
+		return 0;
+
 	priv->options_dqo_rda.tx_comp_ring_entries =
 		be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
-	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
 	priv->options_dqo_rda.rx_buff_ring_entries =
 		be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
 
@@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
 static void gve_enable_supported_features(struct gve_priv *priv,
 					  u32 supported_features_mask,
 					  const struct gve_device_option_jumbo_frames
-						  *dev_op_jumbo_frames)
+					  *dev_op_jumbo_frames,
+					  const struct gve_device_option_dqo_qpl
+					  *dev_op_dqo_qpl)
 {
 	/* Before control reaches this point, the page-size-capped max MTU from
 	 * the gve_device_descriptor field has already been stored in
@@ -699,6 +748,20 @@ static void gve_enable_supported_features(struct gve_priv *priv,
 			 "JUMBO FRAMES device option enabled.\n");
 		priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
 	}
+
+	/* Override pages for qpl for DQO-QPL */
+	if (dev_op_dqo_qpl) {
+		dev_info(&priv->pdev->dev,
+			 "DQO QPL device option enabled.\n");
+		priv->tx_pages_per_qpl =
+			be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
+		priv->rx_pages_per_qpl =
+			be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
+		if (priv->tx_pages_per_qpl == 0)
+			priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
+		if (priv->rx_pages_per_qpl == 0)
+			priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
+	}
 }
 
 int gve_adminq_describe_device(struct gve_priv *priv)
@@ -707,6 +770,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
 	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
 	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
+	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
 	struct gve_device_descriptor *descriptor;
 	u32 supported_features_mask = 0;
 	union gve_adminq_command cmd;
@@ -733,13 +797,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 
 	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
 					 &dev_op_gqi_qpl, &dev_op_dqo_rda,
-					 &dev_op_jumbo_frames);
+					 &dev_op_jumbo_frames,
+					 &dev_op_dqo_qpl);
 	if (err)
 		goto free_device_descriptor;
 
 	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
 	 * is not set to GqiRda, choose the queue format in a priority order:
-	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
+	 * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
 	 */
 	if (dev_op_dqo_rda) {
 		priv->queue_format = GVE_DQO_RDA_FORMAT;
@@ -747,7 +812,13 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 			 "Driver is running with DQO RDA queue format.\n");
 		supported_features_mask =
 			be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
-	} else if (dev_op_gqi_rda) {
+	} else if (dev_op_dqo_qpl) {
+		priv->queue_format = GVE_DQO_QPL_FORMAT;
+		dev_info(&priv->pdev->dev,
+			 "Driver is running with DQO QPL queue format.\n");
+		supported_features_mask =
+			be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
+	} else if (dev_op_gqi_rda) {
 		priv->queue_format = GVE_GQI_RDA_FORMAT;
 		dev_info(&priv->pdev->dev,
 			 "Driver is running with GQI RDA queue format.\n");
@@ -798,7 +869,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
 
 	gve_enable_supported_features(priv, supported_features_mask,
-				      dev_op_jumbo_frames);
+				      dev_op_jumbo_frames, dev_op_dqo_qpl);
 
 free_device_descriptor:
 	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
index f894beb3deaf..38a22279e863 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {
 
 static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
 
+struct gve_device_option_dqo_qpl {
+	__be32 supported_features_mask;
+	__be16 tx_pages_per_qpl;
+	__be16 rx_pages_per_qpl;
+};
+
+static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);
+
 struct gve_device_option_jumbo_frames {
 	__be32 supported_features_mask;
 	__be16 max_mtu;
@@ -130,6 +138,7 @@ enum gve_dev_opt_id {
 	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
 	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
 	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
+	GVE_DEV_OPT_ID_DQO_QPL = 0x7,
 	GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
 };
 
@@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
 	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
 	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
 	GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
+	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
 };
 
 enum gve_sup_feature_mask {
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index e6f1711d9be0..b40fafe1460a 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -31,7 +31,6 @@
 
 // Minimum amount of time between queue kicks in msec (10 seconds)
 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
-#define DQO_TX_MAX	0x3FFFF
 
 char gve_driver_name[] = "gve";
 const char gve_version_str[] = GVE_VERSION;
@@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
 		goto abort_with_stats_report;
 	}
 
-	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
+	if (!gve_is_gqi(priv)) {
 		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
 					       GFP_KERNEL);
 		if (!priv->ptype_lut_dqo) {
@@ -1085,9 +1084,10 @@ static int gve_alloc_qpls(struct gve_priv *priv)
 	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
 	int start_id;
 	int i, j;
+	int page_count;
 	int err;
 
-	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
+	if (!gve_is_qpl(priv))
 		return 0;
 
 	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
@@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
 		return -ENOMEM;
 
 	start_id = gve_tx_start_qpl_id(priv);
+	page_count = priv->tx_pages_per_qpl;
 	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
 		err = gve_alloc_queue_page_list(priv, i,
-						priv->tx_pages_per_qpl);
+						page_count);
 		if (err)
 			goto free_qpls;
 	}
 
 	start_id = gve_rx_start_qpl_id(priv);
+
+	/* For GQI_QPL, the number of pages allocated has a 1:1 relationship
+	 * with the number of descriptors. For DQO, more pages than
+	 * descriptors are required (because of out-of-order completions).
+	 */
+	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
+		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
 	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
 		err = gve_alloc_queue_page_list(priv, i,
-						priv->rx_data_slot_cnt);
+						page_count);
 		if (err)
 			goto free_qpls;
 	}
@@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
 
 	/* Big TCP is only supported on DQ*/
 	if (!gve_is_gqi(priv))
-		netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
+		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
 
 	priv->num_registered_pages = 0;
 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
-- 
2.41.0.585.gd2178a4bd4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 2/4] gve: Tx path for DQO-QPL
  2023-08-02 21:33 [PATCH net-next 0/4] Add QPL mode for DQO descriptor format Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 1/4] gve: Control path for DQO-QPL Rushil Gupta
@ 2023-08-02 21:33 ` Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 3/4] gve: RX " Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 4/4] gve: update gve.rst Rushil Gupta
  3 siblings, 0 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-02 21:33 UTC (permalink / raw)
  To: netdev, davem, kuba, willemb, edumazet, pabeni
  Cc: Rushil Gupta, Praveen Kaligineedi, Bailey Forrest

Each QPL page is divided into GVE_TX_BUFS_PER_PAGE_DQO buffers.
When a packet needs to be transmitted, we break it into chunks of
at most GVE_TX_BUF_SIZE_DQO bytes and transmit each chunk using a
TX descriptor.
We allocate the TX buffers from the free list in dqo_tx and store
their indices in an array in the pending_packet structure.

The TX buffers are returned to the free list in dqo_compl after a
packet completion is received, or when packets are removed from the
miss-completions list.
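
For illustration, a minimal user-space sketch (not part of the patch) of
the buffer-index-to-page arithmetic this scheme relies on; it assumes 4K
pages and the GVE_TX_BUF_SHIFT_DQO == 11 value introduced in this series
(DEMO_PAGE_SHIFT is a stand-in for the kernel's PAGE_SHIFT):

#include <stdio.h>

#define DEMO_PAGE_SHIFT 12
#define GVE_TX_BUF_SHIFT_DQO 11
#define GVE_TX_BUFS_PER_PAGE_DQO (1 << (DEMO_PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO))

int main(void)
{
	int index;

	for (index = 0; index < 5; index++) {
		/* Same mapping as gve_tx_buf_get_addr() in gve_tx_dqo.c */
		int page_id = index >> (DEMO_PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
		int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) <<
			     GVE_TX_BUF_SHIFT_DQO;

		printf("tx buf %d -> qpl page %d, offset %d\n",
		       index, page_id, offset);
	}
	return 0;
}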

Signed-off-by: Rushil Gupta <rushilg@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Bailey Forrest <bcf@google.com>
---
 drivers/net/ethernet/google/gve/gve.h        |  83 +++-
 drivers/net/ethernet/google/gve/gve_tx_dqo.c | 404 +++++++++++++++----
 2 files changed, 404 insertions(+), 83 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index 517a63b60cb9..dee2db6f6750 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -57,6 +57,20 @@
 /* Maximum TSO size supported on DQO */
 #define GVE_DQO_TX_MAX	0x3FFFF
 
+#define GVE_TX_BUF_SHIFT_DQO 11
+
+/* 2K buffers for DQO-QPL */
+#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
+#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
+#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))
+
+/* If the number of free/recyclable buffers is less than this threshold, the
+ * driver allocates and uses a non-qpl page on the receive path of DQO QPL
+ * to free up buffers.
+ * The value is set big enough to post at least three 64K LRO packets via 2K buffers to the NIC.
+ */
+#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96
+
 /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
 struct gve_rx_desc_queue {
 	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -334,8 +348,14 @@ struct gve_tx_pending_packet_dqo {
 	 * All others correspond to `skb`'s frags and should be unmapped with
 	 * `dma_unmap_page`.
 	 */
-	DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
-	DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+	union {
+		struct {
+			DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
+			DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
+		};
+		s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT];
+	};
+
 	u16 num_bufs;
 
 	/* Linked list index to next element in the list, or -1 if none */
@@ -390,6 +410,32 @@ struct gve_tx_ring {
 			 * set.
 			 */
 			u32 last_re_idx;
+
+			/* free running number of packet buf descriptors posted */
+			u16 posted_packet_desc_cnt;
+			/* free running number of packet buf descriptors completed */
+			u16 completed_packet_desc_cnt;
+
+			/* QPL fields */
+			struct {
+			       /* Linked list of gve_tx_buf_dqo. Index into
+				* tx_qpl_buf_next, or -1 if empty.
+				*
+				* This is a consumer list owned by the TX path. When it
+				* runs out, the producer list is stolen from the
+				* completion handling path
+				* (dqo_compl.free_tx_qpl_buf_head).
+				*/
+				s16 free_tx_qpl_buf_head;
+
+			       /* Free running count of the number of QPL tx buffers
+				* allocated
+				*/
+				u32 alloc_tx_qpl_buf_cnt;
+
+				/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
+				u32 free_tx_qpl_buf_cnt;
+			};
 		} dqo_tx;
 	};
 
@@ -433,6 +479,24 @@ struct gve_tx_ring {
 			 * reached a specified timeout.
 			 */
 			struct gve_index_list timed_out_completions;
+
+			/* QPL fields */
+			struct {
+				/* Linked list of gve_tx_buf_dqo. Index into
+				 * tx_qpl_buf_next, or -1 if empty.
+				 *
+				 * This is the producer list, owned by the completion
+				 * handling path. When the consumer list
+				 * (dqo_tx.free_tx_qpl_buf_head) runs out, this list
+				 * will be stolen.
+				 */
+				atomic_t free_tx_qpl_buf_head;
+
+				/* Free running count of the number of tx buffers
+				 * freed
+				 */
+				atomic_t free_tx_qpl_buf_cnt;
+			};
 		} dqo_compl;
 	} ____cacheline_aligned;
 	u64 pkt_done; /* free-running - total packets completed */
@@ -459,6 +523,21 @@ struct gve_tx_ring {
 			s16 num_pending_packets;
 
 			u32 complq_mask; /* complq size is complq_mask + 1 */
+
+			/* QPL fields */
+			struct {
+				/* qpl assigned to this queue */
+				struct gve_queue_page_list *qpl;
+
+				/* Each QPL page is divided into TX bounce buffers
+				 * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is
+				 * an array to manage linked lists of TX buffers.
+				 * An entry j at index i implies that the j'th
+				 * buffer is next on the list after i.
+				 */
+				s16 *tx_qpl_buf_next;
+				u32 num_tx_qpl_bufs;
+			};
 		} dqo;
 	} ____cacheline_aligned;
 	struct netdev_queue *netdev_txq;
diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
index 3c09e66ba1ab..1e19b834a613 100644
--- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c
@@ -13,6 +13,89 @@
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 
+/* Returns true if tx_bufs are available. */
+static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count)
+{
+	int num_avail;
+
+	if (!tx->dqo.qpl)
+		return true;
+
+	num_avail = tx->dqo.num_tx_qpl_bufs -
+		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
+		 tx->dqo_tx.free_tx_qpl_buf_cnt);
+
+	if (count <= num_avail)
+		return true;
+
+	/* Update cached value from dqo_compl. */
+	tx->dqo_tx.free_tx_qpl_buf_cnt =
+		atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_cnt);
+
+	num_avail = tx->dqo.num_tx_qpl_bufs -
+		(tx->dqo_tx.alloc_tx_qpl_buf_cnt -
+		 tx->dqo_tx.free_tx_qpl_buf_cnt);
+
+	return count <= num_avail;
+}
+
+static s16
+gve_alloc_tx_qpl_buf(struct gve_tx_ring *tx)
+{
+	s16 index;
+
+	index = tx->dqo_tx.free_tx_qpl_buf_head;
+
+	/* No TX buffers available, try to steal the list from the
+	 * completion handler.
+	 */
+	if (unlikely(index == -1)) {
+		tx->dqo_tx.free_tx_qpl_buf_head =
+			atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
+		index = tx->dqo_tx.free_tx_qpl_buf_head;
+
+		if (unlikely(index == -1))
+			return index;
+	}
+
+	/* Remove TX buf from free list */
+	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];
+
+	return index;
+}
+
+static void
+gve_free_tx_qpl_bufs(struct gve_tx_ring *tx,
+		     struct gve_tx_pending_packet_dqo *pkt)
+{
+	s16 index;
+	int i;
+
+	if (!pkt->num_bufs)
+		return;
+
+	index = pkt->tx_qpl_buf_ids[0];
+	/* Create a linked list of buffers to be added to the free list */
+	for (i = 1; i < pkt->num_bufs; i++) {
+		tx->dqo.tx_qpl_buf_next[index] = pkt->tx_qpl_buf_ids[i];
+		index = pkt->tx_qpl_buf_ids[i];
+	}
+
+	while (true) {
+		s16 old_head = atomic_read_acquire(&tx->dqo_compl.free_tx_qpl_buf_head);
+
+		tx->dqo.tx_qpl_buf_next[index] = old_head;
+		if (atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
+				   old_head,
+				   pkt->tx_qpl_buf_ids[0]) == old_head) {
+			break;
+		}
+	}
+
+	atomic_add(pkt->num_bufs, &tx->dqo_compl.free_tx_qpl_buf_cnt);
+	pkt->num_bufs = 0;
+}
+
 /* Returns true if a gve_tx_pending_packet_dqo object is available. */
 static bool gve_has_pending_packet(struct gve_tx_ring *tx)
 {
@@ -136,9 +219,40 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, int idx)
 	kvfree(tx->dqo.pending_packets);
 	tx->dqo.pending_packets = NULL;
 
+	kvfree(tx->dqo.tx_qpl_buf_next);
+	tx->dqo.tx_qpl_buf_next = NULL;
+
+	if (tx->dqo.qpl) {
+		gve_unassign_qpl(priv, tx->dqo.qpl->id);
+		tx->dqo.qpl = NULL;
+	}
+
 	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
 }
 
+static int gve_tx_qpl_buf_init(struct gve_tx_ring *tx)
+{
+	int num_tx_qpl_bufs = GVE_TX_BUFS_PER_PAGE_DQO *
+		tx->dqo.qpl->num_entries;
+	int i;
+
+	tx->dqo.tx_qpl_buf_next = kvcalloc(num_tx_qpl_bufs,
+					   sizeof(tx->dqo.tx_qpl_buf_next[0]),
+					   GFP_KERNEL);
+	if (!tx->dqo.tx_qpl_buf_next)
+		return -ENOMEM;
+
+	tx->dqo.num_tx_qpl_bufs = num_tx_qpl_bufs;
+
+	/* Generate free TX buf list */
+	for (i = 0; i < num_tx_qpl_bufs - 1; i++)
+		tx->dqo.tx_qpl_buf_next[i] = i + 1;
+	tx->dqo.tx_qpl_buf_next[num_tx_qpl_bufs - 1] = -1;
+
+	atomic_set_release(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
+	return 0;
+}
+
 static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 {
 	struct gve_tx_ring *tx = &priv->tx[idx];
@@ -155,7 +269,9 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 
 	/* Queue sizes must be a power of 2 */
 	tx->mask = priv->tx_desc_cnt - 1;
-	tx->dqo.complq_mask = priv->options_dqo_rda.tx_comp_ring_entries - 1;
+	tx->dqo.complq_mask = priv->queue_format == GVE_DQO_RDA_FORMAT ?
+		priv->options_dqo_rda.tx_comp_ring_entries - 1 :
+		tx->mask;
 
 	/* The max number of pending packets determines the maximum number of
 	 * descriptors which maybe written to the completion queue.
@@ -211,6 +327,15 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 	if (!tx->q_resources)
 		goto err;
 
+	if (gve_is_qpl(priv)) {
+		tx->dqo.qpl = gve_assign_tx_qpl(priv, idx);
+		if (!tx->dqo.qpl)
+			goto err;
+
+		if (gve_tx_qpl_buf_init(tx))
+			goto err;
+	}
+
 	gve_tx_add_to_block(priv, idx);
 
 	return 0;
@@ -267,20 +392,27 @@ static u32 num_avail_tx_slots(const struct gve_tx_ring *tx)
 	return tx->mask - num_used;
 }
 
+static bool gve_has_avail_slots_tx_dqo(struct gve_tx_ring *tx,
+				       int desc_count, int buf_count)
+{
+	return gve_has_pending_packet(tx) &&
+		   num_avail_tx_slots(tx) >= desc_count &&
+		   gve_has_free_tx_qpl_bufs(tx, buf_count);
+}
+
 /* Stops the queue if available descriptors is less than 'count'.
  * Return: 0 if stop is not required.
  */
-static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count)
+static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx,
+				 int desc_count, int buf_count)
 {
-	if (likely(gve_has_pending_packet(tx) &&
-		   num_avail_tx_slots(tx) >= count))
+	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
 		return 0;
 
 	/* Update cached TX head pointer */
 	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
 
-	if (likely(gve_has_pending_packet(tx) &&
-		   num_avail_tx_slots(tx) >= count))
+	if (likely(gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
 		return 0;
 
 	/* No space, so stop the queue */
@@ -295,8 +427,7 @@ static int gve_maybe_stop_tx_dqo(struct gve_tx_ring *tx, int count)
 	 */
 	tx->dqo_tx.head = atomic_read_acquire(&tx->dqo_compl.hw_tx_head);
 
-	if (likely(!gve_has_pending_packet(tx) ||
-		   num_avail_tx_slots(tx) < count))
+	if (likely(!gve_has_avail_slots_tx_dqo(tx, desc_count, buf_count)))
 		return -EBUSY;
 
 	netif_tx_start_queue(tx->netdev_txq);
@@ -444,44 +575,16 @@ gve_tx_fill_general_ctx_desc(struct gve_tx_general_context_desc_dqo *desc,
 	};
 }
 
-/* Returns 0 on success, or < 0 on error.
- *
- * Before this function is called, the caller must ensure
- * gve_has_pending_packet(tx) returns true.
- */
 static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
-				      struct sk_buff *skb)
+				      struct sk_buff *skb,
+				      struct gve_tx_pending_packet_dqo *pkt,
+				      s16 completion_tag,
+				      u32 *desc_idx,
+				      bool is_gso)
 {
 	const struct skb_shared_info *shinfo = skb_shinfo(skb);
-	const bool is_gso = skb_is_gso(skb);
-	u32 desc_idx = tx->dqo_tx.tail;
-
-	struct gve_tx_pending_packet_dqo *pkt;
-	struct gve_tx_metadata_dqo metadata;
-	s16 completion_tag;
 	int i;
 
-	pkt = gve_alloc_pending_packet(tx);
-	pkt->skb = skb;
-	pkt->num_bufs = 0;
-	completion_tag = pkt - tx->dqo.pending_packets;
-
-	gve_extract_tx_metadata_dqo(skb, &metadata);
-	if (is_gso) {
-		int header_len = gve_prep_tso(skb);
-
-		if (unlikely(header_len < 0))
-			goto err;
-
-		gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
-					 skb, &metadata, header_len);
-		desc_idx = (desc_idx + 1) & tx->mask;
-	}
-
-	gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
-				     &metadata);
-	desc_idx = (desc_idx + 1) & tx->mask;
-
 	/* Note: HW requires that the size of a non-TSO packet be within the
 	 * range of [17, 9728].
 	 *
@@ -490,6 +593,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
 	 * - Hypervisor won't allow MTU larger than 9216.
 	 */
 
+	pkt->num_bufs = 0;
 	/* Map the linear portion of skb */
 	{
 		u32 len = skb_headlen(skb);
@@ -503,7 +607,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
 		dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
 		++pkt->num_bufs;
 
-		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
 					 completion_tag,
 					 /*eop=*/shinfo->nr_frags == 0, is_gso);
 	}
@@ -522,10 +626,139 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
 		dma_unmap_addr_set(pkt, dma[pkt->num_bufs], addr);
 		++pkt->num_bufs;
 
-		gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, skb, len, addr,
+		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb, len, addr,
 					 completion_tag, is_eop, is_gso);
 	}
 
+	return 0;
+err:
+	for (i = 0; i < pkt->num_bufs; i++) {
+		if (i == 0) {
+			dma_unmap_single(tx->dev,
+					 dma_unmap_addr(pkt, dma[i]),
+					 dma_unmap_len(pkt, len[i]),
+					 DMA_TO_DEVICE);
+		} else {
+			dma_unmap_page(tx->dev,
+				       dma_unmap_addr(pkt, dma[i]),
+				       dma_unmap_len(pkt, len[i]),
+				       DMA_TO_DEVICE);
+		}
+	}
+	pkt->num_bufs = 0;
+	return -1;
+}
+
+/* Tx buffer i corresponds to
+ * qpl_page_id = i / GVE_TX_BUFS_PER_PAGE_DQO
+ * qpl_page_offset = (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO
+ */
+static void gve_tx_buf_get_addr(struct gve_tx_ring *tx,
+				s16 index,
+				void **va, dma_addr_t *dma_addr)
+{
+	int page_id = index >> (PAGE_SHIFT - GVE_TX_BUF_SHIFT_DQO);
+	int offset = (index & (GVE_TX_BUFS_PER_PAGE_DQO - 1)) << GVE_TX_BUF_SHIFT_DQO;
+
+	*va = page_address(tx->dqo.qpl->pages[page_id]) + offset;
+	*dma_addr = tx->dqo.qpl->page_buses[page_id] + offset;
+}
+
+static int gve_tx_add_skb_copy_dqo(struct gve_tx_ring *tx,
+				   struct sk_buff *skb,
+				   struct gve_tx_pending_packet_dqo *pkt,
+				   s16 completion_tag,
+				   u32 *desc_idx,
+				   bool is_gso)
+{
+	u32 copy_offset = 0;
+	dma_addr_t dma_addr;
+	u32 copy_len;
+	s16 index;
+	void *va;
+
+	/* Break the packet into buffer size chunks */
+	pkt->num_bufs = 0;
+	while (copy_offset < skb->len) {
+		index = gve_alloc_tx_qpl_buf(tx);
+		if (unlikely(index == -1))
+			goto err;
+
+		gve_tx_buf_get_addr(tx, index, &va, &dma_addr);
+		copy_len = min_t(u32, GVE_TX_BUF_SIZE_DQO,
+				 skb->len - copy_offset);
+		skb_copy_bits(skb, copy_offset, va, copy_len);
+
+		copy_offset += copy_len;
+		dma_sync_single_for_device(tx->dev, dma_addr,
+					   copy_len, DMA_TO_DEVICE);
+		gve_tx_fill_pkt_desc_dqo(tx, desc_idx, skb,
+					 copy_len,
+					 dma_addr,
+					 completion_tag,
+					 copy_offset == skb->len,
+					 is_gso);
+
+		pkt->tx_qpl_buf_ids[pkt->num_bufs] = index;
+		++tx->dqo_tx.alloc_tx_qpl_buf_cnt;
+		++pkt->num_bufs;
+	}
+
+	return 0;
+err:
+	/* Should not be here if gve_has_free_tx_qpl_bufs() check is correct */
+	gve_free_tx_qpl_bufs(tx, pkt);
+	return -ENOMEM;
+}
+
+/* Returns 0 on success, or < 0 on error.
+ *
+ * Before this function is called, the caller must ensure
+ * gve_has_pending_packet(tx) returns true.
+ */
+static int gve_tx_add_skb_dqo(struct gve_tx_ring *tx,
+			      struct sk_buff *skb)
+{
+	const bool is_gso = skb_is_gso(skb);
+	u32 desc_idx = tx->dqo_tx.tail;
+	struct gve_tx_pending_packet_dqo *pkt;
+	struct gve_tx_metadata_dqo metadata;
+	s16 completion_tag;
+
+	pkt = gve_alloc_pending_packet(tx);
+	pkt->skb = skb;
+	completion_tag = pkt - tx->dqo.pending_packets;
+
+	gve_extract_tx_metadata_dqo(skb, &metadata);
+	if (is_gso) {
+		int header_len = gve_prep_tso(skb);
+
+		if (unlikely(header_len < 0))
+			goto err;
+
+		gve_tx_fill_tso_ctx_desc(&tx->dqo.tx_ring[desc_idx].tso_ctx,
+					 skb, &metadata, header_len);
+		desc_idx = (desc_idx + 1) & tx->mask;
+	}
+
+	gve_tx_fill_general_ctx_desc(&tx->dqo.tx_ring[desc_idx].general_ctx,
+				     &metadata);
+	desc_idx = (desc_idx + 1) & tx->mask;
+
+	if (tx->dqo.qpl) {
+		if (gve_tx_add_skb_copy_dqo(tx, skb, pkt,
+					    completion_tag,
+					    &desc_idx, is_gso))
+			goto err;
+	} else {
+		if (gve_tx_add_skb_no_copy_dqo(tx, skb, pkt,
+					       completion_tag,
+					       &desc_idx, is_gso))
+			goto err;
+	}
+
+	tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs;
+
 	/* Commit the changes to our state */
 	tx->dqo_tx.tail = desc_idx;
 
@@ -547,22 +780,7 @@ static int gve_tx_add_skb_no_copy_dqo(struct gve_tx_ring *tx,
 	return 0;
 
 err:
-	for (i = 0; i < pkt->num_bufs; i++) {
-		if (i == 0) {
-			dma_unmap_single(tx->dev,
-					 dma_unmap_addr(pkt, dma[i]),
-					 dma_unmap_len(pkt, len[i]),
-					 DMA_TO_DEVICE);
-		} else {
-			dma_unmap_page(tx->dev,
-				       dma_unmap_addr(pkt, dma[i]),
-				       dma_unmap_len(pkt, len[i]),
-				       DMA_TO_DEVICE);
-		}
-	}
-
 	pkt->skb = NULL;
-	pkt->num_bufs = 0;
 	gve_free_pending_packet(tx, pkt);
 
 	return -1;
@@ -636,40 +854,56 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx,
 	int num_buffer_descs;
 	int total_num_descs;
 
-	if (skb_is_gso(skb)) {
-		/* If TSO doesn't meet HW requirements, attempt to linearize the
-		 * packet.
-		 */
-		if (unlikely(!gve_can_send_tso(skb) &&
-			     skb_linearize(skb) < 0)) {
-			net_err_ratelimited("%s: Failed to transmit TSO packet\n",
-					    priv->dev->name);
-			goto drop;
-		}
-
-		if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
-			goto drop;
+	if (tx->dqo.qpl) {
+		if (skb_is_gso(skb))
+			if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+				goto drop;
 
-		num_buffer_descs = gve_num_buffer_descs_needed(skb);
+		/* We do not need to verify the number of buffers used per
+		 * packet or per segment in case of TSO as with 2K size buffers
+		 * none of the TX packet rules would be violated.
+		 *
+		 * gve_can_send_tso() checks that each TCP segment of gso_size is
+		 * not distributed over more than 9 SKB frags.
+		 */
+		num_buffer_descs = DIV_ROUND_UP(skb->len, GVE_TX_BUF_SIZE_DQO);
 	} else {
-		num_buffer_descs = gve_num_buffer_descs_needed(skb);
+		if (skb_is_gso(skb)) {
+			/* If TSO doesn't meet HW requirements, attempt to linearize the
+			 * packet.
+			 */
+			if (unlikely(!gve_can_send_tso(skb) &&
+				     skb_linearize(skb) < 0)) {
+				net_err_ratelimited("%s: Failed to transmit TSO packet\n",
+						    priv->dev->name);
+				goto drop;
+			}
 
-		if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
-			if (unlikely(skb_linearize(skb) < 0))
+			if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
 				goto drop;
 
-			num_buffer_descs = 1;
+			num_buffer_descs = gve_num_buffer_descs_needed(skb);
+		} else {
+			num_buffer_descs = gve_num_buffer_descs_needed(skb);
+
+			if (unlikely(num_buffer_descs > GVE_TX_MAX_DATA_DESCS)) {
+				if (unlikely(skb_linearize(skb) < 0))
+					goto drop;
+
+				num_buffer_descs = 1;
+			}
 		}
 	}
 
 	/* Metadata + (optional TSO) + data descriptors. */
 	total_num_descs = 1 + skb_is_gso(skb) + num_buffer_descs;
 	if (unlikely(gve_maybe_stop_tx_dqo(tx, total_num_descs +
-			GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP))) {
+			GVE_TX_MIN_DESC_PREVENT_CACHE_OVERLAP,
+			num_buffer_descs))) {
 		return -1;
 	}
 
-	if (unlikely(gve_tx_add_skb_no_copy_dqo(tx, skb) < 0))
+	if (unlikely(gve_tx_add_skb_dqo(tx, skb) < 0))
 		goto drop;
 
 	netdev_tx_sent_queue(tx->netdev_txq, skb->len);
@@ -817,7 +1051,11 @@ static void gve_handle_packet_completion(struct gve_priv *priv,
 			return;
 		}
 	}
-	gve_unmap_packet(tx->dev, pending_packet);
+	tx->dqo_tx.completed_packet_desc_cnt += pending_packet->num_bufs;
+	if (tx->dqo.qpl)
+		gve_free_tx_qpl_bufs(tx, pending_packet);
+	else
+		gve_unmap_packet(tx->dev, pending_packet);
 
 	*bytes += pending_packet->skb->len;
 	(*pkts)++;
@@ -875,12 +1113,16 @@ static void remove_miss_completions(struct gve_priv *priv,
 
 		remove_from_list(tx, &tx->dqo_compl.miss_completions,
 				 pending_packet);
-		/* Unmap buffers and free skb but do not unallocate packet i.e.
+		/* Unmap/free TX buffers and free skb but do not unallocate packet i.e.
 		 * the completion tag is not freed to ensure that the driver
 		 * can take appropriate action if a corresponding valid
 		 * completion is received later.
 		 */
-		gve_unmap_packet(tx->dev, pending_packet);
+		if (tx->dqo.qpl)
+			gve_free_tx_qpl_bufs(tx, pending_packet);
+		else
+			gve_unmap_packet(tx->dev, pending_packet);
+
 		/* This indicates the packet was dropped. */
 		dev_kfree_skb_any(pending_packet->skb);
 		pending_packet->skb = NULL;
-- 
2.41.0.585.gd2178a4bd4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 3/4] gve: RX path for DQO-QPL
  2023-08-02 21:33 [PATCH net-next 0/4] Add QPL mode for DQO descriptor format Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 1/4] gve: Control path for DQO-QPL Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 2/4] gve: Tx " Rushil Gupta
@ 2023-08-02 21:33 ` Rushil Gupta
  2023-08-02 21:33 ` [PATCH net-next 4/4] gve: update gve.rst Rushil Gupta
  3 siblings, 0 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-02 21:33 UTC (permalink / raw)
  To: netdev, davem, kuba, willemb, edumazet, pabeni
  Cc: Rushil Gupta, Praveen Kaligineedi, Bailey Forrest

The RX path allocates the QPL page pool at queue creation and
tries to reuse these pages through page recycling. This patch
ensures that on refill no non-QPL pages are posted to the device.

When the driver is running low on free buffers, an on-demand
allocation step kicks in that allocates a non-QPL page to carry
the SKB data, freeing up the QPL page in use.

gve_try_recycle_buf was moved to gve_rx_append_frags so that the
driver does not attempt to mark the buffer as used if a non-QPL
page was allocated on demand.
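
For illustration, a minimal sketch (not part of the patch) of the
low-buffer check that gates the on-demand copy path; the field names
mirror those added in this patch, and the threshold of 96 is
GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD from the TX patch:

#include <stdbool.h>
#include <stdio.h>

#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96

/* Copy into a freshly allocated non-QPL page once fewer than 96 buffer
 * states remain unused, i.e. almost every QPL buffer is still in flight.
 */
static bool should_copy_ondemand(bool is_qpl, int num_buf_states,
				 int used_buf_states_cnt)
{
	if (!is_qpl)
		return false;
	return used_buf_states_cnt >=
	       num_buf_states - GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD;
}

int main(void)
{
	/* e.g. rx_pages_per_qpl == 2048 buffer states for a DQO-QPL ring */
	printf("%d\n", should_copy_ondemand(true, 2048, 1900)); /* 0: plenty left */
	printf("%d\n", should_copy_ondemand(true, 2048, 1960)); /* 1: running low */
	return 0;
}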

Signed-off-by: Rushil Gupta <rushilg@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Bailey Forrest <bcf@google.com>
---
 drivers/net/ethernet/google/gve/gve.h        |   9 ++
 drivers/net/ethernet/google/gve/gve_rx_dqo.c | 126 ++++++++++++++++---
 2 files changed, 117 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
index dee2db6f6750..534714349b30 100644
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -237,6 +237,15 @@ struct gve_rx_ring {
 			 * which cannot be reused yet.
 			 */
 			struct gve_index_list used_buf_states;
+
+			/* index into queue page list */
+			u32 next_qpl_page_idx;
+
+			/* qpl assigned to this queue */
+			struct gve_queue_page_list *qpl;
+
+			/* track number of used buffers */
+			u16 used_buf_states_cnt;
 		} dqo;
 	};
 
diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
index e57b73eb70f6..2b9392a1f113 100644
--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c
+++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c
@@ -22,11 +22,13 @@ static int gve_buf_ref_cnt(struct gve_rx_buf_state_dqo *bs)
 }
 
 static void gve_free_page_dqo(struct gve_priv *priv,
-			      struct gve_rx_buf_state_dqo *bs)
+			      struct gve_rx_buf_state_dqo *bs,
+			      bool free_page)
 {
 	page_ref_sub(bs->page_info.page, bs->page_info.pagecnt_bias - 1);
-	gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
-		      DMA_FROM_DEVICE);
+	if (free_page)
+		gve_free_page(&priv->pdev->dev, bs->page_info.page, bs->addr,
+			      DMA_FROM_DEVICE);
 	bs->page_info.page = NULL;
 }
 
@@ -130,12 +132,20 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx)
 	 */
 	for (i = 0; i < 5; i++) {
 		buf_state = gve_dequeue_buf_state(rx, &rx->dqo.used_buf_states);
-		if (gve_buf_ref_cnt(buf_state) == 0)
+		if (gve_buf_ref_cnt(buf_state) == 0) {
+			rx->dqo.used_buf_states_cnt--;
 			return buf_state;
+		}
 
 		gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
 	}
 
+	/* For QPL, we cannot allocate any new buffers and must
+	 * wait for the existing ones to be available.
+	 */
+	if (rx->dqo.qpl)
+		return NULL;
+
 	/* If there are no free buf states discard an entry from
 	 * `used_buf_states` so it can be used.
 	 */
@@ -144,23 +154,39 @@ gve_get_recycled_buf_state(struct gve_rx_ring *rx)
 		if (gve_buf_ref_cnt(buf_state) == 0)
 			return buf_state;
 
-		gve_free_page_dqo(rx->gve, buf_state);
+		gve_free_page_dqo(rx->gve, buf_state, true);
 		gve_free_buf_state(rx, buf_state);
 	}
 
 	return NULL;
 }
 
-static int gve_alloc_page_dqo(struct gve_priv *priv,
+static int gve_alloc_page_dqo(struct gve_rx_ring *rx,
 			      struct gve_rx_buf_state_dqo *buf_state)
 {
-	int err;
+	struct gve_priv *priv = rx->gve;
+	u32 idx;
 
-	err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page,
-			     &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC);
-	if (err)
-		return err;
+	if (!rx->dqo.qpl) {
+		int err;
 
+		err = gve_alloc_page(priv, &priv->pdev->dev,
+				     &buf_state->page_info.page,
+				     &buf_state->addr,
+				     DMA_FROM_DEVICE, GFP_ATOMIC);
+		if (err)
+			return err;
+	} else {
+		idx = rx->dqo.next_qpl_page_idx;
+		if (idx >= priv->rx_pages_per_qpl) {
+			net_err_ratelimited("%s: Out of QPL pages\n",
+					    priv->dev->name);
+			return -ENOMEM;
+		}
+		buf_state->page_info.page = rx->dqo.qpl->pages[idx];
+		buf_state->addr = rx->dqo.qpl->page_buses[idx];
+		rx->dqo.next_qpl_page_idx++;
+	}
 	buf_state->page_info.page_offset = 0;
 	buf_state->page_info.page_address =
 		page_address(buf_state->page_info.page);
@@ -195,9 +221,13 @@ static void gve_rx_free_ring_dqo(struct gve_priv *priv, int idx)
 
 	for (i = 0; i < rx->dqo.num_buf_states; i++) {
 		struct gve_rx_buf_state_dqo *bs = &rx->dqo.buf_states[i];
-
+		/* Only free page for RDA. QPL pages are freed in gve_main. */
 		if (bs->page_info.page)
-			gve_free_page_dqo(priv, bs);
+			gve_free_page_dqo(priv, bs, !rx->dqo.qpl);
+	}
+	if (rx->dqo.qpl) {
+		gve_unassign_qpl(priv, rx->dqo.qpl->id);
+		rx->dqo.qpl = NULL;
 	}
 
 	if (rx->dqo.bufq.desc_ring) {
@@ -229,7 +259,8 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 	int i;
 
 	const u32 buffer_queue_slots =
-		priv->options_dqo_rda.rx_buff_ring_entries;
+		priv->queue_format == GVE_DQO_RDA_FORMAT ?
+		priv->options_dqo_rda.rx_buff_ring_entries : priv->rx_desc_cnt;
 	const u32 completion_queue_slots = priv->rx_desc_cnt;
 
 	netif_dbg(priv, drv, priv->dev, "allocating rx ring DQO\n");
@@ -243,7 +274,9 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 	rx->ctx.skb_head = NULL;
 	rx->ctx.skb_tail = NULL;
 
-	rx->dqo.num_buf_states = min_t(s16, S16_MAX, buffer_queue_slots * 4);
+	rx->dqo.num_buf_states = priv->queue_format == GVE_DQO_RDA_FORMAT ?
+		min_t(s16, S16_MAX, buffer_queue_slots * 4) :
+		priv->rx_pages_per_qpl;
 	rx->dqo.buf_states = kvcalloc(rx->dqo.num_buf_states,
 				      sizeof(rx->dqo.buf_states[0]),
 				      GFP_KERNEL);
@@ -275,6 +308,13 @@ static int gve_rx_alloc_ring_dqo(struct gve_priv *priv, int idx)
 	if (!rx->dqo.bufq.desc_ring)
 		goto err;
 
+	if (priv->queue_format != GVE_DQO_RDA_FORMAT) {
+		rx->dqo.qpl = gve_assign_rx_qpl(priv, rx->q_num);
+		if (!rx->dqo.qpl)
+			goto err;
+		rx->dqo.next_qpl_page_idx = 0;
+	}
+
 	rx->q_resources = dma_alloc_coherent(hdev, sizeof(*rx->q_resources),
 					     &rx->q_resources_bus, GFP_KERNEL);
 	if (!rx->q_resources)
@@ -352,7 +392,7 @@ void gve_rx_post_buffers_dqo(struct gve_rx_ring *rx)
 			if (unlikely(!buf_state))
 				break;
 
-			if (unlikely(gve_alloc_page_dqo(priv, buf_state))) {
+			if (unlikely(gve_alloc_page_dqo(rx, buf_state))) {
 				u64_stats_update_begin(&rx->statss);
 				rx->rx_buf_alloc_fail++;
 				u64_stats_update_end(&rx->statss);
@@ -415,6 +455,7 @@ static void gve_try_recycle_buf(struct gve_priv *priv, struct gve_rx_ring *rx,
 
 mark_used:
 	gve_enqueue_buf_state(rx, &rx->dqo.used_buf_states, buf_state);
+	rx->dqo.used_buf_states_cnt++;
 }
 
 static void gve_rx_skb_csum(struct sk_buff *skb,
@@ -475,6 +516,43 @@ static void gve_rx_free_skb(struct gve_rx_ring *rx)
 	rx->ctx.skb_tail = NULL;
 }
 
+static bool gve_rx_should_trigger_copy_ondemand(struct gve_rx_ring *rx)
+{
+	if (!rx->dqo.qpl)
+		return false;
+	if (rx->dqo.used_buf_states_cnt <
+		     (rx->dqo.num_buf_states -
+		     GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD))
+		return false;
+	return true;
+}
+
+static int gve_rx_copy_ondemand(struct gve_rx_ring *rx,
+				struct gve_rx_buf_state_dqo *buf_state,
+				u16 buf_len)
+{
+	struct page *page = alloc_page(GFP_KERNEL);
+	int num_frags;
+
+	if (!page)
+		return -ENOMEM;
+
+	memcpy(page_address(page),
+	       buf_state->page_info.page_address +
+	       buf_state->page_info.page_offset,
+	       buf_len);
+	num_frags = skb_shinfo(rx->ctx.skb_tail)->nr_frags;
+	skb_add_rx_frag(rx->ctx.skb_tail, num_frags, page,
+			0, buf_len, PAGE_SIZE);
+
+	u64_stats_update_begin(&rx->statss);
+	rx->rx_frag_alloc_cnt++;
+	u64_stats_update_end(&rx->statss);
+	/* Return unused buffer. */
+	gve_enqueue_buf_state(rx, &rx->dqo.recycled_buf_states, buf_state);
+	return 0;
+}
+
 /* Chains multi skbs for single rx packet.
  * Returns 0 if buffer is appended, -1 otherwise.
  */
@@ -502,12 +580,20 @@ static int gve_rx_append_frags(struct napi_struct *napi,
 		rx->ctx.skb_head->truesize += priv->data_buffer_size_dqo;
 	}
 
+	/* Trigger ondemand page allocation if we are running low on buffers */
+	if (gve_rx_should_trigger_copy_ondemand(rx))
+		return gve_rx_copy_ondemand(rx, buf_state, buf_len);
+
 	skb_add_rx_frag(rx->ctx.skb_tail, num_frags,
 			buf_state->page_info.page,
 			buf_state->page_info.page_offset,
 			buf_len, priv->data_buffer_size_dqo);
 	gve_dec_pagecnt_bias(&buf_state->page_info);
 
+	/* Advances buffer page-offset if page is partially used.
+	 * Marks buffer as used if page is full.
+	 */
+	gve_try_recycle_buf(priv, rx, buf_state);
 	return 0;
 }
 
@@ -561,8 +647,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 						 priv)) != 0) {
 			goto error;
 		}
-
-		gve_try_recycle_buf(priv, rx, buf_state);
 		return 0;
 	}
 
@@ -588,6 +672,12 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx,
 		goto error;
 	rx->ctx.skb_tail = rx->ctx.skb_head;
 
+	if (gve_rx_should_trigger_copy_ondemand(rx)) {
+		if (gve_rx_copy_ondemand(rx, buf_state, buf_len) < 0)
+			goto error;
+		return 0;
+	}
+
 	skb_add_rx_frag(rx->ctx.skb_head, 0, buf_state->page_info.page,
 			buf_state->page_info.page_offset, buf_len,
 			priv->data_buffer_size_dqo);
-- 
2.41.0.585.gd2178a4bd4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH net-next 4/4] gve: update gve.rst
  2023-08-02 21:33 [PATCH net-next 0/4] Add QPL mode for DQO descriptor format Rushil Gupta
                   ` (2 preceding siblings ...)
  2023-08-02 21:33 ` [PATCH net-next 3/4] gve: RX " Rushil Gupta
@ 2023-08-02 21:33 ` Rushil Gupta
  3 siblings, 0 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-02 21:33 UTC (permalink / raw)
  To: netdev, davem, kuba, willemb, edumazet, pabeni
  Cc: Rushil Gupta, Praveen Kaligineedi, Bailey Forrest

Add a note about the QPL and RDA addressing modes.

Signed-off-by: Rushil Gupta <rushilg@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Bailey Forrest <bcf@google.com>
---
 .../networking/device_drivers/ethernet/google/gve.rst    | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/networking/device_drivers/ethernet/google/gve.rst b/Documentation/networking/device_drivers/ethernet/google/gve.rst
index 6d73ee78f3d7..31d621bca82e 100644
--- a/Documentation/networking/device_drivers/ethernet/google/gve.rst
+++ b/Documentation/networking/device_drivers/ethernet/google/gve.rst
@@ -52,6 +52,15 @@ Descriptor Formats
 GVE supports two descriptor formats: GQI and DQO. These two formats have
 entirely different descriptors, which will be described below.
 
+Addressing Mode
+------------------
+GVE supports two addressing modes: QPL and RDA.
+QPL ("queue-page-list") mode communicates data through a set of
+pre-registered pages.
+
+For RDA ("raw DMA addressing") mode, the set of pages is dynamic.
+Therefore, the packet buffers can be anywhere in guest memory.
+
 Registers
 ---------
 All registers are MMIO.
-- 
2.41.0.585.gd2178a4bd4-goog


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/4] gve: Control path for DQO-QPL
  2023-08-02 21:33 ` [PATCH net-next 1/4] gve: Control path for DQO-QPL Rushil Gupta
@ 2023-08-03  1:56   ` Jesse Brandeburg
  2023-08-03 19:18     ` Rushil Gupta
  2023-08-03  9:28   ` Simon Horman
  1 sibling, 1 reply; 8+ messages in thread
From: Jesse Brandeburg @ 2023-08-03  1:56 UTC (permalink / raw)
  To: Rushil Gupta, netdev, davem, kuba, willemb, edumazet, pabeni
  Cc: Praveen Kaligineedi, Bailey Forrest

On 8/2/2023 2:33 PM, Rushil Gupta wrote:
> Add checks, ABI changes and device options to support
> QPL mode for DQO in addition to GQI. Also, use the
> pages-per-qpl value supplied by the device option to
> control the size of the "queue-page-list".

That is some serious acronym soup there; maybe expand your acronyms upon
first use in the commit message? How are we to know what you mean?


> 
> Signed-off-by: Rushil Gupta <rushilg@google.com>
> Reviewed-by: Willem de Bruijn <willemb@google.com>
> Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
> Signed-off-by: Bailey Forrest <bcf@google.com>
> ---
>  drivers/net/ethernet/google/gve/gve.h        | 20 ++++-
>  drivers/net/ethernet/google/gve/gve_adminq.c | 93 +++++++++++++++++---
>  drivers/net/ethernet/google/gve/gve_adminq.h | 10 +++
>  drivers/net/ethernet/google/gve/gve_main.c   | 20 +++--
>  4 files changed, 123 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> index 4b425bf71ede..517a63b60cb9 100644
> --- a/drivers/net/ethernet/google/gve/gve.h
> +++ b/drivers/net/ethernet/google/gve/gve.h
> @@ -51,6 +51,12 @@
>  
>  #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
>  
> +#define DQO_QPL_DEFAULT_TX_PAGES 512
> +#define DQO_QPL_DEFAULT_RX_PAGES 2048
> +
> +/* Maximum TSO size supported on DQO */
> +#define GVE_DQO_TX_MAX	0x3FFFF
> +
>  /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
>  struct gve_rx_desc_queue {
>  	struct gve_rx_desc *desc_ring; /* the descriptor ring */
> @@ -531,6 +537,7 @@ enum gve_queue_format {
>  	GVE_GQI_RDA_FORMAT		= 0x1,
>  	GVE_GQI_QPL_FORMAT		= 0x2,
>  	GVE_DQO_RDA_FORMAT		= 0x3,
> +	GVE_DQO_QPL_FORMAT		= 0x4,
>  };
>  
>  struct gve_priv {
> @@ -550,7 +557,8 @@ struct gve_priv {
>  	u16 num_event_counters;
>  	u16 tx_desc_cnt; /* num desc per ring */
>  	u16 rx_desc_cnt; /* num desc per ring */
> -	u16 tx_pages_per_qpl; /* tx buffer length */
> +	u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
> +	u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
>  	u16 rx_data_slot_cnt; /* rx buffer length */
>  	u64 max_registered_pages;
>  	u64 num_registered_pages; /* num pages registered with NIC */
> @@ -808,11 +816,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
>  	return (priv->num_ntfy_blks / 2) + queue_idx;
>  }
>  
> +static inline bool gve_is_qpl(struct gve_priv *priv)
> +{
> +	return priv->queue_format == GVE_GQI_QPL_FORMAT ||
> +		priv->queue_format == GVE_DQO_QPL_FORMAT;
> +}
> +
>  /* Returns the number of tx queue page lists
>   */
>  static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
>  {
> -	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> +	if (!gve_is_qpl(priv))
>  		return 0;
>  
>  	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
> @@ -832,7 +846,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
>   */
>  static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
>  {
> -	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> +	if (!gve_is_qpl(priv))
>  		return 0;
>  
>  	return priv->rx_cfg.num_queues;
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
> index 252974202a3f..a16e7cf21911 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.c
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.c
> @@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
>  			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
>  			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
>  			     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
> -			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
> +			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
> +			     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
>  {
>  	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
>  	u16 option_length = be16_to_cpu(option->option_length);
> @@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
>  		}
>  		*dev_op_dqo_rda = (void *)(option + 1);
>  		break;
> +	case GVE_DEV_OPT_ID_DQO_QPL:
> +		if (option_length < sizeof(**dev_op_dqo_qpl) ||
> +		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
> +			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
> +				 "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
> +				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
> +				 option_length, req_feat_mask);
> +			break;
> +		}
> +
> +		if (option_length > sizeof(**dev_op_dqo_qpl)) {
> +			dev_warn(&priv->pdev->dev,
> +				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
> +		}
> +		*dev_op_dqo_qpl = (void *)(option + 1);
> +		break;
>  	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
>  		if (option_length < sizeof(**dev_op_jumbo_frames) ||
>  		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
> @@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
>  			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
>  			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
>  			   struct gve_device_option_dqo_rda **dev_op_dqo_rda,
> -			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
> +			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
> +			   struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
>  {
>  	const int num_options = be16_to_cpu(descriptor->num_device_options);
>  	struct gve_device_option *dev_opt;
> @@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,
>  
>  		gve_parse_device_option(priv, descriptor, dev_opt,
>  					dev_op_gqi_rda, dev_op_gqi_qpl,
> -					dev_op_dqo_rda, dev_op_jumbo_frames);
> +					dev_op_dqo_rda, dev_op_jumbo_frames,
> +					dev_op_dqo_qpl);
>  		dev_opt = next_opt;
>  	}
>  
> @@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
>  
>  		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  	} else {
> +		u16 comp_ring_size = 0;
> +		u32 qpl_id = 0;

These stack initializers are useless; you unconditionally overwrite both
values below.

> +
> +		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> +			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> +			comp_ring_size =
> +				priv->options_dqo_rda.tx_comp_ring_entries;
> +		} else {
> +			qpl_id = tx->dqo.qpl->id;
> +			comp_ring_size = priv->tx_desc_cnt;
> +		}
> +		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_tx_queue.tx_ring_size =
>  			cpu_to_be16(priv->tx_desc_cnt);
>  		cmd.create_tx_queue.tx_comp_ring_addr =
>  			cpu_to_be64(tx->complq_bus_dqo);
>  		cmd.create_tx_queue.tx_comp_ring_size =
> -			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
> +			cpu_to_be16(comp_ring_size);
>  	}
>  
>  	return gve_adminq_issue_cmd(priv, &cmd);
> @@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
>  		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
>  	} else {
> +		u16 rx_buff_ring_entries = 0;
> +		u32 qpl_id = 0;

same here

> +
> +		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> +			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> +			rx_buff_ring_entries =
> +				priv->options_dqo_rda.rx_buff_ring_entries;
> +		} else {
> +			qpl_id = rx->dqo.qpl->id;
> +			rx_buff_ring_entries = priv->rx_desc_cnt;
> +		}
> +		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_rx_queue.rx_ring_size =
>  			cpu_to_be16(priv->rx_desc_cnt);
>  		cmd.create_rx_queue.rx_desc_ring_addr =
> @@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
>  		cmd.create_rx_queue.packet_buffer_size =
>  			cpu_to_be16(priv->data_buffer_size_dqo);
>  		cmd.create_rx_queue.rx_buff_ring_size =
> -			cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
> +			cpu_to_be16(rx_buff_ring_entries);
>  		cmd.create_rx_queue.enable_rsc =
>  			!!(priv->dev->features & NETIF_F_LRO);
>  	}
> @@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
>  		     const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
>  {
>  	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
> +	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
> +
> +	if (priv->queue_format == GVE_DQO_QPL_FORMAT)
> +		return 0;
> +
>  	priv->options_dqo_rda.tx_comp_ring_entries =
>  		be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
> -	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
>  	priv->options_dqo_rda.rx_buff_ring_entries =
>  		be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
>  
> @@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
>  static void gve_enable_supported_features(struct gve_priv *priv,
>  					  u32 supported_features_mask,
>  					  const struct gve_device_option_jumbo_frames
> -						  *dev_op_jumbo_frames)
> +					  *dev_op_jumbo_frames,
> +					  const struct gve_device_option_dqo_qpl
> +					  *dev_op_dqo_qpl)
>  {
>  	/* Before control reaches this point, the page-size-capped max MTU from
>  	 * the gve_device_descriptor field has already been stored in
> @@ -699,6 +748,20 @@ static void gve_enable_supported_features(struct gve_priv *priv,
>  			 "JUMBO FRAMES device option enabled.\n");
>  		priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
>  	}
> +
> +	/* Override pages for qpl for DQO-QPL */
> +	if (dev_op_dqo_qpl) {
> +		dev_info(&priv->pdev->dev,
> +			 "DQO QPL device option enabled.\n");

How does this message benefit the user?

> +		priv->tx_pages_per_qpl =
> +			be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
> +		priv->rx_pages_per_qpl =
> +			be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
> +		if (priv->tx_pages_per_qpl == 0)
> +			priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
> +		if (priv->rx_pages_per_qpl == 0)
> +			priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
> +	}
>  }
>  
>  int gve_adminq_describe_device(struct gve_priv *priv)
> @@ -707,6 +770,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
>  	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
>  	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
>  	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
> +	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
>  	struct gve_device_descriptor *descriptor;
>  	u32 supported_features_mask = 0;
>  	union gve_adminq_command cmd;
> @@ -733,13 +797,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
>  
>  	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
>  					 &dev_op_gqi_qpl, &dev_op_dqo_rda,
> -					 &dev_op_jumbo_frames);
> +					 &dev_op_jumbo_frames,
> +					 &dev_op_dqo_qpl);
>  	if (err)
>  		goto free_device_descriptor;
>  
>  	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
>  	 * is not set to GqiRda, choose the queue format in a priority order:
> -	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
> +	 * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
>  	 */
>  	if (dev_op_dqo_rda) {
>  		priv->queue_format = GVE_DQO_RDA_FORMAT;
> @@ -747,7 +812,13 @@ int gve_adminq_describe_device(struct gve_priv *priv)
>  			 "Driver is running with DQO RDA queue format.\n");
>  		supported_features_mask =
>  			be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
> -	} else if (dev_op_gqi_rda) {
> +	} else if (dev_op_dqo_qpl) {
> +		priv->queue_format = GVE_DQO_QPL_FORMAT;
> +		dev_info(&priv->pdev->dev,
> +			 "Driver is running with DQO QPL queue format.\n");

I feel like at best these should have been dev_dbg, or at worst just
removed. Messages should always add value for the user if they're printed.
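e.g. (untested) the format line could be kept for debugging only, so it
is silent unless dynamic debug (or DEBUG) is turned on for the driver:

	dev_dbg(&priv->pdev->dev,
		"Driver is running with DQO QPL queue format.\n");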

> +		supported_features_mask =
> +			be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
> +	}  else if (dev_op_gqi_rda) {
>  		priv->queue_format = GVE_GQI_RDA_FORMAT;
>  		dev_info(&priv->pdev->dev,
>  			 "Driver is running with GQI RDA queue format.\n");
> @@ -798,7 +869,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
>  	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
>  
>  	gve_enable_supported_features(priv, supported_features_mask,
> -				      dev_op_jumbo_frames);
> +				      dev_op_jumbo_frames, dev_op_dqo_qpl);
>  
>  free_device_descriptor:
>  	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
> diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
> index f894beb3deaf..38a22279e863 100644
> --- a/drivers/net/ethernet/google/gve/gve_adminq.h
> +++ b/drivers/net/ethernet/google/gve/gve_adminq.h
> @@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {
>  
>  static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
>  
> +struct gve_device_option_dqo_qpl {
> +	__be32 supported_features_mask;
> +	__be16 tx_pages_per_qpl;
> +	__be16 rx_pages_per_qpl;
> +};
> +
> +static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);
> +
>  struct gve_device_option_jumbo_frames {
>  	__be32 supported_features_mask;
>  	__be16 max_mtu;
> @@ -130,6 +138,7 @@ enum gve_dev_opt_id {
>  	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
>  	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
>  	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
> +	GVE_DEV_OPT_ID_DQO_QPL = 0x7,
>  	GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
>  };
>  
> @@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
>  	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
>  	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
>  	GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
> +	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,


Maybe this makes sense to others, but an enum full of defines where all
values are zero? Why are we even writing code?

>  };
>  
>  enum gve_sup_feature_mask {
> diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> index e6f1711d9be0..b40fafe1460a 100644
> --- a/drivers/net/ethernet/google/gve/gve_main.c
> +++ b/drivers/net/ethernet/google/gve/gve_main.c
> @@ -31,7 +31,6 @@
>  
>  // Minimum amount of time between queue kicks in msec (10 seconds)
>  #define MIN_TX_TIMEOUT_GAP (1000 * 10)
> -#define DQO_TX_MAX	0x3FFFF
>  
>  char gve_driver_name[] = "gve";
>  const char gve_version_str[] = GVE_VERSION;
> @@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
>  		goto abort_with_stats_report;
>  	}
>  
> -	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> +	if (!gve_is_gqi(priv)) {
>  		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
>  					       GFP_KERNEL);
>  		if (!priv->ptype_lut_dqo) {
> @@ -1085,9 +1084,10 @@ static int gve_alloc_qpls(struct gve_priv *priv)
>  	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
>  	int start_id;
>  	int i, j;
> +	int page_count;

RCT please
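(RCT = reverse Christmas tree: netdev prefers local variable
declarations ordered from longest line to shortest.)  Untested, the
declarations in gve_alloc_qpls() would become:

	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;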

>  	int err;
>  
> -	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> +	if (!gve_is_qpl(priv))
>  		return 0;
>  
>  	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
> @@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
>  		return -ENOMEM;
>  
>  	start_id = gve_tx_start_qpl_id(priv);
> +	page_count = priv->tx_pages_per_qpl;
>  	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
>  		err = gve_alloc_queue_page_list(priv, i,
> -						priv->tx_pages_per_qpl);
> +						page_count);
>  		if (err)
>  			goto free_qpls;
>  	}
>  
>  	start_id = gve_rx_start_qpl_id(priv);
> +
> +	/* For GQI_QPL number of pages allocated have 1:1 relationship with
> +	 * number of descriptors. For DQO, number of pages required are
> +	 * more than descriptors (because of out of order completions).
> +	 */
> +	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
> +		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
>  	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
>  		err = gve_alloc_queue_page_list(priv, i,
> -						priv->rx_data_slot_cnt);
> +						page_count);
>  		if (err)
>  			goto free_qpls;
>  	}
> @@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
>  
>  	/* Big TCP is only supported on DQ*/
>  	if (!gve_is_gqi(priv))
> -		netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
> +		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
>  
>  	priv->num_registered_pages = 0;
>  	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/4] gve: Control path for DQO-QPL
  2023-08-02 21:33 ` [PATCH net-next 1/4] gve: Control path for DQO-QPL Rushil Gupta
  2023-08-03  1:56   ` Jesse Brandeburg
@ 2023-08-03  9:28   ` Simon Horman
  1 sibling, 0 replies; 8+ messages in thread
From: Simon Horman @ 2023-08-03  9:28 UTC (permalink / raw)
  To: Rushil Gupta
  Cc: netdev, davem, kuba, willemb, edumazet, pabeni,
	Praveen Kaligineedi, Bailey Forrest

On Wed, Aug 02, 2023 at 09:33:35PM +0000, Rushil Gupta wrote:

...

Hi Rushil,


> @@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
>  
>  		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  	} else {
> +		u16 comp_ring_size = 0;
> +		u32 qpl_id = 0;
> +
> +		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> +			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> +			comp_ring_size =
> +				priv->options_dqo_rda.tx_comp_ring_entries;
> +		} else {
> +			qpl_id = tx->dqo.qpl->id;

The qpl field does not appear to be added to struct gve_tx_ring
until a following patch.
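For readers following the series: the dereference above presumes a
per-ring member roughly like the untested sketch below. The real
definition only lands in the Tx patch, so take this purely as an
illustration of the dependency, not the final layout:

	/* in struct gve_tx_ring's DQO-specific state */
	struct gve_queue_page_list *qpl;	/* QPL assigned to this ring */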

> +			comp_ring_size = priv->tx_desc_cnt;
> +		}
> +		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_tx_queue.tx_ring_size =
>  			cpu_to_be16(priv->tx_desc_cnt);
>  		cmd.create_tx_queue.tx_comp_ring_addr =
>  			cpu_to_be64(tx->complq_bus_dqo);
>  		cmd.create_tx_queue.tx_comp_ring_size =
> -			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
> +			cpu_to_be16(comp_ring_size);
>  	}
>  
>  	return gve_adminq_issue_cmd(priv, &cmd);
> @@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
>  		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
>  	} else {
> +		u16 rx_buff_ring_entries = 0;
> +		u32 qpl_id = 0;
> +
> +		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> +			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> +			rx_buff_ring_entries =
> +				priv->options_dqo_rda.rx_buff_ring_entries;
> +		} else {
> +			qpl_id = rx->dqo.qpl->id;

Likewise, the qpl field does not appear to be added to struct gve_rx_ring
until a following patch.

> +			rx_buff_ring_entries = priv->rx_desc_cnt;
> +		}
> +		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
>  		cmd.create_rx_queue.rx_ring_size =
>  			cpu_to_be16(priv->rx_desc_cnt);
>  		cmd.create_rx_queue.rx_desc_ring_addr =

...

-- 
pw-bot: changes-requested

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH net-next 1/4] gve: Control path for DQO-QPL
  2023-08-03  1:56   ` Jesse Brandeburg
@ 2023-08-03 19:18     ` Rushil Gupta
  0 siblings, 0 replies; 8+ messages in thread
From: Rushil Gupta @ 2023-08-03 19:18 UTC (permalink / raw)
  To: Jesse Brandeburg
  Cc: netdev, davem, kuba, willemb, edumazet, pabeni,
	Praveen Kaligineedi, Bailey Forrest

On Wed, Aug 2, 2023 at 6:56 PM Jesse Brandeburg
<jesse.brandeburg@intel.com> wrote:
>
> On 8/2/2023 2:33 PM, Rushil Gupta wrote:
> > Add checks, abi-changes and device options to support
> > QPL mode for DQO in addition to GQI. Also, use
> > pages-per-qpl supplied by device-option to control the
> > size of the "queue-page-list".
>
> That is some serious acronym soup there, maybe expand your acronyms upon
> first use in the commit message? how are we to know what you mean?

I have provided the acronym explanations in the cover letter and .rst files.
However, I agree I should add them to the commit message as well. Will
fix it in v2.
>
>
> >
> > Signed-off-by: Rushil Gupta <rushilg@google.com>
> > Reviewed-by: Willem de Bruijn <willemb@google.com>
> > Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
> > Signed-off-by: Bailey Forrest <bcf@google.com>
> > ---
> >  drivers/net/ethernet/google/gve/gve.h        | 20 ++++-
> >  drivers/net/ethernet/google/gve/gve_adminq.c | 93 +++++++++++++++++---
> >  drivers/net/ethernet/google/gve/gve_adminq.h | 10 +++
> >  drivers/net/ethernet/google/gve/gve_main.c   | 20 +++--
> >  4 files changed, 123 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
> > index 4b425bf71ede..517a63b60cb9 100644
> > --- a/drivers/net/ethernet/google/gve/gve.h
> > +++ b/drivers/net/ethernet/google/gve/gve.h
> > @@ -51,6 +51,12 @@
> >
> >  #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
> >
> > +#define DQO_QPL_DEFAULT_TX_PAGES 512
> > +#define DQO_QPL_DEFAULT_RX_PAGES 2048
> > +
> > +/* Maximum TSO size supported on DQO */
> > +#define GVE_DQO_TX_MAX       0x3FFFF
> > +
> >  /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
> >  struct gve_rx_desc_queue {
> >       struct gve_rx_desc *desc_ring; /* the descriptor ring */
> > @@ -531,6 +537,7 @@ enum gve_queue_format {
> >       GVE_GQI_RDA_FORMAT              = 0x1,
> >       GVE_GQI_QPL_FORMAT              = 0x2,
> >       GVE_DQO_RDA_FORMAT              = 0x3,
> > +     GVE_DQO_QPL_FORMAT              = 0x4,
> >  };
> >
> >  struct gve_priv {
> > @@ -550,7 +557,8 @@ struct gve_priv {
> >       u16 num_event_counters;
> >       u16 tx_desc_cnt; /* num desc per ring */
> >       u16 rx_desc_cnt; /* num desc per ring */
> > -     u16 tx_pages_per_qpl; /* tx buffer length */
> > +     u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
> > +     u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
> >       u16 rx_data_slot_cnt; /* rx buffer length */
> >       u64 max_registered_pages;
> >       u64 num_registered_pages; /* num pages registered with NIC */
> > @@ -808,11 +816,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
> >       return (priv->num_ntfy_blks / 2) + queue_idx;
> >  }
> >
> > +static inline bool gve_is_qpl(struct gve_priv *priv)
> > +{
> > +     return priv->queue_format == GVE_GQI_QPL_FORMAT ||
> > +             priv->queue_format == GVE_DQO_QPL_FORMAT;
> > +}
> > +
> >  /* Returns the number of tx queue page lists
> >   */
> >  static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
> >  {
> > -     if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> > +     if (!gve_is_qpl(priv))
> >               return 0;
> >
> >       return priv->tx_cfg.num_queues + priv->num_xdp_queues;
> > @@ -832,7 +846,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
> >   */
> >  static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
> >  {
> > -     if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> > +     if (!gve_is_qpl(priv))
> >               return 0;
> >
> >       return priv->rx_cfg.num_queues;
> > diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
> > index 252974202a3f..a16e7cf21911 100644
> > --- a/drivers/net/ethernet/google/gve/gve_adminq.c
> > +++ b/drivers/net/ethernet/google/gve/gve_adminq.c
> > @@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
> >                            struct gve_device_option_gqi_rda **dev_op_gqi_rda,
> >                            struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
> >                            struct gve_device_option_dqo_rda **dev_op_dqo_rda,
> > -                          struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
> > +                          struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
> > +                          struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
> >  {
> >       u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
> >       u16 option_length = be16_to_cpu(option->option_length);
> > @@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
> >               }
> >               *dev_op_dqo_rda = (void *)(option + 1);
> >               break;
> > +     case GVE_DEV_OPT_ID_DQO_QPL:
> > +             if (option_length < sizeof(**dev_op_dqo_qpl) ||
> > +                 req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
> > +                     dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
> > +                              "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
> > +                              GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
> > +                              option_length, req_feat_mask);
> > +                     break;
> > +             }
> > +
> > +             if (option_length > sizeof(**dev_op_dqo_qpl)) {
> > +                     dev_warn(&priv->pdev->dev,
> > +                              GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
> > +             }
> > +             *dev_op_dqo_qpl = (void *)(option + 1);
> > +             break;
> >       case GVE_DEV_OPT_ID_JUMBO_FRAMES:
> >               if (option_length < sizeof(**dev_op_jumbo_frames) ||
> >                   req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
> > @@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
> >                          struct gve_device_option_gqi_rda **dev_op_gqi_rda,
> >                          struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
> >                          struct gve_device_option_dqo_rda **dev_op_dqo_rda,
> > -                        struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
> > +                        struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
> > +                        struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
> >  {
> >       const int num_options = be16_to_cpu(descriptor->num_device_options);
> >       struct gve_device_option *dev_opt;
> > @@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,
> >
> >               gve_parse_device_option(priv, descriptor, dev_opt,
> >                                       dev_op_gqi_rda, dev_op_gqi_qpl,
> > -                                     dev_op_dqo_rda, dev_op_jumbo_frames);
> > +                                     dev_op_dqo_rda, dev_op_jumbo_frames,
> > +                                     dev_op_dqo_qpl);
> >               dev_opt = next_opt;
> >       }
> >
> > @@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)
> >
> >               cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
> >       } else {
> > +             u16 comp_ring_size = 0;
> > +             u32 qpl_id = 0;
>
> these stack initializers are useless, you unconditionally overwrite both
> values below.
Will fix.
>
> > +
> > +             if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> > +                     qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> > +                     comp_ring_size =
> > +                             priv->options_dqo_rda.tx_comp_ring_entries;
> > +             } else {
> > +                     qpl_id = tx->dqo.qpl->id;
> > +                     comp_ring_size = priv->tx_desc_cnt;
> > +             }
> > +             cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
> >               cmd.create_tx_queue.tx_ring_size =
> >                       cpu_to_be16(priv->tx_desc_cnt);
> >               cmd.create_tx_queue.tx_comp_ring_addr =
> >                       cpu_to_be64(tx->complq_bus_dqo);
> >               cmd.create_tx_queue.tx_comp_ring_size =
> > -                     cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
> > +                     cpu_to_be16(comp_ring_size);
> >       }
> >
> >       return gve_adminq_issue_cmd(priv, &cmd);
> > @@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
> >               cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
> >               cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
> >       } else {
> > +             u16 rx_buff_ring_entries = 0;
> > +             u32 qpl_id = 0;
>
> same here
Will fix.
>
> > +
> > +             if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> > +                     qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
> > +                     rx_buff_ring_entries =
> > +                             priv->options_dqo_rda.rx_buff_ring_entries;
> > +             } else {
> > +                     qpl_id = rx->dqo.qpl->id;
> > +                     rx_buff_ring_entries = priv->rx_desc_cnt;
> > +             }
> > +             cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
> >               cmd.create_rx_queue.rx_ring_size =
> >                       cpu_to_be16(priv->rx_desc_cnt);
> >               cmd.create_rx_queue.rx_desc_ring_addr =
> > @@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
> >               cmd.create_rx_queue.packet_buffer_size =
> >                       cpu_to_be16(priv->data_buffer_size_dqo);
> >               cmd.create_rx_queue.rx_buff_ring_size =
> > -                     cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
> > +                     cpu_to_be16(rx_buff_ring_entries);
> >               cmd.create_rx_queue.enable_rsc =
> >                       !!(priv->dev->features & NETIF_F_LRO);
> >       }
> > @@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
> >                    const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
> >  {
> >       priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
> > +     priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
> > +
> > +     if (priv->queue_format == GVE_DQO_QPL_FORMAT)
> > +             return 0;
> > +
> >       priv->options_dqo_rda.tx_comp_ring_entries =
> >               be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
> > -     priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
> >       priv->options_dqo_rda.rx_buff_ring_entries =
> >               be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);
> >
> > @@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
> >  static void gve_enable_supported_features(struct gve_priv *priv,
> >                                         u32 supported_features_mask,
> >                                         const struct gve_device_option_jumbo_frames
> > -                                               *dev_op_jumbo_frames)
> > +                                       *dev_op_jumbo_frames,
> > +                                       const struct gve_device_option_dqo_qpl
> > +                                       *dev_op_dqo_qpl)
> >  {
> >       /* Before control reaches this point, the page-size-capped max MTU from
> >        * the gve_device_descriptor field has already been stored in
> > @@ -699,6 +748,20 @@ static void gve_enable_supported_features(struct gve_priv *priv,
> >                        "JUMBO FRAMES device option enabled.\n");
> >               priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
> >       }
> > +
> > +     /* Override pages for qpl for DQO-QPL */
> > +     if (dev_op_dqo_qpl) {
> > +             dev_info(&priv->pdev->dev,
> > +                      "DQO QPL device option enabled.\n");
>
> How does this message benefit the user?
>
> > +             priv->tx_pages_per_qpl =
> > +                     be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
> > +             priv->rx_pages_per_qpl =
> > +                     be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
> > +             if (priv->tx_pages_per_qpl == 0)
> > +                     priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
> > +             if (priv->rx_pages_per_qpl == 0)
> > +                     priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
> > +     }
> >  }
> >
> >  int gve_adminq_describe_device(struct gve_priv *priv)
> > @@ -707,6 +770,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
> >       struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
> >       struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
> >       struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
> > +     struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
> >       struct gve_device_descriptor *descriptor;
> >       u32 supported_features_mask = 0;
> >       union gve_adminq_command cmd;
> > @@ -733,13 +797,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)
> >
> >       err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
> >                                        &dev_op_gqi_qpl, &dev_op_dqo_rda,
> > -                                      &dev_op_jumbo_frames);
> > +                                      &dev_op_jumbo_frames,
> > +                                      &dev_op_dqo_qpl);
> >       if (err)
> >               goto free_device_descriptor;
> >
> >       /* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
> >        * is not set to GqiRda, choose the queue format in a priority order:
> > -      * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
> > +      * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
> >        */
> >       if (dev_op_dqo_rda) {
> >               priv->queue_format = GVE_DQO_RDA_FORMAT;
> > @@ -747,7 +812,13 @@ int gve_adminq_describe_device(struct gve_priv *priv)
> >                        "Driver is running with DQO RDA queue format.\n");
> >               supported_features_mask =
> >                       be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
> > -     } else if (dev_op_gqi_rda) {
> > +     } else if (dev_op_dqo_qpl) {
> > +             priv->queue_format = GVE_DQO_QPL_FORMAT;
> > +             dev_info(&priv->pdev->dev,
> > +                      "Driver is running with DQO QPL queue format.\n");
>
> I feel like at best these should have been dev_dbg, or at worst just
> removed. Messages should always add value for the user if they're printed.
This message (and the one you pointed out above) follows the existing
convention in this driver whenever a new format or device option is
added. But I agree with you in principle that these log messages are
not adding any value here. Will remove.
>
> > +             supported_features_mask =
> > +                     be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
> > +     }  else if (dev_op_gqi_rda) {
> >               priv->queue_format = GVE_GQI_RDA_FORMAT;
> >               dev_info(&priv->pdev->dev,
> >                        "Driver is running with GQI RDA queue format.\n");
> > @@ -798,7 +869,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
> >       priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);
> >
> >       gve_enable_supported_features(priv, supported_features_mask,
> > -                                   dev_op_jumbo_frames);
> > +                                   dev_op_jumbo_frames, dev_op_dqo_qpl);
> >
> >  free_device_descriptor:
> >       dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
> > diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h
> > index f894beb3deaf..38a22279e863 100644
> > --- a/drivers/net/ethernet/google/gve/gve_adminq.h
> > +++ b/drivers/net/ethernet/google/gve/gve_adminq.h
> > @@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {
> >
> >  static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);
> >
> > +struct gve_device_option_dqo_qpl {
> > +     __be32 supported_features_mask;
> > +     __be16 tx_pages_per_qpl;
> > +     __be16 rx_pages_per_qpl;
> > +};
> > +
> > +static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);
> > +
> >  struct gve_device_option_jumbo_frames {
> >       __be32 supported_features_mask;
> >       __be16 max_mtu;
> > @@ -130,6 +138,7 @@ enum gve_dev_opt_id {
> >       GVE_DEV_OPT_ID_GQI_RDA = 0x2,
> >       GVE_DEV_OPT_ID_GQI_QPL = 0x3,
> >       GVE_DEV_OPT_ID_DQO_RDA = 0x4,
> > +     GVE_DEV_OPT_ID_DQO_QPL = 0x7,
> >       GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
> >  };
> >
> > @@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
> >       GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
> >       GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
> >       GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
> > +     GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
>
>
> Maybe this makes sense to others, but an enum full of defines where all
> values are zero? Why are we even writing code?

If there's a bug that breaks the DQO-QPL feature, we need to set a bug
bit in this mask (GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL); that is why it
starts out as 0. That said, I can see why it looks useless today.
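To make that concrete, a hypothetical example (not something in this
series): if a DQO-QPL driver bug were found and fixed, the fixed driver
would add a bit here and the device would set the same bit in the
option's required_features_mask, so older drivers fail the
req_feat_mask check in gve_parse_device_option() and skip the option:

-	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
+	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = BIT(0), /* hypothetical bug-fix bit */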
>
> >  };
> >
> >  enum gve_sup_feature_mask {
> > diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
> > index e6f1711d9be0..b40fafe1460a 100644
> > --- a/drivers/net/ethernet/google/gve/gve_main.c
> > +++ b/drivers/net/ethernet/google/gve/gve_main.c
> > @@ -31,7 +31,6 @@
> >
> >  // Minimum amount of time between queue kicks in msec (10 seconds)
> >  #define MIN_TX_TIMEOUT_GAP (1000 * 10)
> > -#define DQO_TX_MAX   0x3FFFF
> >
> >  char gve_driver_name[] = "gve";
> >  const char gve_version_str[] = GVE_VERSION;
> > @@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
> >               goto abort_with_stats_report;
> >       }
> >
> > -     if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
> > +     if (!gve_is_gqi(priv)) {
> >               priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
> >                                              GFP_KERNEL);
> >               if (!priv->ptype_lut_dqo) {
> > @@ -1085,9 +1084,10 @@ static int gve_alloc_qpls(struct gve_priv *priv)
> >       int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
> >       int start_id;
> >       int i, j;
> > +     int page_count;
>
> RCT please
>
> >       int err;
> >
> > -     if (priv->queue_format != GVE_GQI_QPL_FORMAT)
> > +     if (!gve_is_qpl(priv))
> >               return 0;
> >
> >       priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
> > @@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
> >               return -ENOMEM;
> >
> >       start_id = gve_tx_start_qpl_id(priv);
> > +     page_count = priv->tx_pages_per_qpl;
> >       for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
> >               err = gve_alloc_queue_page_list(priv, i,
> > -                                             priv->tx_pages_per_qpl);
> > +                                             page_count);
> >               if (err)
> >                       goto free_qpls;
> >       }
> >
> >       start_id = gve_rx_start_qpl_id(priv);
> > +
> > +     /* For GQI_QPL number of pages allocated have 1:1 relationship with
> > +      * number of descriptors. For DQO, number of pages required are
> > +      * more than descriptors (because of out of order completions).
> > +      */
> > +     page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
> > +             priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
> >       for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
> >               err = gve_alloc_queue_page_list(priv, i,
> > -                                             priv->rx_data_slot_cnt);
> > +                                             page_count);
> >               if (err)
> >                       goto free_qpls;
> >       }
> > @@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
> >
> >       /* Big TCP is only supported on DQ*/
> >       if (!gve_is_gqi(priv))
> > -             netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
> > +             netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);
> >
> >       priv->num_registered_pages = 0;
> >       priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
>

^ permalink raw reply	[flat|nested] 8+ messages in thread
