All of lore.kernel.org
 help / color / mirror / Atom feed
From: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
To: dev@dpdk.org
Cc: matan@mellanox.com, rasland@mellanox.com, olivier.matz@6wind.com,
	thomas@monjalon.net, ferruh.yigit@intel.com
Subject: [dpdk-dev] [PATCH v3 05/17] net/mlx5: create clock queue for packet pacing
Date: Thu, 16 Jul 2020 08:23:08 +0000	[thread overview]
Message-ID: <1594887800-6563-6-git-send-email-viacheslavo@mellanox.com> (raw)
In-Reply-To: <1594887800-6563-1-git-send-email-viacheslavo@mellanox.com>

This patch creates the special Clock Queue (a completion queue with
its send queue) that provides the reference completions used to
schedule packet sending from the other transmit queues.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Matan Azrad <matan@mellanox.com>
---
 drivers/net/mlx5/Makefile        |   1 +
 drivers/net/mlx5/linux/mlx5_os.c |   3 +
 drivers/net/mlx5/meson.build     |   1 +
 drivers/net/mlx5/mlx5.c          |   2 +
 drivers/net/mlx5/mlx5.h          |  47 ++++
 drivers/net/mlx5/mlx5_defs.h     |   7 +
 drivers/net/mlx5/mlx5_trigger.c  |  16 +-
 drivers/net/mlx5/mlx5_txpp.c     | 449 +++++++++++++++++++++++++++++++++++++++
 8 files changed, 521 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_txpp.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index a458402..9eaac6b 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -11,6 +11,7 @@ LIB = librte_pmd_mlx5.a
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txpp.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c
 ifneq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) \
 			$(CONFIG_RTE_ARCH_PPC_64) \
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 63e9350..ea36309 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1869,6 +1869,9 @@
 {
 	int dbmap_env;
 	int err = 0;
+
+	sh->numa_node = spawn->pci_dev->device.numa_node;
+	pthread_mutex_init(&sh->txpp.mutex, NULL);
 	/*
 	 * Configure environment variable "MLX5_BF_SHUT_UP"
 	 * before the device creation. The rdma_core library
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index e95ce02..c06b153 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -26,6 +26,7 @@ sources = files(
 	'mlx5_stats.c',
 	'mlx5_trigger.c',
 	'mlx5_txq.c',
+	'mlx5_txpp.c',
 	'mlx5_vlan.c',
 	'mlx5_utils.c',
 )
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 71e59ac..10196ac 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -767,6 +767,7 @@ struct mlx5_dev_ctx_shared *
 	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
 	return sh;
 error:
+	pthread_mutex_destroy(&sh->txpp.mutex);
 	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
 	MLX5_ASSERT(sh);
 	if (sh->cnt_id_tbl) {
@@ -856,6 +857,7 @@ struct mlx5_dev_ctx_shared *
 		claim_zero(mlx5_glue->close_device(sh->ctx));
 	if (sh->flow_id_pool)
 		mlx5_flow_id_pool_release(sh->flow_id_pool);
+	pthread_mutex_destroy(&sh->txpp.mutex);
 	rte_free(sh);
 exit:
 	pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index c760aff..0b73b2a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -531,6 +531,44 @@ struct mlx5_flow_id_pool {
 	uint32_t max_id; /**< Maximum id can be allocated from the pool. */
 };
 
+/* Tx pacing queue structure - for Clock and Rearm queues. */
+struct mlx5_txpp_wq {
+	/* Completion Queue related data. */
+	struct mlx5_devx_obj *cq; /* DevX CQ object. */
+	struct mlx5dv_devx_umem *cq_umem; /* Umem for CQEs and doorbell. */
+	union {
+		volatile void *cq_buf; /* Raw buffer address. */
+		volatile struct mlx5_cqe *cqes; /* Same memory as CQEs. */
+	};
+	volatile uint32_t *cq_dbrec; /* Doorbell record in cq_buf. */
+	uint32_t cq_ci:24; /* Zeroed on queue creation. */
+	uint32_t arm_sn:2;
+	/* Send Queue related data. */
+	struct mlx5_devx_obj *sq; /* DevX SQ object. */
+	struct mlx5dv_devx_umem *sq_umem; /* Umem for WQEs and doorbell. */
+	union {
+		volatile void *sq_buf; /* Raw buffer address. */
+		volatile struct mlx5_wqe *wqes; /* Same memory as WQEs. */
+	};
+	uint16_t sq_size; /* Number of WQEs in the queue. */
+	uint16_t sq_ci; /* Next WQE to execute. */
+	volatile uint32_t *sq_dbrec; /* Send doorbell in sq_buf. */
+};
+
+/* Tx packet pacing data, embedded into the shared device context. */
+struct mlx5_dev_txpp {
+	pthread_mutex_t mutex; /* Pacing create/destroy mutex. */
+	uint32_t refcnt; /* Number of devices with pacing enabled. */
+	uint32_t freq; /* Timestamp freq; set from dev_freq_khz, unit TBC. */
+	uint32_t tick; /* Completion tick duration in nanoseconds. */
+	uint32_t test; /* Test mode, set if tx_pp config is negative. */
+	int32_t skew; /* Scheduling skew, from tx_skew config. */
+	uint32_t eqn; /* Event Queue number. */
+	struct rte_intr_handle intr_handle; /* Periodic interrupt. */
+	struct mlx5dv_devx_event_channel *echan; /* Event Channel. */
+	struct mlx5_txpp_wq clock_queue; /* Clock Queue. */
+};
+
 /*
  * Shared Infiniband device context for Master/Representors
  * which belong to same IB device with multiple IB ports.
@@ -547,9 +585,12 @@ struct mlx5_dev_ctx_shared {
 	char ibdev_name[DEV_SYSFS_NAME_MAX]; /* SYSFS dev name. */
 	char ibdev_path[DEV_SYSFS_PATH_MAX]; /* SYSFS dev path for secondary */
 	struct mlx5_dev_attr device_attr; /* Device properties. */
+	int numa_node; /* Numa node of backing physical device. */
 	LIST_ENTRY(mlx5_dev_ctx_shared) mem_event_cb;
 	/**< Called by memory event callback. */
 	struct mlx5_mr_share_cache share_cache;
+	/* Packet pacing related structure. */
+	struct mlx5_dev_txpp txpp;
 	/* Shared DV/DR flow data section. */
 	pthread_mutex_t dv_mutex; /* DV context mutex. */
 	uint32_t dv_meta_mask; /* flow META metadata supported mask. */
@@ -622,6 +663,7 @@ struct mlx5_priv {
 	unsigned int representor:1; /* Device is a port representor. */
 	unsigned int master:1; /* Device is a E-Switch master. */
 	unsigned int dr_shared:1; /* DV/DR data is shared. */
+	unsigned int txpp_en:1; /* Tx packet pacing enabled. */
 	unsigned int counter_fallback:1; /* Use counter fallback management. */
 	unsigned int mtr_en:1; /* Whether support meter. */
 	unsigned int mtr_reg_share:1; /* Whether support meter REG_C share. */
@@ -944,4 +986,9 @@ int mlx5_os_read_dev_stat(struct mlx5_priv *priv,
 void mlx5_os_stats_init(struct rte_eth_dev *dev);
 void mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,
 			   mlx5_dereg_mr_t *dereg_mr_cb);
+/* mlx5_txpp.c */
+
+int mlx5_txpp_start(struct rte_eth_dev *dev);
+void mlx5_txpp_stop(struct rte_eth_dev *dev);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 260f584..07a2b59 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -171,6 +171,13 @@
 #define MLX5_TXDB_NCACHED 1
 #define MLX5_TXDB_HEURISTIC 2
 
+/* Tx accurate scheduling on timestamps parameters. */
+#define MLX5_TXPP_CLKQ_SIZE 1 /* Clock Queue size, in entries. */
+/* Minimal test packet (Ethernet + IPv4 headers) for one WQE, HW pads it. */
+#define MLX5_TXPP_TEST_PKT_SIZE (sizeof(struct rte_ether_hdr) +	\
+				 sizeof(struct rte_ipv4_hdr))
+
+
 /* Size of the simple hash table for metadata register table. */
 #define MLX5_FLOW_MREG_HTABLE_SZ 4096
 #define MLX5_FLOW_MREG_HNAME "MARK_COPY_TABLE"
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index ef74609..ca25ad9 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -288,25 +288,29 @@
 			return -rte_errno;
 		}
 	}
+	ret = mlx5_txpp_start(dev);
+	if (ret) {
+		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
+			dev->data->port_id, strerror(rte_errno));
+		goto error;
+	}
 	ret = mlx5_txq_start(dev);
 	if (ret) {
 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		return -rte_errno;
+		goto error;
 	}
 	ret = mlx5_rxq_start(dev);
 	if (ret) {
 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		mlx5_txq_stop(dev);
-		return -rte_errno;
+		goto error;
 	}
 	ret = mlx5_hairpin_bind(dev);
 	if (ret) {
 		DRV_LOG(ERR, "port %u hairpin binding failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		mlx5_txq_stop(dev);
-		return -rte_errno;
+		goto error;
 	}
 	/* Set started flag here for the following steps like control flow. */
 	dev->data->dev_started = 1;
@@ -362,6 +366,7 @@
 	mlx5_traffic_disable(dev);
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
+	mlx5_txpp_stop(dev); /* Stop last. */
 	rte_errno = ret; /* Restore rte_errno. */
 	return -rte_errno;
 }
@@ -398,6 +403,7 @@
 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
 	mlx5_txq_stop(dev);
 	mlx5_rxq_stop(dev);
+	mlx5_txpp_stop(dev);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_txpp.c b/drivers/net/mlx5/mlx5_txpp.c
new file mode 100644
index 0000000..382bd20
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_txpp.c
@@ -0,0 +1,449 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2020 Mellanox Technologies, Ltd
+ */
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_interrupts.h>
+#include <rte_alarm.h>
+#include <rte_malloc.h>
+
+#include "mlx5.h"
+#include "mlx5_rxtx.h"
+
+/* Destroy the DevX event channel, if created, and reset the EQ number. */
+static void
+mlx5_txpp_destroy_eqn(struct mlx5_dev_ctx_shared *sh)
+{
+	if (sh->txpp.echan) {
+		mlx5_glue->devx_destroy_event_channel(sh->txpp.echan);
+		sh->txpp.echan = NULL; /* Allows a repeated call. */
+	}
+	sh->txpp.eqn = 0;
+}
+
+/* Query the EQ number and create the DevX event channel; 0 on success. */
+static int
+mlx5_txpp_create_eqn(struct mlx5_dev_ctx_shared *sh)
+{
+	uint32_t lcore;
+
+	MLX5_ASSERT(!sh->txpp.echan);
+	lcore = (uint32_t)rte_lcore_to_cpu_id(-1);
+	if (mlx5_glue->devx_query_eqn(sh->ctx, lcore, &sh->txpp.eqn)) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to query EQ number %d.", rte_errno);
+		sh->txpp.eqn = 0;
+		return -rte_errno;
+	}
+	sh->txpp.echan = mlx5_glue->devx_create_event_channel(sh->ctx,
+			MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
+	if (!sh->txpp.echan) {
+		sh->txpp.eqn = 0; /* Drop the queried EQ number. */
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create event channel %d.",
+			rte_errno);
+		return -rte_errno;
+	}
+	return 0;
+}
+
+static void
+mlx5_txpp_destroy_clock_queue(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
+
+	if (wq->sq) /* The SQ is released before the CQ it refers to. */
+		claim_zero(mlx5_devx_cmd_destroy(wq->sq));
+	if (wq->sq_umem)
+		claim_zero(mlx5_glue->devx_umem_dereg(wq->sq_umem));
+	if (wq->sq_buf)
+		rte_free((void *)(uintptr_t)wq->sq_buf);
+	if (wq->cq)
+		claim_zero(mlx5_devx_cmd_destroy(wq->cq));
+	if (wq->cq_umem)
+		claim_zero(mlx5_glue->devx_umem_dereg(wq->cq_umem));
+	if (wq->cq_buf)
+		rte_free((void *)(uintptr_t)wq->cq_buf);
+	memset(wq, 0, sizeof(*wq)); /* Makes a repeated destroy a no-op. */
+}
+
+static void
+mlx5_txpp_fill_wqe_clock_queue(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
+	struct mlx5_wqe *wqe = (struct mlx5_wqe *)(uintptr_t)wq->wqes;
+	struct mlx5_wqe_cseg *cs = &wqe->cseg;
+	uint32_t wqe_size, opcode, i;
+	uint8_t *dst;
+
+	/* Test mode: SEND with inline packet; otherwise a NOP WQE. */
+	if (sh->txpp.test) {
+		wqe_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
+				     MLX5_WQE_CSEG_SIZE +
+				     2 * MLX5_WQE_ESEG_SIZE -
+				     MLX5_ESEG_MIN_INLINE_SIZE,
+				     MLX5_WSEG_SIZE);
+		opcode = MLX5_OPCODE_SEND;
+	} else {
+		wqe_size = MLX5_WSEG_SIZE;
+		opcode = MLX5_OPCODE_NOP;
+	}
+	cs->opcode = rte_cpu_to_be_32(opcode | 0); /* Index is ignored. */
+	cs->sq_ds = rte_cpu_to_be_32((wq->sq->id << 8) |
+				     (wqe_size / MLX5_WSEG_SIZE));
+	cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
+	cs->misc = RTE_BE32(0);
+	wqe_size = RTE_ALIGN(wqe_size, MLX5_WQE_SIZE); /* Copy stride. */
+	if (sh->txpp.test) {
+		struct mlx5_wqe_eseg *es = &wqe->eseg;
+		struct rte_ether_hdr *eth_hdr;
+		struct rte_ipv4_hdr *ip_hdr;
+		struct rte_udp_hdr *udp_hdr;
+
+		/* Build the inline test packet pattern. */
+		MLX5_ASSERT(wqe_size <= MLX5_WQE_SIZE_MAX);
+		MLX5_ASSERT(MLX5_TXPP_TEST_PKT_SIZE >=
+				(sizeof(struct rte_ether_hdr) +
+				 sizeof(struct rte_ipv4_hdr)));
+		es->flags = 0;
+		es->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
+		es->swp_offs = 0;
+		es->metadata = 0;
+		es->swp_flags = 0;
+		es->mss = 0;
+		es->inline_hdr_sz = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE);
+		/* Build test packet L2 header (Ethernet). */
+		dst = (uint8_t *)&es->inline_data;
+		eth_hdr = (struct rte_ether_hdr *)dst;
+		rte_eth_random_addr(&eth_hdr->d_addr.addr_bytes[0]);
+		rte_eth_random_addr(&eth_hdr->s_addr.addr_bytes[0]);
+		eth_hdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
+		/* Build test packet L3 header (IP v4). */
+		dst += sizeof(struct rte_ether_hdr);
+		ip_hdr = (struct rte_ipv4_hdr *)dst;
+		ip_hdr->version_ihl = RTE_IPV4_VHL_DEF;
+		ip_hdr->type_of_service = 0;
+		ip_hdr->fragment_offset = 0;
+		ip_hdr->time_to_live = 64;
+		ip_hdr->next_proto_id = IPPROTO_UDP;
+		ip_hdr->packet_id = 0;
+		ip_hdr->total_length = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
+						sizeof(struct rte_ether_hdr));
+		/* Use RFC5735 / RFC2544 reserved network test addresses. */
+		ip_hdr->src_addr = RTE_BE32((198U << 24) | (18 << 16) |
+					    (0 << 8) | 1);
+		ip_hdr->dst_addr = RTE_BE32((198U << 24) | (18 << 16) |
+					    (0 << 8) | 2);
+		if (MLX5_TXPP_TEST_PKT_SIZE <
+					(sizeof(struct rte_ether_hdr) +
+					 sizeof(struct rte_ipv4_hdr) +
+					 sizeof(struct rte_udp_hdr)))
+			goto wcopy; /* No room for UDP header. */
+		/* Build test packet L4 header (UDP). */
+		dst += sizeof(struct rte_ipv4_hdr);
+		udp_hdr = (struct rte_udp_hdr *)dst;
+		udp_hdr->src_port = RTE_BE16(9); /* RFC863 Discard. */
+		udp_hdr->dst_port = RTE_BE16(9);
+		udp_hdr->dgram_len = RTE_BE16(MLX5_TXPP_TEST_PKT_SIZE -
+					      sizeof(struct rte_ether_hdr) -
+					      sizeof(struct rte_ipv4_hdr));
+		udp_hdr->dgram_cksum = 0;
+		/* Fill the test packet data. */
+		dst += sizeof(struct rte_udp_hdr);
+		for (i = sizeof(struct rte_ether_hdr) +
+			sizeof(struct rte_ipv4_hdr) +
+			sizeof(struct rte_udp_hdr);
+				i < MLX5_TXPP_TEST_PKT_SIZE; i++)
+			*dst++ = (uint8_t)(i & 0xFF);
+	}
+wcopy:
+	/* Duplicate the first WQE pattern to the remaining queue entries. */
+	dst = (uint8_t *)(uintptr_t)wq->sq_buf;
+	for (i = 1; i < MLX5_TXPP_CLKQ_SIZE; i++) {
+		dst += wqe_size;
+		rte_memcpy(dst, (void *)(uintptr_t)wq->sq_buf, wqe_size);
+	}
+}
+
+/* Creates the Clock Queue (CQ and SQ); returns 0 or negative errno. */
+static int
+mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
+{
+	struct mlx5_devx_create_sq_attr sq_attr = { 0 };
+	struct mlx5_devx_modify_sq_attr msq_attr = { 0 };
+	struct mlx5_devx_cq_attr cq_attr = { 0 };
+	struct mlx5_txpp_wq *wq = &sh->txpp.clock_queue;
+	size_t page_size = sysconf(_SC_PAGESIZE);
+	uint32_t umem_size, umem_dbrec;
+	int ret;
+
+	/* Allocate memory buffer for CQEs and doorbell record. */
+	umem_size = sizeof(struct mlx5_cqe) * MLX5_TXPP_CLKQ_SIZE;
+	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
+	umem_size += MLX5_DBR_SIZE;
+	wq->cq_buf = rte_zmalloc_socket(__func__, umem_size,
+					page_size, sh->numa_node);
+	if (!wq->cq_buf) {
+		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
+		return -ENOMEM; /* Nothing to release yet. */
+	}
+	/* Register allocated buffer in user space with DevX. */
+	wq->cq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
+					       (void *)(uintptr_t)wq->cq_buf,
+					       umem_size,
+					       IBV_ACCESS_LOCAL_WRITE);
+	if (!wq->cq_umem) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
+		goto error;
+	}
+	/* Create completion queue object for Clock Queue. */
+	cq_attr.cqe_size = (sizeof(struct mlx5_cqe) == 128) ?
+			    MLX5_CQE_SIZE_128B : MLX5_CQE_SIZE_64B;
+	cq_attr.use_first_only = 1;
+	cq_attr.overrun_ignore = 1;
+	cq_attr.uar_page_id = sh->tx_uar->page_id;
+	cq_attr.eqn = sh->txpp.eqn;
+	cq_attr.q_umem_valid = 1;
+	cq_attr.q_umem_offset = 0;
+	cq_attr.q_umem_id = wq->cq_umem->umem_id;
+	cq_attr.db_umem_valid = 1;
+	cq_attr.db_umem_offset = umem_dbrec;
+	cq_attr.db_umem_id = wq->cq_umem->umem_id;
+	cq_attr.log_cq_size = rte_log2_u32(MLX5_TXPP_CLKQ_SIZE);
+	cq_attr.log_page_size = rte_log2_u32(page_size);
+	wq->cq = mlx5_devx_cmd_create_cq(sh->ctx, &cq_attr);
+	if (!wq->cq) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create CQ for Clock Queue.");
+		goto error;
+	}
+	wq->cq_dbrec = RTE_PTR_ADD(wq->cq_buf, umem_dbrec);
+	wq->cq_ci = 0;
+	/* Allocate memory buffer for Send Queue WQEs. */
+	if (sh->txpp.test) {
+		wq->sq_size = RTE_ALIGN(MLX5_TXPP_TEST_PKT_SIZE +
+					MLX5_WQE_CSEG_SIZE +
+					2 * MLX5_WQE_ESEG_SIZE -
+					MLX5_ESEG_MIN_INLINE_SIZE,
+					MLX5_WQE_SIZE) / MLX5_WQE_SIZE;
+		wq->sq_size *= MLX5_TXPP_CLKQ_SIZE;
+	} else {
+		wq->sq_size = MLX5_TXPP_CLKQ_SIZE;
+	}
+	/* There should not be WQE leftovers in the cyclic queue. */
+	MLX5_ASSERT(wq->sq_size == (1 << log2above(wq->sq_size)));
+	umem_size =  MLX5_WQE_SIZE * wq->sq_size;
+	umem_dbrec = RTE_ALIGN(umem_size, MLX5_DBR_SIZE);
+	umem_size += MLX5_DBR_SIZE;
+	wq->sq_buf = rte_zmalloc_socket(__func__, umem_size,
+					page_size, sh->numa_node);
+	if (!wq->sq_buf) {
+		DRV_LOG(ERR, "Failed to allocate memory for Clock Queue.");
+		rte_errno = ENOMEM;
+		goto error;
+	}
+	/* Register allocated buffer in user space with DevX. */
+	wq->sq_umem = mlx5_glue->devx_umem_reg(sh->ctx,
+					       (void *)(uintptr_t)wq->sq_buf,
+					       umem_size,
+					       IBV_ACCESS_LOCAL_WRITE);
+	if (!wq->sq_umem) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to register umem for Clock Queue.");
+		goto error;
+	}
+	/* Create send queue object for Clock Queue. */
+	if (sh->txpp.test) {
+		sq_attr.tis_lst_sz = 1;
+		sq_attr.tis_num = sh->tis->id;
+		sq_attr.non_wire = 0;
+		sq_attr.static_sq_wq = 1;
+	} else {
+		sq_attr.non_wire = 1;
+		sq_attr.static_sq_wq = 1;
+	}
+	sq_attr.state = MLX5_SQC_STATE_RST;
+	sq_attr.cqn = wq->cq->id;
+	sq_attr.wq_attr.cd_slave = 1;
+	sq_attr.wq_attr.uar_page = sh->tx_uar->page_id;
+	sq_attr.wq_attr.wq_type = MLX5_WQ_TYPE_CYCLIC;
+	sq_attr.wq_attr.pd = sh->pdn;
+	sq_attr.wq_attr.log_wq_stride = rte_log2_u32(MLX5_WQE_SIZE);
+	sq_attr.wq_attr.log_wq_sz = rte_log2_u32(wq->sq_size);
+	sq_attr.wq_attr.dbr_umem_valid = 1;
+	sq_attr.wq_attr.dbr_addr = umem_dbrec;
+	sq_attr.wq_attr.dbr_umem_id = wq->sq_umem->umem_id;
+	sq_attr.wq_attr.wq_umem_valid = 1;
+	sq_attr.wq_attr.wq_umem_id = wq->sq_umem->umem_id;
+	/* umem_offset must be zero for static_sq_wq queue. */
+	sq_attr.wq_attr.wq_umem_offset = 0;
+	wq->sq = mlx5_devx_cmd_create_sq(sh->ctx, &sq_attr);
+	if (!wq->sq) {
+		rte_errno = errno;
+		DRV_LOG(ERR, "Failed to create SQ for Clock Queue.");
+		goto error;
+	}
+	wq->sq_dbrec = RTE_PTR_ADD(wq->sq_buf, umem_dbrec +
+				   MLX5_SND_DBR * sizeof(uint32_t));
+	/* Build the WQEs in the Send Queue before goto Ready state. */
+	mlx5_txpp_fill_wqe_clock_queue(sh);
+	/* Change queue state to ready. */
+	msq_attr.sq_state = MLX5_SQC_STATE_RST;
+	msq_attr.state = MLX5_SQC_STATE_RDY;
+	wq->sq_ci = 0;
+	ret = mlx5_devx_cmd_modify_sq(wq->sq, &msq_attr);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to set SQ ready state Clock Queue.");
+		goto error;
+	}
+	return 0;
+error:
+	ret = -rte_errno; /* Keep rte_errno across the cleanup. */
+	mlx5_txpp_destroy_clock_queue(sh);
+	rte_errno = -ret;
+	return ret;
+}
+
+/*
+ * The routine initializes the packet pacing infrastructure:
+ * - stores the requested pacing parameters
+ * - creates the Event Queue notification channel
+ * - creates the Clock CQ/SQ
+ * - releases everything created so far on failure
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+mlx5_txpp_create(struct mlx5_dev_ctx_shared *sh, struct mlx5_priv *priv)
+{
+	int tx_pp = priv->config.tx_pp;
+	int ret;
+
+	/* Store the requested pacing parameters. */
+	sh->txpp.tick = tx_pp >= 0 ? tx_pp : -tx_pp;
+	sh->txpp.test = !!(tx_pp < 0); /* Negative: test mode. */
+	sh->txpp.skew = priv->config.tx_skew;
+	sh->txpp.freq = priv->config.hca_attr.dev_freq_khz;
+	ret = mlx5_txpp_create_eqn(sh);
+	if (ret)
+		goto exit;
+	ret = mlx5_txpp_create_clock_queue(sh);
+	if (ret)
+		goto exit;
+exit:
+	if (ret) {
+		mlx5_txpp_destroy_clock_queue(sh);
+		mlx5_txpp_destroy_eqn(sh);
+		sh->txpp.tick = 0;
+		sh->txpp.test = 0;
+		sh->txpp.skew = 0;
+	}
+	return ret;
+}
+
+/*
+ * The routine destroys the packet pacing infrastructure:
+ * - Clock CQ/SQ
+ * - Event Queue notification channel
+ * - clears the stored tick, test and skew parameters
+ * The stored freq value is not reset.
+ */
+static void
+mlx5_txpp_destroy(struct mlx5_dev_ctx_shared *sh)
+{
+	mlx5_txpp_destroy_clock_queue(sh);
+	mlx5_txpp_destroy_eqn(sh);
+	sh->txpp.tick = 0;
+	sh->txpp.test = 0;
+	sh->txpp.skew = 0;
+}
+
+/**
+ * Creates and starts packet pacing infrastructure on specified device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_txpp_start(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	int err = 0;
+	int ret;
+
+	if (!priv->config.tx_pp) {
+		/* Packet pacing is not requested for the device. */
+		MLX5_ASSERT(priv->txpp_en == 0);
+		return 0;
+	}
+	if (priv->txpp_en) {
+		/* Packet pacing is already enabled for the device. */
+		MLX5_ASSERT(sh->txpp.refcnt);
+		return 0;
+	}
+	if (priv->config.tx_pp > 0) {
+		ret = rte_mbuf_dynflag_lookup
+				(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
+		if (ret < 0)
+			return 0; /* Flag not found, pacing stays off. */
+	}
+	ret = pthread_mutex_lock(&sh->txpp.mutex);
+	MLX5_ASSERT(!ret);
+	RTE_SET_USED(ret);
+	if (sh->txpp.refcnt) { /* Already created, just take a reference. */
+		priv->txpp_en = 1;
+		++sh->txpp.refcnt;
+	} else {
+		err = mlx5_txpp_create(sh, priv);
+		if (!err) {
+			MLX5_ASSERT(sh->txpp.tick);
+			priv->txpp_en = 1;
+			sh->txpp.refcnt = 1;
+		} else {
+			rte_errno = -err;
+		}
+	}
+	ret = pthread_mutex_unlock(&sh->txpp.mutex);
+	MLX5_ASSERT(!ret);
+	RTE_SET_USED(ret);
+	return err;
+}
+
+/**
+ * Stops and destroys packet pacing infrastructure on specified device.
+ *
+ * The shared pacing objects are destroyed only when the last device
+ * referencing them is stopped.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+void
+mlx5_txpp_stop(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	int ret;
+
+	if (!priv->txpp_en) {
+		/* Packet pacing is already disabled for the device. */
+		return;
+	}
+	priv->txpp_en = 0;
+	ret = pthread_mutex_lock(&sh->txpp.mutex);
+	MLX5_ASSERT(!ret);
+	RTE_SET_USED(ret);
+	MLX5_ASSERT(sh->txpp.refcnt);
+	/* Do actual destroy only when the last reference is dropped. */
+	if (sh->txpp.refcnt && !--sh->txpp.refcnt)
+		mlx5_txpp_destroy(sh);
+	/* Unlock on every path, other references may still remain. */
+	ret = pthread_mutex_unlock(&sh->txpp.mutex);
+	MLX5_ASSERT(!ret);
+	RTE_SET_USED(ret);
+}
-- 
1.8.3.1


  parent reply	other threads:[~2020-07-16  8:24 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-10  6:38 [dpdk-dev] [RFC] mbuf: accurate packet Tx scheduling Viacheslav Ovsiienko
2020-06-10 13:33 ` Harman Kalra
2020-06-10 15:16   ` Slava Ovsiienko
2020-06-17 15:57     ` [dpdk-dev] [EXT] " Harman Kalra
2020-07-01 15:46       ` Slava Ovsiienko
2020-07-01 15:36 ` [dpdk-dev] [PATCH 1/2] mbuf: introduce " Viacheslav Ovsiienko
2020-07-01 15:36   ` [dpdk-dev] [PATCH 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-07 11:50   ` [dpdk-dev] [PATCH 1/2] mbuf: introduce accurate packet Tx scheduling Olivier Matz
2020-07-07 12:46     ` Slava Ovsiienko
2020-07-07 12:59 ` [dpdk-dev] [PATCH v2 " Viacheslav Ovsiienko
2020-07-07 12:59   ` [dpdk-dev] [PATCH v2 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-07 13:08 ` [dpdk-dev] [PATCH v3 1/2] mbuf: introduce accurate packet Tx scheduling Viacheslav Ovsiienko
2020-07-07 13:08   ` [dpdk-dev] [PATCH v3 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-07 14:32   ` [dpdk-dev] [PATCH v3 1/2] mbuf: introduce accurate packet Tx scheduling Olivier Matz
2020-07-07 14:57 ` [dpdk-dev] [PATCH v4 " Viacheslav Ovsiienko
2020-07-07 14:57   ` [dpdk-dev] [PATCH v4 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-07 15:23   ` [dpdk-dev] [PATCH v4 1/2] mbuf: introduce accurate packet Tx scheduling Olivier Matz
2020-07-08 14:16   ` [dpdk-dev] [PATCH v4 1/2] mbuf: introduce accurate packet Txscheduling Morten Brørup
2020-07-08 14:54     ` Slava Ovsiienko
2020-07-08 15:27       ` Morten Brørup
2020-07-08 15:51         ` Slava Ovsiienko
2020-07-08 15:47 ` [dpdk-dev] [PATCH v5 1/2] mbuf: introduce accurate packet Tx scheduling Viacheslav Ovsiienko
2020-07-08 15:47   ` [dpdk-dev] [PATCH v5 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-08 16:05   ` [dpdk-dev] [PATCH v5 1/2] mbuf: introduce accurate packet Tx scheduling Slava Ovsiienko
2020-07-09 12:26   ` Thomas Monjalon
2020-07-09 12:36 ` [dpdk-dev] [PATCH v6 " Viacheslav Ovsiienko
2020-07-09 12:36   ` [dpdk-dev] [PATCH v6 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-09 23:58     ` Ferruh Yigit
2020-07-10 12:41       ` Slava Ovsiienko
2020-07-09 23:47   ` [dpdk-dev] [PATCH v6 1/2] mbuf: introduce accurate packet Tx scheduling Ferruh Yigit
2020-07-10 12:32     ` Slava Ovsiienko
2020-07-10 12:39 ` [dpdk-dev] [PATCH v7 " Viacheslav Ovsiienko
2020-07-10 12:39   ` [dpdk-dev] [PATCH v7 2/2] app/testpmd: add send scheduling test capability Viacheslav Ovsiienko
2020-07-10 15:46   ` [dpdk-dev] [PATCH v7 1/2] mbuf: introduce accurate packet Tx scheduling Slava Ovsiienko
2020-07-10 22:07     ` Ferruh Yigit
2020-07-15  6:21 ` [dpdk-dev] [PATCH v2 00/17] net/mlx5: " Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 01/17] common/mlx5: update common part to support packet pacing Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 02/17] net/mlx5: introduce send scheduling devargs Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 03/17] net/mlx5: fix UAR lock sharing for multiport devices Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 04/17] net/mlx5: introduce shared UAR resource Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 05/17] net/mlx5: create clock queue for packet pacing Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 06/17] net/mlx5: create rearm " Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 07/17] net/mlx5: create Tx queues with DevX Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 08/17] net/mlx5: allocate packet pacing context Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 09/17] net/mlx5: introduce clock queue service routine Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 10/17] net/mlx5: prepare Tx queue structures to support timestamp Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 11/17] net/mlx5: convert timestamp to completion index Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 12/17] net/mlx5: prepare Tx datapath to support sheduling Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 13/17] net/mlx5: add scheduling support to send routine template Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 14/17] net/mlx5: add read device clock support Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 15/17] net/mlx5: provide the send scheduling error statistics Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 16/17] common/mlx5: add register access DevX routine Viacheslav Ovsiienko
2020-07-15  6:21   ` [dpdk-dev] [PATCH v2 17/17] net/mlx5: convert Rx timestamps in realtime format Viacheslav Ovsiienko
2020-07-16  8:23 ` [dpdk-dev] [PATCH v3 00/17] net/mlx5: introduce accurate packet Tx scheduling Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 01/17] common/mlx5: update common part to support packet pacing Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 02/17] net/mlx5: introduce send scheduling devargs Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 03/17] net/mlx5: fix UAR lock sharing for multiport devices Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 04/17] net/mlx5: introduce shared UAR resource Viacheslav Ovsiienko
2020-07-16  8:23   ` Viacheslav Ovsiienko [this message]
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 06/17] net/mlx5: create rearm queue for packet pacing Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 07/17] net/mlx5: create Tx queues with DevX Viacheslav Ovsiienko
2020-07-20 14:18     ` Ferruh Yigit
2020-07-20 15:25       ` Ferruh Yigit
2020-07-21 11:35         ` Slava Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 08/17] net/mlx5: allocate packet pacing context Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 09/17] net/mlx5: introduce clock queue service routine Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 10/17] net/mlx5: prepare Tx queue structures to support timestamp Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 11/17] net/mlx5: convert timestamp to completion index Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 12/17] net/mlx5: prepare Tx datapath to support sheduling Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 13/17] net/mlx5: add scheduling support to send routine template Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 14/17] net/mlx5: add read device clock support Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 15/17] net/mlx5: provide the send scheduling error statistics Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 16/17] common/mlx5: add register access DevX routine Viacheslav Ovsiienko
2020-07-16  8:23   ` [dpdk-dev] [PATCH v3 17/17] net/mlx5: convert Rx timestamps in realtime format Viacheslav Ovsiienko
2020-07-16 20:20   ` [dpdk-dev] [PATCH v3 00/17] net/mlx5: introduce accurate packet Tx scheduling Raslan Darawsheh
2020-07-17 14:28 ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix compilation issue with missing DevX event Viacheslav Ovsiienko
2020-07-17 14:28   ` [dpdk-dev] [PATCH 2/3] net/mlx5: fix compilation issue with atomic128 exchange Viacheslav Ovsiienko
2020-07-17 15:08     ` Thomas Monjalon
2020-07-17 15:15       ` Slava Ovsiienko
2020-07-17 14:28   ` [dpdk-dev] [PATCH 3/3] common/mlx5: fix DevX register access opcode Viacheslav Ovsiienko
2020-07-17 15:05     ` Thomas Monjalon
2020-07-17 15:11       ` Slava Ovsiienko
2020-07-17 15:19         ` Thomas Monjalon
2020-07-17 15:23           ` Slava Ovsiienko
2020-07-17 15:59             ` Thomas Monjalon
2020-07-18 13:38   ` [dpdk-dev] [PATCH 1/3] net/mlx5: fix compilation issue with missing DevX event Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1594887800-6563-6-git-send-email-viacheslavo@mellanox.com \
    --to=viacheslavo@mellanox.com \
    --cc=dev@dpdk.org \
    --cc=ferruh.yigit@intel.com \
    --cc=matan@mellanox.com \
    --cc=olivier.matz@6wind.com \
    --cc=rasland@mellanox.com \
    --cc=thomas@monjalon.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.