All of lore.kernel.org
 help / color / mirror / Atom feed
From: Suanming Mou <suanmingm@nvidia.com>
To: Matan Azrad <matan@nvidia.com>,
	Shahaf Shuler <shahafs@nvidia.com>,
	Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Cc: dev@dpdk.org
Subject: [dpdk-dev] [PATCH v2 5/8] net/mlx5: make three level table thread safe
Date: Tue, 20 Oct 2020 11:02:25 +0800	[thread overview]
Message-ID: <1603162949-150001-6-git-send-email-suanmingm@nvidia.com> (raw)
In-Reply-To: <1603162949-150001-1-git-send-email-suanmingm@nvidia.com>

This commit adds thread safety support to the three-level table using
a spinlock and a reference counter for each table entry.

A new mlx5_l3t_prepare_entry() function is added in order to support
multi-threaded operation.

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/net/mlx5/mlx5_utils.c | 191 ++++++++++++++++++++++++++++++++++--------
 drivers/net/mlx5/mlx5_utils.h |  81 ++++++++++++++----
 2 files changed, 224 insertions(+), 48 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_utils.c b/drivers/net/mlx5/mlx5_utils.c
index fefe833..9a54fda 100644
--- a/drivers/net/mlx5/mlx5_utils.c
+++ b/drivers/net/mlx5/mlx5_utils.c
@@ -551,26 +551,23 @@ struct mlx5_l3t_tbl *
 	tbl->type = type;
 	switch (type) {
 	case MLX5_L3T_TYPE_WORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word) +
-				  sizeof(uint16_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_word);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_w";
 		break;
 	case MLX5_L3T_TYPE_DWORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword) +
-				  sizeof(uint32_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_dword);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_dw";
 		break;
 	case MLX5_L3T_TYPE_QWORD:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword) +
-				  sizeof(uint64_t) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_qword);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_qw";
 		break;
 	default:
-		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr) +
-				  sizeof(void *) * MLX5_L3T_ET_SIZE;
+		l3t_ip_cfg.size = sizeof(struct mlx5_l3t_entry_ptr);
 		l3t_ip_cfg.type = "mlx5_l3t_e_tbl_tpr";
 		break;
 	}
+	rte_spinlock_init(&tbl->sl);
 	tbl->eip = mlx5_ipool_create(&l3t_ip_cfg);
 	if (!tbl->eip) {
 		rte_errno = ENOMEM;
@@ -620,11 +617,15 @@ struct mlx5_l3t_tbl *
 	mlx5_free(tbl);
 }
 
-uint32_t
-mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
-		   union mlx5_l3t_data *data)
+static int32_t
+__l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		union mlx5_l3t_data *data)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
+	struct mlx5_l3t_entry_word *w_e_tbl;
+	struct mlx5_l3t_entry_dword *dw_e_tbl;
+	struct mlx5_l3t_entry_qword *qw_e_tbl;
+	struct mlx5_l3t_entry_ptr *ptr_e_tbl;
 	void *e_tbl;
 	uint32_t entry_idx;
 
@@ -640,26 +641,46 @@ struct mlx5_l3t_tbl *
 	entry_idx = idx & MLX5_L3T_ET_MASK;
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
-		data->word = ((struct mlx5_l3t_entry_word *)e_tbl)->entry
-			     [entry_idx];
+		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
+		data->word = w_e_tbl->entry[entry_idx].data;
+		if (w_e_tbl->entry[entry_idx].data)
+			w_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
-		data->dword = ((struct mlx5_l3t_entry_dword *)e_tbl)->entry
-			     [entry_idx];
+		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
+		data->dword = dw_e_tbl->entry[entry_idx].data;
+		if (dw_e_tbl->entry[entry_idx].data)
+			dw_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
-		data->qword = ((struct mlx5_l3t_entry_qword *)e_tbl)->entry
-			      [entry_idx];
+		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
+		data->qword = qw_e_tbl->entry[entry_idx].data;
+		if (qw_e_tbl->entry[entry_idx].data)
+			qw_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	default:
-		data->ptr = ((struct mlx5_l3t_entry_ptr *)e_tbl)->entry
-			    [entry_idx];
+		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
+		data->ptr = ptr_e_tbl->entry[entry_idx].data;
+		if (ptr_e_tbl->entry[entry_idx].data)
+			ptr_e_tbl->entry[entry_idx].ref_cnt++;
 		break;
 	}
 	return 0;
 }
 
-void
+int32_t
+mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		   union mlx5_l3t_data *data)
+{
+	int ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	ret = __l3t_get_entry(tbl, idx, data);
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
+
+int32_t
 mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
@@ -670,36 +691,54 @@ struct mlx5_l3t_tbl *
 	void *e_tbl;
 	uint32_t entry_idx;
 	uint64_t ref_cnt;
+	int32_t ret = -1;
 
+	rte_spinlock_lock(&tbl->sl);
 	g_tbl = tbl->tbl;
 	if (!g_tbl)
-		return;
+		goto out;
 	m_tbl = g_tbl->tbl[(idx >> MLX5_L3T_GT_OFFSET) & MLX5_L3T_GT_MASK];
 	if (!m_tbl)
-		return;
+		goto out;
 	e_tbl = m_tbl->tbl[(idx >> MLX5_L3T_MT_OFFSET) & MLX5_L3T_MT_MASK];
 	if (!e_tbl)
-		return;
+		goto out;
 	entry_idx = idx & MLX5_L3T_ET_MASK;
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
-		w_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(w_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --w_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		w_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --w_e_tbl->ref_cnt;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
-		dw_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(dw_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --dw_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		dw_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --dw_e_tbl->ref_cnt;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
-		qw_e_tbl->entry[entry_idx] = 0;
+		MLX5_ASSERT(qw_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --qw_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		qw_e_tbl->entry[entry_idx].data = 0;
 		ref_cnt = --qw_e_tbl->ref_cnt;
 		break;
 	default:
 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
-		ptr_e_tbl->entry[entry_idx] = NULL;
+		MLX5_ASSERT(ptr_e_tbl->entry[entry_idx].ref_cnt);
+		ret = --ptr_e_tbl->entry[entry_idx].ref_cnt;
+		if (ret)
+			goto out;
+		ptr_e_tbl->entry[entry_idx].data = NULL;
 		ref_cnt = --ptr_e_tbl->ref_cnt;
 		break;
 	}
@@ -718,11 +757,14 @@ struct mlx5_l3t_tbl *
 			}
 		}
 	}
+out:
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
 }
 
-uint32_t
-mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
-		   union mlx5_l3t_data *data)
+static int32_t
+__l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		union mlx5_l3t_data *data)
 {
 	struct mlx5_l3t_level_tbl *g_tbl, *m_tbl;
 	struct mlx5_l3t_entry_word *w_e_tbl;
@@ -783,24 +825,105 @@ struct mlx5_l3t_tbl *
 	switch (tbl->type) {
 	case MLX5_L3T_TYPE_WORD:
 		w_e_tbl = (struct mlx5_l3t_entry_word *)e_tbl;
-		w_e_tbl->entry[entry_idx] = data->word;
+		if (w_e_tbl->entry[entry_idx].data) {
+			data->word = w_e_tbl->entry[entry_idx].data;
+			w_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		w_e_tbl->entry[entry_idx].data = data->word;
+		w_e_tbl->entry[entry_idx].ref_cnt = 1;
 		w_e_tbl->ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_DWORD:
 		dw_e_tbl = (struct mlx5_l3t_entry_dword *)e_tbl;
-		dw_e_tbl->entry[entry_idx] = data->dword;
+		if (dw_e_tbl->entry[entry_idx].data) {
+			data->dword = dw_e_tbl->entry[entry_idx].data;
+			dw_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		dw_e_tbl->entry[entry_idx].data = data->dword;
+		dw_e_tbl->entry[entry_idx].ref_cnt = 1;
 		dw_e_tbl->ref_cnt++;
 		break;
 	case MLX5_L3T_TYPE_QWORD:
 		qw_e_tbl = (struct mlx5_l3t_entry_qword *)e_tbl;
-		qw_e_tbl->entry[entry_idx] = data->qword;
+		if (qw_e_tbl->entry[entry_idx].data) {
+			data->qword = qw_e_tbl->entry[entry_idx].data;
+			qw_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		qw_e_tbl->entry[entry_idx].data = data->qword;
+		qw_e_tbl->entry[entry_idx].ref_cnt = 1;
 		qw_e_tbl->ref_cnt++;
 		break;
 	default:
 		ptr_e_tbl = (struct mlx5_l3t_entry_ptr *)e_tbl;
-		ptr_e_tbl->entry[entry_idx] = data->ptr;
+		if (ptr_e_tbl->entry[entry_idx].data) {
+			data->ptr = ptr_e_tbl->entry[entry_idx].data;
+			ptr_e_tbl->entry[entry_idx].ref_cnt++;
+			rte_errno = EEXIST;
+			return -1;
+		}
+		ptr_e_tbl->entry[entry_idx].data = data->ptr;
+		ptr_e_tbl->entry[entry_idx].ref_cnt = 1;
 		ptr_e_tbl->ref_cnt++;
 		break;
 	}
 	return 0;
 }
+
+int32_t
+mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		   union mlx5_l3t_data *data)
+{
+	int ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	ret = __l3t_set_entry(tbl, idx, data);
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
+
+int32_t
+mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+		       union mlx5_l3t_data *data,
+		       mlx5_l3t_alloc_callback_fn cb, void *ctx)
+{
+	int32_t ret;
+
+	rte_spinlock_lock(&tbl->sl);
+	/* Check if entry data is ready. */
+	ret = __l3t_get_entry(tbl, idx, data);
+	if (!ret) {
+		switch (tbl->type) {
+		case MLX5_L3T_TYPE_WORD:
+			if (data->word)
+				goto out;
+			break;
+		case MLX5_L3T_TYPE_DWORD:
+			if (data->dword)
+				goto out;
+			break;
+		case MLX5_L3T_TYPE_QWORD:
+			if (data->qword)
+				goto out;
+			break;
+		default:
+			if (data->ptr)
+				goto out;
+			break;
+		}
+	}
+	/* Entry data is not ready, use user callback to create it. */
+	ret = cb(ctx, data);
+	if (ret)
+		goto out;
+	/* Save the new allocated data to entry. */
+	ret = __l3t_set_entry(tbl, idx, data);
+out:
+	rte_spinlock_unlock(&tbl->sl);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index f078bdc..ca9bb76 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -118,29 +118,41 @@ struct mlx5_l3t_level_tbl {
 struct mlx5_l3t_entry_word {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint16_t entry[]; /* Entry array. */
-};
+	struct {
+		uint16_t data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 double word entry table data structure. */
 struct mlx5_l3t_entry_dword {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint32_t entry[]; /* Entry array. */
-};
+	struct {
+		uint32_t data;
+		int32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 quad word entry table data structure. */
 struct mlx5_l3t_entry_qword {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	uint64_t entry[]; /* Entry array. */
-};
+	struct {
+		uint64_t data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 pointer entry table data structure. */
 struct mlx5_l3t_entry_ptr {
 	uint32_t idx; /* Table index. */
 	uint64_t ref_cnt; /* Table ref_cnt. */
-	void *entry[]; /* Entry array. */
-};
+	struct {
+		void *data;
+		uint32_t ref_cnt;
+	} entry[MLX5_L3T_ET_SIZE]; /* Entry array */
+} __rte_packed;
 
 /* L3 table data structure. */
 struct mlx5_l3t_tbl {
@@ -148,8 +160,13 @@ struct mlx5_l3t_tbl {
 	struct mlx5_indexed_pool *eip;
 	/* Table index pool handles. */
 	struct mlx5_l3t_level_tbl *tbl; /* Global table index. */
+	rte_spinlock_t sl; /* The table lock. */
 };
 
+/** Type of function that is used to allocate data for an unset entry. */
+typedef int32_t (*mlx5_l3t_alloc_callback_fn)(void *ctx,
+					   union mlx5_l3t_data *data);
+
 /*
  * The indexed memory entry index is made up of trunk index and offset of
  * the entry in the trunk. Since the entry index is 32 bits, in case user
@@ -535,32 +552,68 @@ struct mlx5_indexed_pool *
  *   0 if success, -1 on error.
  */
 
-uint32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+int32_t mlx5_l3t_get_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
 			    union mlx5_l3t_data *data);
+
 /**
- * This function clears the index entry from Three-level table.
+ * This function gets the index entry from Three-level table.
+ *
+ * If the index entry is not available, allocate new one by callback
+ * function and fill in the entry.
  *
  * @param tbl
  *   Pointer to the l3t.
  * @param idx
  *   Index to the entry.
+ * @param data
+ *   Pointer to the memory which saves the entry data.
+ *   When the function call returns 0, data contains the entry data read
+ *   from l3t.
+ *   When function call returns -1, data is not modified.
+ * @param cb
+ *   Callback function to allocate new data.
+ * @param ctx
+ *   Context for callback function.
+ *
+ * @return
+ *   0 if success, -1 on error.
  */
-void mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
+
+int32_t mlx5_l3t_prepare_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+			       union mlx5_l3t_data *data,
+			       mlx5_l3t_alloc_callback_fn cb, void *ctx);
 
 /**
- * This function gets the index entry from Three-level table.
+ * This function decreases the reference counter of the index entry and
+ * clears the entry from Three-level table once the counter reaches 0.
  *
  * @param tbl
  *   Pointer to the l3t.
  * @param idx
  *   Index to the entry.
- * @param data
+ *
+ * @return
+ *   The remaining reference count, 0 means the entry was cleared, -1 on error.
+ */
+int32_t mlx5_l3t_clear_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx);
+
+/**
+ * This function sets the index entry to Three-level table.
+ * If the entry is already set, rte_errno is set to EEXIST, the entry
+ * reference counter is increased and its data is copied back to data.
+ *
+ * @param tbl[in]
+ *   Pointer to the l3t.
+ * @param idx[in]
+ *   Index to the entry.
+ * @param data[in/out]
+ *   Pointer to the memory which contains the entry data to save to l3t.
+ *   If the entry is already set, it is filled with the existing data.
  *
  * @return
  *   0 if success, -1 on error.
  */
-uint32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
+int32_t mlx5_l3t_set_entry(struct mlx5_l3t_tbl *tbl, uint32_t idx,
 			    union mlx5_l3t_data *data);
 
 /*
-- 
1.8.3.1


  parent reply	other threads:[~2020-10-20  3:03 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-06 11:38 [dpdk-dev] [PATCH 0/6] net/mlx5: make counter thread safe Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 1/6] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 2/6] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 3/6] net/mlx5: remove single counter container Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 4/6] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 5/6] net/mlx5: make three level table thread safe Suanming Mou
2020-10-06 11:38 ` [dpdk-dev] [PATCH 6/6] net/mlx5: make shared counters " Suanming Mou
2020-10-20  3:02 ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter " Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 1/8] net/mlx5: locate aging pools in the general container Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 2/8] net/mlx5: optimize shared counter memory Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 3/8] net/mlx5: remove single counter container Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 4/8] net/mlx5: synchronize flow counter pool creation Suanming Mou
2020-10-20  3:02   ` Suanming Mou [this message]
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 6/8] net/mlx5: make shared counters thread safe Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 7/8] net/mlx5: rename flow counter macro Suanming Mou
2020-10-20  3:02   ` [dpdk-dev] [PATCH v2 8/8] net/mlx5: optimize counter extend memory Suanming Mou
2020-10-20 22:59   ` [dpdk-dev] [PATCH v2 0/8] net/mlx5: make counter thread safe Raslan Darawsheh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1603162949-150001-6-git-send-email-suanmingm@nvidia.com \
    --to=suanmingm@nvidia.com \
    --cc=dev@dpdk.org \
    --cc=matan@nvidia.com \
    --cc=shahafs@nvidia.com \
    --cc=viacheslavo@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.