From: Saeed Mahameed <saeedm@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, Eran Ben Elisha <eranbe@mellanox.com>,
	Mark Bloch <markb@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>
Subject: [net-next 01/13] net/mlx5: Hold pages RB tree per VF
Date: Tue, 28 Jul 2020 02:43:59 -0700
Message-ID: <20200728094411.116386-2-saeedm@mellanox.com>
In-Reply-To: <20200728094411.116386-1-saeedm@mellanox.com>

From: Eran Ben Elisha <eranbe@mellanox.com>

With every page request event, FW asks the driver to allocate or
release pages for a single function. The driver maintains its FW pages
objects per function, so there is no need to hold one global page
database. Instead, hold a page database per function, which improves
the performance of the release flow in all cases, especially for
"release all pages".
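
To see why "release all pages" gets cheaper, compare the two walks
below. This is a simplified fragment of release_all_pages() from the
diff that follows; locking and the npages accounting are omitted:

	/* Before: one global RB tree holds every function's pages, so
	 * releasing one function's pages walks them all and filters.
	 */
	p = rb_first(&dev->priv.page_root);
	while (p) {
		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);

		p = rb_next(p);	/* advance before freeing the node */
		if (fwp->func_id != func_id)
			continue;
		free_fwp(dev, fwp, fwp->free_count);
	}

	/* After: look up the function's private tree once, then walk
	 * only the pages that are actually being released.
	 */
	root = xa_load(&dev->priv.page_root_xa, func_id);
	p = rb_first(root);
	while (p) {
		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);

		p = rb_next(p);
		free_fwp(dev, fwp, fwp->free_count);
	}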

As the range of function IDs is large and not sequential, use an
xarray to store the per-function page database, with the function ID
as the key.

Upon the first page allocation for a function ID, create that
function's page database. This database is released only at pagealloc
mechanism cleanup.
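
The create-on-first-use step condenses to the following pattern (a
trimmed view of the page_root_per_func_id() helper added below; page
requests are served from the single-threaded pg_wq workqueue, so a
racing insert is not expected in practice):

	root = xa_load(&dev->priv.page_root_xa, func_id);
	if (!root) {
		root = kzalloc(sizeof(*root), GFP_KERNEL);
		if (!root)
			return ERR_PTR(-ENOMEM);
		/* xa_insert() returns -EBUSY if the key is already set */
		err = xa_insert(&dev->priv.page_root_xa, func_id, root,
				GFP_KERNEL);
		if (err) {
			kfree(root);
			return ERR_PTR(err);
		}
		*root = RB_ROOT;
	}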

NIC: ConnectX-4 Lx
CPU: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
Test case: 32 VFs; measure the release of one VF's pages as part of
FLR (Function Level Reset)
Before: 0.021 Sec
After:  0.014 Sec

The improvement depends on the number of VFs and on their memory
utilization. The time measurements above were taken on an idle system.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/pagealloc.c   | 142 +++++++++++++-----
 include/linux/mlx5/driver.h                   |   2 +-
 2 files changed, 105 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 5ddd18639a1ee..1b20e3397ddef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -35,6 +35,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mlx5/driver.h>
+#include <linux/xarray.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
 
@@ -73,15 +74,45 @@ enum {
 	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
 };
 
+static struct rb_root *page_root_per_func_id(struct mlx5_core_dev *dev, u16 func_id)
+{
+	struct rb_root *root;
+	int err;
+
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (root)
+		return root;
+
+	root = kzalloc(sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return ERR_PTR(-ENOMEM);
+
+	err = xa_insert(&dev->priv.page_root_xa, func_id, root, GFP_KERNEL);
+	if (err) {
+		kfree(root);
+		return ERR_PTR(err);
+	}
+
+	*root = RB_ROOT;
+
+	return root;
+}
+
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
 {
-	struct rb_root *root = &dev->priv.page_root;
-	struct rb_node **new = &root->rb_node;
 	struct rb_node *parent = NULL;
+	struct rb_root *root;
+	struct rb_node **new;
 	struct fw_page *nfp;
 	struct fw_page *tfp;
 	int i;
 
+	root = page_root_per_func_id(dev, func_id);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	new = &root->rb_node;
+
 	while (*new) {
 		parent = *new;
 		tfp = rb_entry(parent, struct fw_page, rb_node);
@@ -111,13 +142,20 @@ static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u
 	return 0;
 }
 
-static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
+static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr,
+				    u32 func_id)
 {
-	struct rb_root *root = &dev->priv.page_root;
-	struct rb_node *tmp = root->rb_node;
 	struct fw_page *result = NULL;
+	struct rb_root *root;
+	struct rb_node *tmp;
 	struct fw_page *tfp;
 
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return NULL;
+
+	tmp = root->rb_node;
+
 	while (tmp) {
 		tfp = rb_entry(tmp, struct fw_page, rb_node);
 		if (tfp->addr < addr) {
@@ -191,7 +229,13 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
 static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
 		     bool in_free_list)
 {
-	rb_erase(&fwp->rb_node, &dev->priv.page_root);
+	struct rb_root *root;
+
+	root = xa_load(&dev->priv.page_root_xa, fwp->func_id);
+	if (WARN_ON_ONCE(!root))
+		return;
+
+	rb_erase(&fwp->rb_node, root);
 	if (in_free_list)
 		list_del(&fwp->list);
 	dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
@@ -200,12 +244,12 @@ static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
 	kfree(fwp);
 }
 
-static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 func_id)
 {
 	struct fw_page *fwp;
 	int n;
 
-	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK, func_id);
 	if (!fwp) {
 		mlx5_core_warn_rl(dev, "page not found\n");
 		return;
@@ -340,7 +384,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 
 out_4k:
 	for (i--; i >= 0; i--)
-		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]), func_id);
 out_free:
 	kvfree(in);
 	if (notify_fail)
@@ -351,16 +395,19 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
 			      bool ec_function)
 {
+	struct rb_root *root;
 	struct rb_node *p;
 	int npages = 0;
 
-	p = rb_first(&dev->priv.page_root);
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return;
+
+	p = rb_first(root);
 	while (p) {
 		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);
 
 		p = rb_next(p);
-		if (fwp->func_id != func_id)
-			continue;
 		npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
 		free_fwp(dev, fwp, fwp->free_count);
 	}
@@ -378,6 +425,7 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 			     u32 *in, int in_size, u32 *out, int out_size)
 {
+	struct rb_root *root;
 	struct fw_page *fwp;
 	struct rb_node *p;
 	u32 func_id;
@@ -391,12 +439,14 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
 	func_id = MLX5_GET(manage_pages_in, in, function_id);
 
-	p = rb_first(&dev->priv.page_root);
+	root = xa_load(&dev->priv.page_root_xa, func_id);
+	if (WARN_ON_ONCE(!root))
+		return -EEXIST;
+
+	p = rb_first(root);
 	while (p && i < npages) {
 		fwp = rb_entry(p, struct fw_page, rb_node);
 		p = rb_next(p);
-		if (fwp->func_id != func_id)
-			continue;
 
 		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr);
 		i++;
@@ -446,7 +496,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	}
 
 	for (i = 0; i < num_claimed; i++)
-		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]), func_id);
 
 	if (nclaimed)
 		*nclaimed = num_claimed;
@@ -560,35 +610,49 @@ static int optimal_reclaimed_pages(void)
 	return ret;
 }
 
-int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
+				   struct rb_root *root, u16 func_id)
 {
 	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
-	struct fw_page *fwp;
-	struct rb_node *p;
-	int nclaimed = 0;
-	int err = 0;
 
-	do {
-		p = rb_first(&dev->priv.page_root);
-		if (p) {
-			fwp = rb_entry(p, struct fw_page, rb_node);
-			err = reclaim_pages(dev, fwp->func_id,
-					    optimal_reclaimed_pages(),
-					    &nclaimed, mlx5_core_is_ecpf(dev));
-
-			if (err) {
-				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
-					       err);
-				return err;
-			}
-			if (nclaimed)
-				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+	while (!RB_EMPTY_ROOT(root)) {
+		int nclaimed;
+		int err;
+
+		err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(),
+				    &nclaimed, mlx5_core_is_ecpf(dev));
+		if (err) {
+			mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n",
+				       err, func_id);
+			return err;
 		}
+
+		if (nclaimed)
+			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
+
 		if (time_after(jiffies, end)) {
 			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
 			break;
 		}
-	} while (p);
+	}
+
+	return 0;
+}
+
+int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
+{
+	struct rb_root *root;
+	unsigned long id;
+	void *entry;
+
+	xa_for_each(&dev->priv.page_root_xa, id, entry) {
+		root = entry;
+		mlx5_reclaim_root_pages(dev, root, id);
+		xa_erase(&dev->priv.page_root_xa, id);
+		kfree(root);
+	}
+
+	WARN_ON(!xa_empty(&dev->priv.page_root_xa));
 
 	WARN(dev->priv.fw_pages,
 	     "FW pages counter is %d after reclaiming all pages\n",
@@ -605,17 +669,19 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev)
 {
-	dev->priv.page_root = RB_ROOT;
 	INIT_LIST_HEAD(&dev->priv.free_list);
 	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
 	if (!dev->priv.pg_wq)
 		return -ENOMEM;
 
+	xa_init(&dev->priv.page_root_xa);
+
 	return 0;
 }
 
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
 {
+	xa_destroy(&dev->priv.page_root_xa);
 	destroy_workqueue(dev->priv.pg_wq);
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6a97ad601991e..a0fcc4d13e93a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -541,7 +541,7 @@ struct mlx5_priv {
 	/* pages stuff */
 	struct mlx5_nb          pg_nb;
 	struct workqueue_struct *pg_wq;
-	struct rb_root		page_root;
+	struct xarray           page_root_xa;
 	int			fw_pages;
 	atomic_t		reg_pages;
 	struct list_head	free_list;
-- 
2.26.2



Thread overview: 17+ messages
2020-07-28  9:43 [pull request][net-next 00/13] mlx5 updates 2020-07-28 Saeed Mahameed
2020-07-28  9:43 ` Saeed Mahameed [this message]
2020-07-28  9:44 ` [net-next 02/13] net/mlx5: Add function ID to reclaim pages debug log Saeed Mahameed
2020-07-28  9:44 ` [net-next 03/13] net/mlx5: E-switch, Consider maximum vf vports for steering init Saeed Mahameed
2020-07-28  9:44 ` [net-next 04/13] net/mlx5: E-switch, Reuse total_vports and avoid duplicate nvports Saeed Mahameed
2020-07-28  9:44 ` [net-next 05/13] net/mlx5: E-switch, Use eswitch total_vports Saeed Mahameed
2020-07-28  9:44 ` [net-next 06/13] net/mlx5e: Link non uplink representors to PCI device Saeed Mahameed
2020-07-28 20:26   ` Jakub Kicinski
2020-07-28  9:44 ` [net-next 07/13] net/mlx5e: RX, Re-work initializaiton of RX function pointers Saeed Mahameed
2020-07-28  9:44 ` [net-next 08/13] net/mlx5e: Move exposure of datapath function to txrx header Saeed Mahameed
2020-07-28  9:44 ` [net-next 09/13] net/mlx5e: Use indirect call wrappers for RX post WQEs functions Saeed Mahameed
2020-07-28  9:44 ` [net-next 10/13] net/mlx5e: Add support for PCI relaxed ordering Saeed Mahameed
2020-07-28  9:44 ` [net-next 11/13] net/mlx5: DR, Reduce print level for matcher print Saeed Mahameed
2020-07-28  9:44 ` [net-next 12/13] net/mlx5: Use fallthrough pseudo-keyword Saeed Mahameed
2020-07-28  9:44 ` [net-next 13/13] net/mlx5: drop unnecessary list_empty Saeed Mahameed
2020-07-28 20:23 ` [pull request][net-next 00/13] mlx5 updates 2020-07-28 David Miller
2020-07-28 20:27 ` Jakub Kicinski
