All of lore.kernel.org
 help / color / mirror / Atom feed
From: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
To: santosh.shilimkar@oracle.com, davem@davemloft.net,
	kuba@kernel.org, netdev@vger.kernel.org,
	linux-rdma@vger.kernel.org, rds-devel@oss.oracle.com,
	linux-kernel@vger.kernel.org
Cc: rama.nichanamatlu@oracle.com,
	rajesh.sivaramasubramaniom@oracle.com,
	Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
Subject: [PATCH RFC] rds: ib: Reduce the contention caused by the asynchronous workers to flush the mr pool
Date: Tue, 18 Jan 2022 14:47:18 +0000	[thread overview]
Message-ID: <1642517238-9912-1-git-send-email-praveen.kannoju@oracle.com> (raw)

This patch aims to reduce the number of asynchronous workers spawned to
execute the function "rds_ib_flush_mr_pool" during high I/O situations.
Synchronous call paths to "rds_ib_flush_mr_pool" are not affected and
execute as before. By reducing the number of processes contending to
flush the mr pool, the total number of D state processes waiting to
acquire the mutex lock is greatly reduced. Previously these waiters
caused DB instance crashes, because the corresponding processes made no
progress while waiting to acquire the mutex lock.

Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
---
 net/rds/ib.h       |  1 +
 net/rds/ib_mr.h    |  2 ++
 net/rds/ib_rdma.c  | 18 ++++++++++++++++--
 net/rds/ib_stats.c |  1 +
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/net/rds/ib.h b/net/rds/ib.h
index 2ba7110..d881e3f 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -308,6 +308,7 @@ struct rds_ib_statistics {
 	uint64_t	s_ib_rdma_mr_1m_pool_flush;
 	uint64_t	s_ib_rdma_mr_1m_pool_wait;
 	uint64_t	s_ib_rdma_mr_1m_pool_depleted;
+	uint64_t	s_ib_rdma_flush_mr_pool_avoided;
 	uint64_t	s_ib_rdma_mr_8k_reused;
 	uint64_t	s_ib_rdma_mr_1m_reused;
 	uint64_t	s_ib_atomic_cswp;
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index ea5e9ae..9cbec6e 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -105,6 +105,8 @@ struct rds_ib_mr_pool {
 	unsigned long		max_items_soft;
 	unsigned long		max_free_pinned;
 	unsigned int		max_pages;
+
+	bool                    flush_ongoing;	/* To avoid redundant flushes */
 };
 
 extern struct workqueue_struct *rds_ib_mr_wq;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 8f070ee..6b640b5 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -393,6 +393,8 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	 */
 	dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
 	dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
+	WRITE_ONCE(pool->flush_ongoing, true);
+	smp_wmb();
 	if (free_all) {
 		unsigned long flags;
 
@@ -430,6 +432,8 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	atomic_sub(nfreed, &pool->item_count);
 
 out:
+	WRITE_ONCE(pool->flush_ongoing, false);
+	smp_wmb();
 	mutex_unlock(&pool->flush_lock);
 	if (waitqueue_active(&pool->flush_wait))
 		wake_up(&pool->flush_wait);
@@ -507,8 +511,17 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 
 	/* If we've pinned too many pages, request a flush */
 	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
-	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
-		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+	    atomic_read(&pool->dirty_count) >= pool->max_items / 5) {
+		smp_rmb();
+		if (!READ_ONCE(pool->flush_ongoing)) {
+			queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+		} else {
+			/* This counter indicates the number of redundant
+			 * flush calls avoided, and provides an indication
+			 * of the load pattern imposed on kernel.
+			 */
+			rds_ib_stats_inc(s_ib_rdma_flush_mr_pool_avoided);
+		}
 
 	if (invalidate) {
 		if (likely(!in_interrupt())) {
@@ -670,6 +683,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
 
 	pool->max_free_pinned = pool->max_items * pool->max_pages / 4;
 	pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
+	pool->flush_ongoing = false;
 
 	return pool;
 }
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index ac46d89..29ae5cb 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -75,6 +75,7 @@
 	"ib_rdma_mr_1m_pool_flush",
 	"ib_rdma_mr_1m_pool_wait",
 	"ib_rdma_mr_1m_pool_depleted",
+	"ib_rdma_flush_mr_pool_avoided",
 	"ib_rdma_mr_8k_reused",
 	"ib_rdma_mr_1m_reused",
 	"ib_atomic_cswp",
-- 
1.8.3.1


             reply	other threads:[~2022-01-18 14:47 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-01-18 14:47 Praveen Kumar Kannoju [this message]
2022-01-18 16:48 ` [PATCH RFC] rds: ib: Reduce the contention caused by the asynchronous workers to flush the mr pool Santosh Shilimkar
2022-01-18 18:00   ` Leon Romanovsky
2022-01-18 19:17   ` Jason Gunthorpe
2022-01-18 19:42     ` Santosh Shilimkar
2022-01-19  6:59       ` Leon Romanovsky
2022-01-19 11:46         ` Praveen Kannoju
2022-01-19 13:04           ` Jason Gunthorpe
2022-01-19 13:12             ` Praveen Kannoju
2022-01-19 13:17               ` Jason Gunthorpe
2022-01-19 14:08                 ` Praveen Kannoju
2022-01-19 14:49                   ` Jason Gunthorpe
2022-01-19 14:56           ` Leon Romanovsky
2022-01-20  8:00             ` Praveen Kannoju
2022-01-20 11:11               ` Leon Romanovsky
2022-01-20 11:57                 ` Praveen Kannoju
2022-01-20 12:21                   ` Leon Romanovsky
2022-01-20 12:27                     ` Praveen Kannoju
  -- strict thread matches above, loose matches on Subject: below --
2022-01-18 13:10 Praveen Kumar Kannoju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1642517238-9912-1-git-send-email-praveen.kannoju@oracle.com \
    --to=praveen.kannoju@oracle.com \
    --cc=davem@davemloft.net \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rajesh.sivaramasubramaniom@oracle.com \
    --cc=rama.nichanamatlu@oracle.com \
    --cc=rds-devel@oss.oracle.com \
    --cc=santosh.shilimkar@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.