All of lore.kernel.org
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeedm@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>,
	Jason Gunthorpe <jgg@mellanox.com>,
	Doug Ledford <dledford@redhat.com>
Cc: Michael Chan <michael.chan@broadcom.com>,
	Andy Gospodarek <andy@greyhouse.net>,
	Tal Gilboa <talgi@mellanox.com>,
	"linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	Yamin Friedman <yaminf@mellanox.com>,
	Max Gurtovoy <maxg@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>
Subject: [for-next 8/9] linux/dim: Implement rdma_dim
Date: Wed, 5 Jun 2019 23:24:50 +0000	[thread overview]
Message-ID: <20190605232348.6452-9-saeedm@mellanox.com> (raw)
In-Reply-To: <20190605232348.6452-1-saeedm@mellanox.com>

From: Yamin Friedman <yaminf@mellanox.com>

rdma_dim implements a different algorithm than net_dim and is based on
completions, which is how interrupt moderation can be implemented in RDMA.
The algorithm optimizes for the number of completions and for the ratio
between completions and events.
To avoid long latencies, it can also quickly reduce the moderation level
when the traffic changes in such a way that high moderation is no longer
required.

rdma_dim(), declared in rdma_dim.h, will be called from the ib_core module.

Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 MAINTAINERS              |   1 +
 include/linux/rdma_dim.h |  28 +++++++
 lib/dim/Makefile         |   7 +-
 lib/dim/rdma_dim.c       | 162 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/rdma_dim.h
 create mode 100644 lib/dim/rdma_dim.c

diff --git a/MAINTAINERS b/MAINTAINERS
index cb621d5cf223..86e4698ab390 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5589,6 +5589,7 @@ DYNAMIC INTERRUPT MODERATION
 M:	Tal Gilboa <talgi@mellanox.com>
 S:	Maintained
 F:	include/linux/net_dim.h
+F:	include/linux/rdma_dim.h
 F:	include/linux/dim.h
 F:	lib/dim/
 
diff --git a/include/linux/rdma_dim.h b/include/linux/rdma_dim.h
new file mode 100644
index 000000000000..0623ea5a1e78
--- /dev/null
+++ b/include/linux/rdma_dim.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef RDMA_DIM_H
+#define RDMA_DIM_H
+
+#include <linux/module.h>
+#include <linux/dim.h>
+
+#define RDMA_DIM_PARAMS_NUM_PROFILES 9
+#define RDMA_DIM_START_PROFILE 0
+
+static const struct dim_cq_moder
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+	{1,   0, 1,  0},	/* index 0 == RDMA_DIM_START_PROFILE */
+	{1,   0, 4,  0},
+	{2,   0, 4,  0},
+	{2,   0, 8,  0},
+	{4,   0, 8,  0},
+	{16,  0, 8,  0},
+	{16,  0, 16, 0},
+	{32,  0, 16, 0},
+	{32,  0, 32, 0},	/* index RDMA_DIM_PARAMS_NUM_PROFILES - 1 */
+};
+
+void rdma_dim(struct dim *dim, u64 completions);
+
+#endif /* RDMA_DIM_H */
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
index 160afe288df0..73ddd0c64661 100644
--- a/lib/dim/Makefile
+++ b/lib/dim/Makefile
@@ -2,8 +2,13 @@
 # DIM Dynamic Interrupt Moderation library
 #
 
-obj-$(CONFIG_DIMLIB) = net_dim.o
+obj-$(CONFIG_DIMLIB) += net_dim.o
+obj-$(CONFIG_DIMLIB) += rdma_dim.o
 
 net_dim-y = \
 	dim.o		\
 	net_dim.o
+
+rdma_dim-y = \
+	dim.o		\
+	rdma_dim.o
diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c
new file mode 100644
index 000000000000..503881ec5614
--- /dev/null
+++ b/lib/dim/rdma_dim.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <linux/rdma_dim.h>
+
+/**
+ * rdma_dim_step - Moves the moderation profile one step.
+ * @dim: The moderation struct.
+ *
+ * Description: Moves the moderation profile of @dim by one step in the
+ * direction of dim->tune_state: DIM_GOING_RIGHT raises profile_ix and
+ * counts a step right, DIM_GOING_LEFT lowers it and counts a step left.
+ * Any other tune state (the parking states) leaves the profile as it is.
+ *
+ * Return: DIM_ON_EDGE, without moving, if the step would leave the
+ * profile range [0, RDMA_DIM_PARAMS_NUM_PROFILES - 1]; DIM_STEPPED
+ * otherwise.
+ */
+static int rdma_dim_step(struct dim *dim)
+{
+	const int last_ix = RDMA_DIM_PARAMS_NUM_PROFILES - 1;
+
+	if (dim->tune_state == DIM_GOING_RIGHT) {
+		if (dim->profile_ix == last_ix)
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+	} else if (dim->tune_state == DIM_GOING_LEFT) {
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+	}
+
+	return DIM_STEPPED;
+}
+
+/**
+ * rdma_dim_stats_compare - Compares the current stats to the previous stats.
+ * @curr: The current dim stats.
+ * @prev: The previous dim stats.
+ *
+ * Return: DIM_STATS_BETTER/DIM_STATS_WORSE on a significant change in the
+ * completions or, failing that, in their ratio to events; else DIM_STATS_SAME.
+ */
+static int rdma_dim_stats_compare(struct dim_stats *curr,
+				  struct dim_stats *prev)
+{
+	/* first stat */
+	if (!prev->cpms)
+		return DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms))
+		return (curr->cpms > prev->cpms) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	/* Guard the division inside IS_SIGNIFICANT_DIFF() against zero. */
+	if (prev->cpe_ratio &&
+	    IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio))
+		return (curr->cpe_ratio > prev->cpe_ratio) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	return DIM_STATS_SAME;
+}
+
+/**
+ * rdma_dim_decision - Decides the next moderation level.
+ * @curr_stats: The current dim stats.
+ * @dim: The moderation struct.
+ *
+ * Description: Only acts while @dim is going right or left; the parking
+ * states keep their profile. The result of rdma_dim_stats_compare()
+ * against the previous stats selects the action:
+ *
+ * - DIM_STATS_SAME: if the completion to event ratio is low compared to
+ *   the current level, reset to profile 0 to keep latency low.
+ * - DIM_STATS_WORSE: dim_turn(), then step as for DIM_STATS_BETTER.
+ * - DIM_STATS_BETTER: rdma_dim_step(), and dim_turn() if it was on edge.
+ *
+ * In every state @curr_stats is stored as the new previous stats.
+ *
+ * Return: true if the profile index changed, false otherwise.
+ */
+static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+	int prev_ix = dim->profile_ix;
+	bool tuning;
+
+	tuning = dim->tune_state == DIM_GOING_RIGHT ||
+		 dim->tune_state == DIM_GOING_LEFT;
+	if (!tuning)
+		goto out;
+
+	switch (rdma_dim_stats_compare(curr_stats, &dim->prev_stats)) {
+	case DIM_STATS_SAME:
+		if (curr_stats->cpe_ratio <= 50 * prev_ix)
+			dim->profile_ix = 0;
+		break;
+	case DIM_STATS_WORSE:
+		dim_turn(dim);
+		/* fall through */
+	case DIM_STATS_BETTER:
+		if (rdma_dim_step(dim) == DIM_ON_EDGE)
+			dim_turn(dim);
+		break;
+	}
+
+out:
+	dim->prev_stats = *curr_stats;
+
+	return dim->profile_ix != prev_ix;
+}
+
+/**
+ * rdma_dim - Runs the adaptive moderation.
+ * @dim: The moderation struct.
+ * @completions: The number of completions collected in this round.
+ *
+ * Each call counts as one event carrying @completions. After DIM_NEVENTS
+ * events a new level is decided and, if it changed, dim->work is scheduled.
+ */
+void rdma_dim(struct dim *dim, u64 completions)
+{
+	struct dim_sample *curr_sample = &dim->measuring_sample;
+	struct dim_stats curr_stats;
+	u32 nevents;
+
+	dim_update_sample_with_comps(curr_sample->event_ctr + 1,
+				     curr_sample->pkt_ctr,
+				     curr_sample->byte_ctr,
+				     curr_sample->comp_ctr + completions,
+				     &dim->measuring_sample);
+
+	switch (dim->state) {
+	case DIM_MEASURE_IN_PROGRESS:
+		/* u16 event_ctr wraps; BIT_GAP() keeps the count correct */
+		nevents = BIT_GAP(BITS_PER_TYPE(u16), curr_sample->event_ctr,
+				  dim->start_sample.event_ctr);
+		if (nevents < DIM_NEVENTS)
+			break;
+		dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats);
+		if (rdma_dim_decision(&curr_stats, dim)) {
+			dim->state = DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		/* fall through */
+	case DIM_START_MEASURE:
+		dim->state = DIM_MEASURE_IN_PROGRESS;
+		dim_update_sample_with_comps(curr_sample->event_ctr,
+					     curr_sample->pkt_ctr,
+					     curr_sample->byte_ctr,
+					     curr_sample->comp_ctr,
+					     &dim->start_sample);
+		break;
+	case DIM_APPLY_NEW_PROFILE:
+		break;
+	}
+}
+EXPORT_SYMBOL(rdma_dim);
-- 
2.21.0


  parent reply	other threads:[~2019-06-05 23:24 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-05 23:24 [pull request][for-next 0/9] Generic DIM lib for netdev and RDMA Saeed Mahameed
2019-06-05 23:24 ` [for-next 1/9] linux/dim: Move logic to dim.h Saeed Mahameed
2019-06-05 23:24 ` [for-next 2/9] linux/dim: Remove "net" prefix from internal DIM members Saeed Mahameed
2019-06-05 23:24 ` [for-next 3/9] linux/dim: Rename externally exposed macros Saeed Mahameed
2019-06-05 23:24 ` [for-next 4/9] linux/dim: Rename net_dim_sample() to net_dim_update_sample() Saeed Mahameed
2019-06-05 23:24 ` [for-next 5/9] linux/dim: Rename externally used net_dim members Saeed Mahameed
2019-06-05 23:24 ` [for-next 6/9] linux/dim: Move implementation to .c files Saeed Mahameed
2019-06-05 23:24 ` [for-next 7/9] linux/dim: Add completions count to dim_sample Saeed Mahameed
2019-06-05 23:24 ` Saeed Mahameed [this message]
2019-06-05 23:24 ` [for-next 9/9] RDMA/core: Provide RDMA DIM support for ULPs Saeed Mahameed
2019-06-06  7:14 ` [pull request][for-next 0/9] Generic DIM lib for netdev and RDMA Leon Romanovsky
2019-06-06  7:19   ` Max Gurtovoy
2019-06-06  7:53     ` Leon Romanovsky
2019-06-06 13:07     ` Jason Gunthorpe
2019-06-07 18:14       ` Saeed Mahameed
2019-06-07 18:57         ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190605232348.6452-9-saeedm@mellanox.com \
    --to=saeedm@mellanox.com \
    --cc=andy@greyhouse.net \
    --cc=davem@davemloft.net \
    --cc=dledford@redhat.com \
    --cc=jgg@mellanox.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=maxg@mellanox.com \
    --cc=michael.chan@broadcom.com \
    --cc=netdev@vger.kernel.org \
    --cc=talgi@mellanox.com \
    --cc=yaminf@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.