netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Saeed Mahameed <saeedm@mellanox.com>
To: "David S. Miller" <davem@davemloft.net>,
	Doug Ledford <dledford@redhat.com>,
	Jason Gunthorpe <jgg@mellanox.com>
Cc: Leon Romanovsky <leonro@mellanox.com>,
	Or Gerlitz <ogerlitz@mellanox.com>,
	Sagi Grimberg <sagi@grimberg.me>, Tal Gilboa <talgi@mellanox.com>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
	Yamin Friedman <yaminf@mellanox.com>,
	Max Gurtovoy <maxg@mellanox.com>,
	Saeed Mahameed <saeedm@mellanox.com>
Subject: [for-next V2 08/10] linux/dim: Implement rdma_dim
Date: Tue, 25 Jun 2019 20:57:48 +0000	[thread overview]
Message-ID: <20190625205701.17849-9-saeedm@mellanox.com> (raw)
In-Reply-To: <20190625205701.17849-1-saeedm@mellanox.com>

From: Yamin Friedman <yaminf@mellanox.com>

rdma_dim implements a different algorithm than net_dim and is based on
completions which is how we can implement interrupt moderation in RDMA.
The algorithm optimizes for number of completions and ratio between
completions and events.
It also has a feature for fast reduction of moderation level when the
traffic changes in such a way as to no longer require high moderation in
order to avoid long latencies.

rdma_dim will be called from the ib_core module.

Signed-off-by: Yamin Friedman <yaminf@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/dim.h |  36 ++++++++++++++
 lib/dim/Makefile    |   6 +--
 lib/dim/rdma_dim.c  | 112 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 4 deletions(-)
 create mode 100644 lib/dim/rdma_dim.c

diff --git a/include/linux/dim.h b/include/linux/dim.h
index aa9bdd47a648..1ae32835723a 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -82,6 +82,7 @@ struct dim_stats {
  * @prev_stats: Measured rates from previous iteration (for comparison)
  * @start_sample: Sampled data at start of current iteration
  * @work: Work to perform on action required
+ * @dim_owner: A pointer to the struct that points to dim
  * @profile_ix: Current moderation profile
  * @mode: CQ period count mode
  * @tune_state: Algorithm tuning state (see below)
@@ -95,6 +96,7 @@ struct dim {
 	struct dim_sample start_sample;
 	struct dim_sample measuring_sample;
 	struct work_struct work;
+	void *dim_owner;
 	u8 profile_ix;
 	u8 mode;
 	u8 tune_state;
@@ -363,4 +365,38 @@ struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
  */
 void net_dim(struct dim *dim, struct dim_sample end_sample);
 
+/* RDMA DIM */
+
+/*
+ * RDMA DIM profile:
+ * profile size must be of RDMA_DIM_PARAMS_NUM_PROFILES.
+ */
+#define RDMA_DIM_PARAMS_NUM_PROFILES 9
+#define RDMA_DIM_START_PROFILE 0
+
+static const struct dim_cq_moder
+rdma_dim_prof[RDMA_DIM_PARAMS_NUM_PROFILES] = {
+	{1,   0, 1,  0},
+	{1,   0, 4,  0},
+	{2,   0, 4,  0},
+	{2,   0, 8,  0},
+	{4,   0, 8,  0},
+	{16,  0, 8,  0},
+	{16,  0, 16, 0},
+	{32,  0, 16, 0},
+	{32,  0, 32, 0},
+};
+
+/**
+ * rdma_dim - Runs the adaptive moderation.
+ * @dim: The moderation struct.
+ * @completions: The number of completions collected in this round.
+ *
+ * Each call to rdma_dim takes the latest amount of completions that
+ * have been collected and counts them as a new event.
+ * Once enough events have been collected the algorithm decides a new
+ * moderation level.
+ */
+void rdma_dim(struct dim *dim, u64 completions);
+
 #endif /* DIM_H */
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
index 160afe288df0..1d6858a108cb 100644
--- a/lib/dim/Makefile
+++ b/lib/dim/Makefile
@@ -2,8 +2,6 @@
 # DIM Dynamic Interrupt Moderation library
 #
 
-obj-$(CONFIG_DIMLIB) = net_dim.o
+obj-$(CONFIG_DIMLIB) += dim.o
 
-net_dim-y = \
-	dim.o		\
-	net_dim.o
+dim-y := dim.o net_dim.o rdma_dim.o
diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c
new file mode 100644
index 000000000000..1bfe8f546a20
--- /dev/null
+++ b/lib/dim/rdma_dim.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <linux/dim.h>
+
+static int rdma_dim_step(struct dim *dim)
+{
+	if (dim->tune_state == DIM_GOING_RIGHT) {
+		if (dim->profile_ix == (RDMA_DIM_PARAMS_NUM_PROFILES - 1))
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+	}
+	if (dim->tune_state == DIM_GOING_LEFT) {
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+	}
+
+	return DIM_STEPPED;
+}
+
+static int rdma_dim_stats_compare(struct dim_stats *curr,
+				  struct dim_stats *prev)
+{
+	/* first stat */
+	if (!prev->cpms)
+		return DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms))
+		return (curr->cpms > prev->cpms) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	if (IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio))
+		return (curr->cpe_ratio > prev->cpe_ratio) ? DIM_STATS_BETTER :
+						DIM_STATS_WORSE;
+
+	return DIM_STATS_SAME;
+}
+
+static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+	int prev_ix = dim->profile_ix;
+	u8 state = dim->tune_state;
+	int stats_res;
+	int step_res;
+
+	if (state != DIM_PARKING_ON_TOP && state != DIM_PARKING_TIRED) {
+		stats_res = rdma_dim_stats_compare(curr_stats,
+						   &dim->prev_stats);
+
+		switch (stats_res) {
+		case DIM_STATS_SAME:
+			if (curr_stats->cpe_ratio <= 50 * prev_ix)
+				dim->profile_ix = 0;
+			break;
+		case DIM_STATS_WORSE:
+			dim_turn(dim);
+			/* fall through */
+		case DIM_STATS_BETTER:
+			step_res = rdma_dim_step(dim);
+			if (step_res == DIM_ON_EDGE)
+				dim_turn(dim);
+			break;
+		}
+	}
+
+	dim->prev_stats = *curr_stats;
+
+	return dim->profile_ix != prev_ix;
+}
+
+void rdma_dim(struct dim *dim, u64 completions)
+{
+	struct dim_sample *curr_sample = &dim->measuring_sample;
+	struct dim_stats curr_stats;
+	u32 nevents;
+
+	dim_update_sample_with_comps(curr_sample->event_ctr + 1,
+				     curr_sample->pkt_ctr,
+				     curr_sample->byte_ctr,
+				     curr_sample->comp_ctr + completions,
+				     &dim->measuring_sample);
+
+	switch (dim->state) {
+	case DIM_MEASURE_IN_PROGRESS:
+		nevents = curr_sample->event_ctr - dim->start_sample.event_ctr;
+		if (nevents < DIM_NEVENTS)
+			break;
+		dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats);
+		if (rdma_dim_decision(&curr_stats, dim)) {
+			dim->state = DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		/* fall through */
+	case DIM_START_MEASURE:
+		dim->state = DIM_MEASURE_IN_PROGRESS;
+		dim_update_sample_with_comps(curr_sample->event_ctr,
+					     curr_sample->pkt_ctr,
+					     curr_sample->byte_ctr,
+					     curr_sample->comp_ctr,
+					     &dim->start_sample);
+		break;
+	case DIM_APPLY_NEW_PROFILE:
+		break;
+	}
+}
+EXPORT_SYMBOL(rdma_dim);
-- 
2.21.0


  parent reply	other threads:[~2019-06-25 20:58 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-25 20:57 [pull request][for-next V2 0/7] Generic DIM lib for netdev and RDMA Saeed Mahameed
2019-06-25 20:57 ` [for-next V2 01/10] linux/dim: Move logic to dim.h Saeed Mahameed
2019-06-25 21:53   ` Sagi Grimberg
2019-06-25 20:57 ` [for-next V2 02/10] linux/dim: Remove "net" prefix from internal DIM members Saeed Mahameed
2019-06-25 21:53   ` Sagi Grimberg
2019-06-25 20:57 ` [for-next V2 03/10] linux/dim: Rename externally exposed macros Saeed Mahameed
2019-06-25 21:54   ` Sagi Grimberg
2019-06-25 20:57 ` [for-next V2 04/10] linux/dim: Rename net_dim_sample() to net_dim_update_sample() Saeed Mahameed
2019-06-25 21:55   ` Sagi Grimberg
2019-06-25 20:57 ` [for-next V2 05/10] linux/dim: Rename externally used net_dim members Saeed Mahameed
2019-06-25 21:57   ` Sagi Grimberg
2019-06-26  6:38     ` Tal Gilboa
2019-06-25 20:57 ` [for-next V2 06/10] linux/dim: Move implementation to .c files Saeed Mahameed
2019-07-02 16:15   ` Geert Uytterhoeven
2019-06-25 20:57 ` [for-next V2 07/10] linux/dim: Add completions count to dim_sample Saeed Mahameed
2019-06-25 20:57 ` Saeed Mahameed [this message]
2019-06-25 22:02   ` [for-next V2 08/10] linux/dim: Implement rdma_dim Sagi Grimberg
2019-06-26 11:57     ` Or Gerlitz
2019-06-27  5:25     ` Yamin Friedman
2019-06-25 20:57 ` [for-next V2 09/10] RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink Saeed Mahameed
2019-06-25 21:15   ` Sagi Grimberg
2019-06-27  5:29     ` Yamin Friedman
2019-06-25 20:57 ` [for-next V2 10/10] RDMA/core: Provide RDMA DIM support for ULPs Saeed Mahameed
2019-06-25 21:14   ` Sagi Grimberg
2019-06-26  7:56     ` Idan Burstein
2019-07-02  5:36       ` Sagi Grimberg
2019-07-02  6:41         ` Leon Romanovsky
2019-07-03 18:56           ` Sagi Grimberg
2019-07-04  7:51             ` Leon Romanovsky
2019-07-04 12:30         ` Idan Burstein
2019-06-27  5:28     ` Yamin Friedman
2019-06-25 21:07 ` [pull request][for-next V2 0/7] Generic DIM lib for netdev and RDMA Saeed Mahameed
2019-06-27 19:43 ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190625205701.17849-9-saeedm@mellanox.com \
    --to=saeedm@mellanox.com \
    --cc=davem@davemloft.net \
    --cc=dledford@redhat.com \
    --cc=jgg@mellanox.com \
    --cc=leonro@mellanox.com \
    --cc=linux-rdma@vger.kernel.org \
    --cc=maxg@mellanox.com \
    --cc=netdev@vger.kernel.org \
    --cc=ogerlitz@mellanox.com \
    --cc=sagi@grimberg.me \
    --cc=talgi@mellanox.com \
    --cc=yaminf@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).