All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sagi Grimberg <sagi@grimberg.me>
To: linux-block@vger.kernel.org, linux-rdma@vger.kernel.org
Cc: Jason Gunthorpe <jgg@ziepe.ca>, Christoph Hellwig <hch@lst.de>,
	Jens Axboe <axboe@kernel.dk>, Idan Burstein <idanb@mellanox.com>,
	Bart Van Assche <bart.vanassche@wdc.com>
Subject: [PATCH rfc 1/5] irq-am: Introduce library implementing generic adaptive moderation
Date: Tue,  6 Feb 2018 00:03:12 +0200	[thread overview]
Message-ID: <20180205220316.30236-2-sagi@grimberg.me> (raw)
In-Reply-To: <20180205220316.30236-1-sagi@grimberg.me>

irq-am library helps I/O devices implement interrupt moderation in
an adaptive fashion, based on online stats.

The consumer can initialize an irq-am context with a callback that
performs the device-specific moderation programming, and with the number
of am (adaptive moderation) levels, which are abstracted to allow
for device-specific tuning.

The irq-am code will sample once every nr_events events and will check for a
significant change in workload characteristics (completions per second, events
per second) and, if it detects one, will perform an am level update (called a step).

The irq-am code assumes that the am levels are sorted in increasing order, where
the lowest level corresponds to the optimum latency tuning (short time and low
completion-count) and gradually increasing towards the throughput optimum tuning
(longer time and higher completion-count). So there is a trend and tuning direction
tracked by the moderator. When the moderator collects sufficient statistics (also
controlled by the consumer defining nr_events), it compares the current stats with the
previous stats and if a significant change was observed in the load, the moderator
attempts to increment/decrement its current level (step) and schedules a program
dispatch work.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/irq-am.h | 116 +++++++++++++++++++++++++++++++
 lib/Kconfig            |   5 ++
 lib/Makefile           |   1 +
 lib/irq-am.c           | 182 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 304 insertions(+)
 create mode 100644 include/linux/irq-am.h
 create mode 100644 lib/irq-am.c

diff --git a/include/linux/irq-am.h b/include/linux/irq-am.h
new file mode 100644
index 000000000000..5ddd5ca268aa
--- /dev/null
+++ b/include/linux/irq-am.h
@@ -0,0 +1,116 @@
+/*
+ * Adaptive moderation support for I/O devices.
+ * Copyright (c) 2018 Lightbits Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _IRQ_AM_H
+#define _IRQ_AM_H
+
+#include <linux/ktime.h>
+#include <linux/workqueue.h>
+
+struct irq_am;
+
+/*
+ * irq_am_fn - device specific moderation programming callback
+ * @am:        the moderation monitor the level belongs to
+ * @level:     moderation level to program
+ *
+ * Called from the monitor's work item with the monitor's current level.
+ * A non-zero return value is reported through WARN_ON_ONCE().
+ */
+typedef int (irq_am_fn)(struct irq_am *am, unsigned short level);
+
+/*
+ * struct irq_am_sample_stats - sample stats for adaptive moderation
+ * @cps:        completions per-second
+ * @eps:        events per-second
+ * @cpe:        completions per event
+ *
+ * All three values are computed with a rounding-up division.
+ */
+struct irq_am_sample_stats {
+	u32 cps;
+	u32 eps;
+	u32 cpe;
+};
+
+/*
+ * struct irq_am_sample - per-irq interrupt batch sample unit
+ * @time:      time at which the sample was taken
+ * @comps:     value of the overall completions counter at sample time
+ * @events:    value of the overall events counter at sample time
+ *
+ * The counters are snapshots of the monitor's running totals, not deltas;
+ * the activity between two samples is the difference of their fields.
+ */
+struct irq_am_sample {
+	ktime_t	time;
+	u64	comps;
+	u64	events;
+};
+
+/*
+ * enum irq_am_state - adaptive moderation monitor states
+ * @IRQ_AM_START_MEASURING:        collect first sample (start_sample)
+ * @IRQ_AM_MEASURING:              measurement in progress
+ * @IRQ_AM_PROGRAM_MODERATION:     moderation program scheduled
+ *                                 so we should not react to any stats
+ *                                 from the old moderation profile.
+ */
+enum irq_am_state {
+	IRQ_AM_START_MEASURING,
+	IRQ_AM_MEASURING,
+	IRQ_AM_PROGRAM_MODERATION,
+};
+
+/*
+ * enum irq_am_tune_state - direction the monitor is currently tuning in
+ * @IRQ_AM_GOING_UP:       steps move towards higher moderation levels
+ * @IRQ_AM_GOING_DOWN:     steps move towards lower moderation levels
+ */
+enum irq_am_tune_state {
+	IRQ_AM_GOING_UP = 0,
+	IRQ_AM_GOING_DOWN = 1,
+};
+
+/*
+ * enum irq_am_relative_diff - result of comparing a sample with the previous one
+ * @IRQ_AM_STATS_WORSE:    stats got worse, the tuning direction is reversed
+ * @IRQ_AM_STATS_SAME:     no significant change, the level is kept
+ * @IRQ_AM_STATS_BETTER:   stats improved, keep stepping in the same direction
+ */
+enum irq_am_relative_diff {
+	IRQ_AM_STATS_WORSE = 0,
+	IRQ_AM_STATS_SAME = 1,
+	IRQ_AM_STATS_BETTER = 2,
+};
+
+/*
+ * struct irq_am_stats - overall activity counters of a monitor
+ * @events:    running total of events
+ * @comps:     running total of completions
+ */
+struct irq_am_stats {
+	u64 events;
+	u64 comps;
+};
+
+/*
+ * struct irq_am - irq adaptive moderation monitor
+ * @state:             adaptive moderation monitor state
+ * @tune_state:        tuning state of the moderation monitor
+ * @am_stats:          overall completions and events counters
+ * @start_sample:      first sample in moderation batch
+ * @prev_stats:        previous stats for trend detection
+ * @nr_events:         number of events between samples (16 bits wide,
+ *                     so larger values do not fit)
+ * @nr_levels:         number of moderation levels
+ * @curr_level:        current moderation level
+ * @work:              work item that runs the moderation program handler
+ * @program:           moderation program handler, called with the
+ *                     current level
+ */
+struct irq_am {
+	enum irq_am_state		state;
+	enum irq_am_tune_state		tune_state;
+
+	struct irq_am_stats		am_stats;
+	struct irq_am_sample		start_sample;
+	struct irq_am_sample_stats	prev_stats;
+
+	u16				nr_events;
+	unsigned short			nr_levels;
+	unsigned short			curr_level;
+
+	struct work_struct		work;
+	irq_am_fn			*program;
+};
+
+void irq_am_add_event(struct irq_am *am);
+/*
+ * irq_am_add_comps - account completions
+ * @am:        moderation monitor
+ * @n:         number of completions to add to the overall counter
+ */
+static inline void irq_am_add_comps(struct irq_am *am, u64 n)
+{
+	struct irq_am_stats *totals = &am->am_stats;
+
+	totals->comps += n;
+}
+
+void irq_am_cleanup(struct irq_am *am);
+void irq_am_init(struct irq_am *am, unsigned int nr_events,
+	unsigned short nr_levels, unsigned short start_level, irq_am_fn *fn);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 4dd5c11366f9..bbb4c9eea84d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -504,6 +504,11 @@ config DDR
 	  information. This data is useful for drivers handling
 	  DDR SDRAM controllers.
 
+config IRQ_AM
+	bool "IRQ adaptive moderation library"
+	help
+	  Helper library to implement adaptive moderation for I/O devices.
+
 config IRQ_POLL
 	bool "IRQ polling library"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..795583a685b9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -193,6 +193,7 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_SG_POOL) += sg_pool.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 obj-$(CONFIG_IRQ_POLL) += irq_poll.o
+obj-$(CONFIG_IRQ_AM) += irq-am.o
 
 obj-$(CONFIG_STACKDEPOT) += stackdepot.o
 KASAN_SANITIZE_stackdepot.o := n
diff --git a/lib/irq-am.c b/lib/irq-am.c
new file mode 100644
index 000000000000..ed7befd7a560
--- /dev/null
+++ b/lib/irq-am.c
@@ -0,0 +1,182 @@
+/*
+ * Adaptive moderation support for I/O devices.
+ * Copyright (c) 2018 Lightbits Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/irq-am.h>
+
+/*
+ * irq_am_try_step - move one level in the current tuning direction
+ * @am:        moderation monitor
+ *
+ * Going up stops at the top level (nr_levels - 1) and going down stops at
+ * level 0. The upper bound is tested as "curr_level + 1 < nr_levels" rather
+ * than "curr_level != nr_levels - 1", so a monitor with zero levels, or one
+ * whose level is already past the top, never climbs any further.
+ */
+static void irq_am_try_step(struct irq_am *am)
+{
+	switch (am->tune_state) {
+	case IRQ_AM_GOING_UP:
+		if (am->curr_level + 1 < am->nr_levels)
+			am->curr_level++;
+		break;
+	case IRQ_AM_GOING_DOWN:
+		if (am->curr_level)
+			am->curr_level--;
+		break;
+	}
+}
+
+/*
+ * irq_am_on_edge - is the monitor at the lowest or at the highest level
+ * @am:        moderation monitor
+ */
+static inline bool irq_am_on_edge(struct irq_am *am)
+{
+	const unsigned short level = am->curr_level;
+
+	if (level == 0)
+		return true;
+
+	return level == am->nr_levels - 1;
+}
+
+/*
+ * irq_am_turn - reverse the tuning direction and take a step that way
+ * @am:        moderation monitor
+ */
+static void irq_am_turn(struct irq_am *am)
+{
+	if (am->tune_state == IRQ_AM_GOING_UP)
+		am->tune_state = IRQ_AM_GOING_DOWN;
+	else
+		am->tune_state = IRQ_AM_GOING_UP;
+
+	irq_am_try_step(am);
+}
+
+/*
+ * irq_am_significant_diff - did @val move significantly relative to @ref
+ * @val:       current value
+ * @ref:       reference value
+ *
+ * Returns true when (100 * |val - ref|) / ref, with truncating division,
+ * is greater than 20. The comparison is done by cross-multiplying in 64
+ * bits: the 32-bit "100 * difference" product overflows once the values
+ * are tens of millions apart, and a zero @ref must not be divided by.
+ * With a zero @ref any non-zero @val counts as a significant change.
+ */
+static inline bool irq_am_significant_diff(u32 val, u32 ref)
+{
+	u64 diff = val > ref ? val - ref : ref - val;
+
+	if (!ref)
+		return val != 0;
+
+	/* floor(100 * diff / ref) > 20  <=>  100 * diff >= 21 * ref */
+	return 100 * diff >= 21 * (u64)ref;
+}
+
+#define IRQ_AM_SIGNIFICANT_DIFF(val, ref) \
+	irq_am_significant_diff((val), (ref)) /* more than 20% difference */
+
+/*
+ * irq_am_stats_compare - classify the current sample against the previous one
+ * @am:        moderation monitor holding the previous stats
+ * @curr:      stats of the sample that just ended
+ *
+ * Returns one of the irq_am_relative_diff values. Completions per second
+ * are looked at first, then events per second; the completions-per-event
+ * ratio is only consulted when neither of them moved significantly.
+ */
+static int irq_am_stats_compare(struct irq_am *am, struct irq_am_sample_stats *curr)
+{
+	struct irq_am_sample_stats *last = &am->prev_stats;
+
+	/* nothing to compare against yet */
+	if (last->cps == 0)
+		return IRQ_AM_STATS_SAME;
+
+	/* a higher completion rate is an improvement */
+	if (IRQ_AM_SIGNIFICANT_DIFF(curr->cps, last->cps)) {
+		if (curr->cps > last->cps)
+			return IRQ_AM_STATS_BETTER;
+		return IRQ_AM_STATS_WORSE;
+	}
+
+	/* a lower event rate is an improvement */
+	if (IRQ_AM_SIGNIFICANT_DIFF(curr->eps, last->eps)) {
+		if (curr->eps < last->eps)
+			return IRQ_AM_STATS_BETTER;
+		return IRQ_AM_STATS_WORSE;
+	}
+
+	/*
+	 * One completion per event above level 0 means aggregating buys
+	 * nothing: steer the monitor towards lower levels, whichever
+	 * direction it is heading in right now.
+	 */
+	if (curr->cpe == 1 && am->curr_level != 0) {
+		if (am->tune_state == IRQ_AM_GOING_UP)
+			return IRQ_AM_STATS_WORSE;
+		return IRQ_AM_STATS_BETTER;
+	}
+
+	return IRQ_AM_STATS_SAME;
+}
+
+/*
+ * irq_am_decision - act on a finished sample
+ * @am:           moderation monitor
+ * @curr_stats:   stats of the sample that just ended
+ *
+ * Reverses the tuning direction and steps when the stats got worse, steps
+ * in the current direction when they got better, and stays put otherwise.
+ *
+ * Returns true when the level moved or the monitor sits on the first or
+ * the last level. In that case, and as long as no previous stats exist,
+ * @curr_stats becomes the reference for the next comparison.
+ */
+static bool irq_am_decision(struct irq_am *am,
+		struct irq_am_sample_stats *curr_stats)
+{
+	const unsigned short level_before = am->curr_level;
+	enum irq_am_relative_diff verdict;
+	bool changed;
+
+	verdict = irq_am_stats_compare(am, curr_stats);
+	if (verdict == IRQ_AM_STATS_WORSE)
+		irq_am_turn(am);
+	else if (verdict == IRQ_AM_STATS_BETTER)
+		irq_am_try_step(am);
+	/* IRQ_AM_STATS_SAME and anything unexpected leave the level alone */
+
+	changed = irq_am_on_edge(am) || am->curr_level != level_before;
+	if (changed || am->prev_stats.cps == 0)
+		am->prev_stats = *curr_stats;
+
+	return changed;
+}
+
+/*
+ * irq_am_sample - snapshot the clock and the overall counters
+ * @am:        moderation monitor to read the counters from
+ * @s:         sample to fill
+ */
+static void irq_am_sample(struct irq_am *am, struct irq_am_sample *s)
+{
+	const struct irq_am_stats *totals = &am->am_stats;
+
+	s->time = ktime_get();
+	s->comps = totals->comps;
+	s->events = totals->events;
+}
+
+/*
+ * irq_am_calc_stats - turn two samples into per-second / per-event rates
+ * @am:           moderation monitor (provides nr_events and prev_stats)
+ * @start:        sample taken when the measurement started
+ * @end:          sample taken when the measurement ended
+ * @curr_stats:   result, always fully written
+ *
+ * When no time elapsed between the samples, or nr_events is zero, no rate
+ * can be computed. The previous stats are reported in that case, so the
+ * caller never reads an unwritten result and sees "no change".
+ *
+ * The arithmetic is done in 64 bits and each result is capped at U32_MAX,
+ * so large completion counts neither wrap nor overflow the multiplication
+ * by USEC_PER_SEC.
+ */
+static void irq_am_calc_stats(struct irq_am *am, struct irq_am_sample *start,
+		struct irq_am_sample *end,
+		struct irq_am_sample_stats *curr_stats)
+{
+	s64 elapsed_us = ktime_us_delta(end->time, start->time);
+	u64 ncomps = end->comps - start->comps;
+	u32 delta_us;
+
+	if (elapsed_us <= 0 || !am->nr_events) {
+		*curr_stats = am->prev_stats;
+		return;
+	}
+
+	/* the divisor is kept to 32 bits: u32 holds up to 71 minutes */
+	delta_us = min_t(s64, elapsed_us, U32_MAX);
+
+	curr_stats->cps = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL(ncomps * USEC_PER_SEC, delta_us));
+	curr_stats->eps = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL((u64)am->nr_events * USEC_PER_SEC,
+					 delta_us));
+	curr_stats->cpe = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL(ncomps, am->nr_events));
+}
+
+/*
+ * irq_am_add_event - account one event and advance the monitor
+ * @am:        moderation monitor
+ *
+ * While measuring, once nr_events events were seen since the start sample,
+ * the stats are computed and a decision is taken. If the decision reports
+ * a change, the moderation work is scheduled and events are ignored until
+ * the monitor leaves the program-moderation state. Otherwise a new
+ * measurement is started right away.
+ */
+void irq_am_add_event(struct irq_am *am)
+{
+	struct irq_am_sample_stats stats;
+	struct irq_am_sample now;
+	enum irq_am_state state;
+	u16 seen;
+
+	am->am_stats.events++;
+
+	state = am->state;
+	if (state == IRQ_AM_MEASURING) {
+		seen = am->am_stats.events - am->start_sample.events;
+		if (seen < am->nr_events)
+			return;
+
+		irq_am_sample(am, &now);
+		irq_am_calc_stats(am, &am->start_sample, &now, &stats);
+		if (irq_am_decision(am, &stats)) {
+			am->state = IRQ_AM_PROGRAM_MODERATION;
+			schedule_work(&am->work);
+			return;
+		}
+	} else if (state != IRQ_AM_START_MEASURING) {
+		/* a moderation update is pending: only count the event */
+		return;
+	}
+
+	/* open a new measurement window */
+	irq_am_sample(am, &am->start_sample);
+	am->state = IRQ_AM_MEASURING;
+}
+EXPORT_SYMBOL_GPL(irq_am_add_event);
+
+/*
+ * irq_am_program_moderation_work - program the current level into the device
+ * @w:         work item embedded in the moderation monitor
+ *
+ * Hands the current level to the consumer's program callback, warns once
+ * if the callback returns non-zero, and restarts measuring either way.
+ */
+static void irq_am_program_moderation_work(struct work_struct *w)
+{
+	struct irq_am *am = container_of(w, struct irq_am, work);
+	int ret;
+
+	ret = am->program(am, am->curr_level);
+	WARN_ON_ONCE(ret);
+	am->state = IRQ_AM_START_MEASURING;
+}
+
+
+/*
+ * irq_am_cleanup - flush the monitor's moderation work item
+ * @am:        moderation monitor
+ */
+void irq_am_cleanup(struct irq_am *am)
+{
+	struct work_struct *pending = &am->work;
+
+	flush_work(pending);
+}
+EXPORT_SYMBOL_GPL(irq_am_cleanup);
+
+/*
+ * irq_am_init - initialize an adaptive moderation monitor
+ * @am:            monitor to initialize
+ * @nr_events:     number of events between samples
+ * @nr_levels:     number of moderation levels
+ * @start_level:   level the monitor starts at
+ * @fn:            callback that programs a level into the device
+ *
+ * The monitor keeps the sampling interval in 16 bits and divides by it, so
+ * @nr_events is clamped to 1..U16_MAX instead of being silently truncated
+ * (65536 used to end up as 0). @start_level is capped to the last valid
+ * level so that stepping cannot start outside the level range.
+ */
+void irq_am_init(struct irq_am *am, unsigned int nr_events,
+	unsigned short nr_levels, unsigned short start_level, irq_am_fn *fn)
+{
+	memset(am, 0, sizeof(*am));
+	am->state = IRQ_AM_START_MEASURING;
+	am->tune_state = IRQ_AM_GOING_UP;
+	am->nr_levels = nr_levels;
+	am->nr_events = clamp_t(unsigned int, nr_events, 1, U16_MAX);
+	am->curr_level = nr_levels ?
+		min_t(unsigned short, start_level, nr_levels - 1) : 0;
+	am->program = fn;
+	INIT_WORK(&am->work, irq_am_program_moderation_work);
+}
+EXPORT_SYMBOL_GPL(irq_am_init);
-- 
2.14.1

WARNING: multiple messages have this Message-ID (diff)
From: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
To: linux-block-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: Jason Gunthorpe <jgg-uk2M96/98Pc@public.gmane.org>,
	Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>,
	Jens Axboe <axboe-tSWWG44O7X1aa/9Udqfwiw@public.gmane.org>,
	Idan Burstein <idanb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>,
	Bart Van Assche <bart.vanassche-Sjgp3cTcYWE@public.gmane.org>
Subject: [PATCH rfc 1/5] irq-am: Introduce library implementing generic adaptive moderation
Date: Tue,  6 Feb 2018 00:03:12 +0200	[thread overview]
Message-ID: <20180205220316.30236-2-sagi@grimberg.me> (raw)
In-Reply-To: <20180205220316.30236-1-sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>

irq-am library helps I/O devices implement interrupt moderation in
an adaptive fashion, based on online stats.

The consumer can initialize an irq-am context with a callback that
performs the device-specific moderation programming, and with the number
of am (adaptive moderation) levels, which are abstracted to allow
for device-specific tuning.

The irq-am code will sample once every nr_events events and will check for a
significant change in workload characteristics (completions per second, events
per second) and, if it detects one, will perform an am level update (called a step).

The irq-am code assumes that the am levels are sorted in increasing order, where
the lowest level corresponds to the optimum latency tuning (short time and low
completion-count) and gradually increasing towards the throughput optimum tuning
(longer time and higher completion-count). So there is a trend and tuning direction
tracked by the moderator. When the moderator collects sufficient statistics (also
controlled by the consumer defining nr_events), it compares the current stats with the
previous stats and if a significant change was observed in the load, the moderator
attempts to increment/decrement its current level (step) and schedules a program
dispatch work.

Signed-off-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
---
 include/linux/irq-am.h | 116 +++++++++++++++++++++++++++++++
 lib/Kconfig            |   5 ++
 lib/Makefile           |   1 +
 lib/irq-am.c           | 182 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 304 insertions(+)
 create mode 100644 include/linux/irq-am.h
 create mode 100644 lib/irq-am.c

diff --git a/include/linux/irq-am.h b/include/linux/irq-am.h
new file mode 100644
index 000000000000..5ddd5ca268aa
--- /dev/null
+++ b/include/linux/irq-am.h
@@ -0,0 +1,116 @@
+/*
+ * Adaptive moderation support for I/O devices.
+ * Copyright (c) 2018 Lightbits Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef _IRQ_AM_H
+#define _IRQ_AM_H
+
+#include <linux/ktime.h>
+#include <linux/workqueue.h>
+
+struct irq_am;
+
+/*
+ * irq_am_fn - device specific moderation programming callback
+ * @am:        the moderation monitor the level belongs to
+ * @level:     moderation level to program
+ *
+ * Called from the monitor's work item with the monitor's current level.
+ * A non-zero return value is reported through WARN_ON_ONCE().
+ */
+typedef int (irq_am_fn)(struct irq_am *am, unsigned short level);
+
+/*
+ * struct irq_am_sample_stats - sample stats for adaptive moderation
+ * @cps:        completions per-second
+ * @eps:        events per-second
+ * @cpe:        completions per event
+ *
+ * All three values are computed with a rounding-up division.
+ */
+struct irq_am_sample_stats {
+	u32 cps;
+	u32 eps;
+	u32 cpe;
+};
+
+/*
+ * struct irq_am_sample - per-irq interrupt batch sample unit
+ * @time:      time at which the sample was taken
+ * @comps:     value of the overall completions counter at sample time
+ * @events:    value of the overall events counter at sample time
+ *
+ * The counters are snapshots of the monitor's running totals, not deltas;
+ * the activity between two samples is the difference of their fields.
+ */
+struct irq_am_sample {
+	ktime_t	time;
+	u64	comps;
+	u64	events;
+};
+
+/*
+ * enum irq_am_state - adaptive moderation monitor states
+ * @IRQ_AM_START_MEASURING:        collect first sample (start_sample)
+ * @IRQ_AM_MEASURING:              measurement in progress
+ * @IRQ_AM_PROGRAM_MODERATION:     moderation program scheduled
+ *                                 so we should not react to any stats
+ *                                 from the old moderation profile.
+ */
+enum irq_am_state {
+	IRQ_AM_START_MEASURING,
+	IRQ_AM_MEASURING,
+	IRQ_AM_PROGRAM_MODERATION,
+};
+
+/*
+ * enum irq_am_tune_state - direction the monitor is currently tuning in
+ * @IRQ_AM_GOING_UP:       steps move towards higher moderation levels
+ * @IRQ_AM_GOING_DOWN:     steps move towards lower moderation levels
+ */
+enum irq_am_tune_state {
+	IRQ_AM_GOING_UP = 0,
+	IRQ_AM_GOING_DOWN = 1,
+};
+
+/*
+ * enum irq_am_relative_diff - result of comparing a sample with the previous one
+ * @IRQ_AM_STATS_WORSE:    stats got worse, the tuning direction is reversed
+ * @IRQ_AM_STATS_SAME:     no significant change, the level is kept
+ * @IRQ_AM_STATS_BETTER:   stats improved, keep stepping in the same direction
+ */
+enum irq_am_relative_diff {
+	IRQ_AM_STATS_WORSE = 0,
+	IRQ_AM_STATS_SAME = 1,
+	IRQ_AM_STATS_BETTER = 2,
+};
+
+/*
+ * struct irq_am_stats - overall activity counters of a monitor
+ * @events:    running total of events
+ * @comps:     running total of completions
+ */
+struct irq_am_stats {
+	u64 events;
+	u64 comps;
+};
+
+/*
+ * struct irq_am - irq adaptive moderation monitor
+ * @state:             adaptive moderation monitor state
+ * @tune_state:        tuning state of the moderation monitor
+ * @am_stats:          overall completions and events counters
+ * @start_sample:      first sample in moderation batch
+ * @prev_stats:        previous stats for trend detection
+ * @nr_events:         number of events between samples (16 bits wide,
+ *                     so larger values do not fit)
+ * @nr_levels:         number of moderation levels
+ * @curr_level:        current moderation level
+ * @work:              work item that runs the moderation program handler
+ * @program:           moderation program handler, called with the
+ *                     current level
+ */
+struct irq_am {
+	enum irq_am_state		state;
+	enum irq_am_tune_state		tune_state;
+
+	struct irq_am_stats		am_stats;
+	struct irq_am_sample		start_sample;
+	struct irq_am_sample_stats	prev_stats;
+
+	u16				nr_events;
+	unsigned short			nr_levels;
+	unsigned short			curr_level;
+
+	struct work_struct		work;
+	irq_am_fn			*program;
+};
+
+void irq_am_add_event(struct irq_am *am);
+/*
+ * irq_am_add_comps - account completions
+ * @am:        moderation monitor
+ * @n:         number of completions to add to the overall counter
+ */
+static inline void irq_am_add_comps(struct irq_am *am, u64 n)
+{
+	struct irq_am_stats *totals = &am->am_stats;
+
+	totals->comps += n;
+}
+
+void irq_am_cleanup(struct irq_am *am);
+void irq_am_init(struct irq_am *am, unsigned int nr_events,
+	unsigned short nr_levels, unsigned short start_level, irq_am_fn *fn);
+
+#endif
diff --git a/lib/Kconfig b/lib/Kconfig
index 4dd5c11366f9..bbb4c9eea84d 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -504,6 +504,11 @@ config DDR
 	  information. This data is useful for drivers handling
 	  DDR SDRAM controllers.
 
+config IRQ_AM
+	bool "IRQ adaptive moderation library"
+	help
+	  Helper library to implement adaptive moderation for I/O devices.
+
 config IRQ_POLL
 	bool "IRQ polling library"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index d11c48ec8ffd..795583a685b9 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -193,6 +193,7 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
 obj-$(CONFIG_SG_POOL) += sg_pool.o
 obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
 obj-$(CONFIG_IRQ_POLL) += irq_poll.o
+obj-$(CONFIG_IRQ_AM) += irq-am.o
 
 obj-$(CONFIG_STACKDEPOT) += stackdepot.o
 KASAN_SANITIZE_stackdepot.o := n
diff --git a/lib/irq-am.c b/lib/irq-am.c
new file mode 100644
index 000000000000..ed7befd7a560
--- /dev/null
+++ b/lib/irq-am.c
@@ -0,0 +1,182 @@
+/*
+ * Adaptive moderation support for I/O devices.
+ * Copyright (c) 2018 Lightbits Labs.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/irq-am.h>
+
+/*
+ * irq_am_try_step - move one level in the current tuning direction
+ * @am:        moderation monitor
+ *
+ * Going up stops at the top level (nr_levels - 1) and going down stops at
+ * level 0. The upper bound is tested as "curr_level + 1 < nr_levels" rather
+ * than "curr_level != nr_levels - 1", so a monitor with zero levels, or one
+ * whose level is already past the top, never climbs any further.
+ */
+static void irq_am_try_step(struct irq_am *am)
+{
+	switch (am->tune_state) {
+	case IRQ_AM_GOING_UP:
+		if (am->curr_level + 1 < am->nr_levels)
+			am->curr_level++;
+		break;
+	case IRQ_AM_GOING_DOWN:
+		if (am->curr_level)
+			am->curr_level--;
+		break;
+	}
+}
+
+/*
+ * irq_am_on_edge - is the monitor at the lowest or at the highest level
+ * @am:        moderation monitor
+ */
+static inline bool irq_am_on_edge(struct irq_am *am)
+{
+	const unsigned short level = am->curr_level;
+
+	if (level == 0)
+		return true;
+
+	return level == am->nr_levels - 1;
+}
+
+/*
+ * irq_am_turn - reverse the tuning direction and take a step that way
+ * @am:        moderation monitor
+ */
+static void irq_am_turn(struct irq_am *am)
+{
+	if (am->tune_state == IRQ_AM_GOING_UP)
+		am->tune_state = IRQ_AM_GOING_DOWN;
+	else
+		am->tune_state = IRQ_AM_GOING_UP;
+
+	irq_am_try_step(am);
+}
+
+/*
+ * irq_am_significant_diff - did @val move significantly relative to @ref
+ * @val:       current value
+ * @ref:       reference value
+ *
+ * Returns true when (100 * |val - ref|) / ref, with truncating division,
+ * is greater than 20. The comparison is done by cross-multiplying in 64
+ * bits: the 32-bit "100 * difference" product overflows once the values
+ * are tens of millions apart, and a zero @ref must not be divided by.
+ * With a zero @ref any non-zero @val counts as a significant change.
+ */
+static inline bool irq_am_significant_diff(u32 val, u32 ref)
+{
+	u64 diff = val > ref ? val - ref : ref - val;
+
+	if (!ref)
+		return val != 0;
+
+	/* floor(100 * diff / ref) > 20  <=>  100 * diff >= 21 * ref */
+	return 100 * diff >= 21 * (u64)ref;
+}
+
+#define IRQ_AM_SIGNIFICANT_DIFF(val, ref) \
+	irq_am_significant_diff((val), (ref)) /* more than 20% difference */
+
+/*
+ * irq_am_stats_compare - classify the current sample against the previous one
+ * @am:        moderation monitor holding the previous stats
+ * @curr:      stats of the sample that just ended
+ *
+ * Returns one of the irq_am_relative_diff values. Completions per second
+ * are looked at first, then events per second; the completions-per-event
+ * ratio is only consulted when neither of them moved significantly.
+ */
+static int irq_am_stats_compare(struct irq_am *am, struct irq_am_sample_stats *curr)
+{
+	struct irq_am_sample_stats *last = &am->prev_stats;
+
+	/* nothing to compare against yet */
+	if (last->cps == 0)
+		return IRQ_AM_STATS_SAME;
+
+	/* a higher completion rate is an improvement */
+	if (IRQ_AM_SIGNIFICANT_DIFF(curr->cps, last->cps)) {
+		if (curr->cps > last->cps)
+			return IRQ_AM_STATS_BETTER;
+		return IRQ_AM_STATS_WORSE;
+	}
+
+	/* a lower event rate is an improvement */
+	if (IRQ_AM_SIGNIFICANT_DIFF(curr->eps, last->eps)) {
+		if (curr->eps < last->eps)
+			return IRQ_AM_STATS_BETTER;
+		return IRQ_AM_STATS_WORSE;
+	}
+
+	/*
+	 * One completion per event above level 0 means aggregating buys
+	 * nothing: steer the monitor towards lower levels, whichever
+	 * direction it is heading in right now.
+	 */
+	if (curr->cpe == 1 && am->curr_level != 0) {
+		if (am->tune_state == IRQ_AM_GOING_UP)
+			return IRQ_AM_STATS_WORSE;
+		return IRQ_AM_STATS_BETTER;
+	}
+
+	return IRQ_AM_STATS_SAME;
+}
+
+/*
+ * irq_am_decision - act on a finished sample
+ * @am:           moderation monitor
+ * @curr_stats:   stats of the sample that just ended
+ *
+ * Reverses the tuning direction and steps when the stats got worse, steps
+ * in the current direction when they got better, and stays put otherwise.
+ *
+ * Returns true when the level moved or the monitor sits on the first or
+ * the last level. In that case, and as long as no previous stats exist,
+ * @curr_stats becomes the reference for the next comparison.
+ */
+static bool irq_am_decision(struct irq_am *am,
+		struct irq_am_sample_stats *curr_stats)
+{
+	const unsigned short level_before = am->curr_level;
+	enum irq_am_relative_diff verdict;
+	bool changed;
+
+	verdict = irq_am_stats_compare(am, curr_stats);
+	if (verdict == IRQ_AM_STATS_WORSE)
+		irq_am_turn(am);
+	else if (verdict == IRQ_AM_STATS_BETTER)
+		irq_am_try_step(am);
+	/* IRQ_AM_STATS_SAME and anything unexpected leave the level alone */
+
+	changed = irq_am_on_edge(am) || am->curr_level != level_before;
+	if (changed || am->prev_stats.cps == 0)
+		am->prev_stats = *curr_stats;
+
+	return changed;
+}
+
+/*
+ * irq_am_sample - snapshot the clock and the overall counters
+ * @am:        moderation monitor to read the counters from
+ * @s:         sample to fill
+ */
+static void irq_am_sample(struct irq_am *am, struct irq_am_sample *s)
+{
+	const struct irq_am_stats *totals = &am->am_stats;
+
+	s->time = ktime_get();
+	s->comps = totals->comps;
+	s->events = totals->events;
+}
+
+/*
+ * irq_am_calc_stats - turn two samples into per-second / per-event rates
+ * @am:           moderation monitor (provides nr_events and prev_stats)
+ * @start:        sample taken when the measurement started
+ * @end:          sample taken when the measurement ended
+ * @curr_stats:   result, always fully written
+ *
+ * When no time elapsed between the samples, or nr_events is zero, no rate
+ * can be computed. The previous stats are reported in that case, so the
+ * caller never reads an unwritten result and sees "no change".
+ *
+ * The arithmetic is done in 64 bits and each result is capped at U32_MAX,
+ * so large completion counts neither wrap nor overflow the multiplication
+ * by USEC_PER_SEC.
+ */
+static void irq_am_calc_stats(struct irq_am *am, struct irq_am_sample *start,
+		struct irq_am_sample *end,
+		struct irq_am_sample_stats *curr_stats)
+{
+	s64 elapsed_us = ktime_us_delta(end->time, start->time);
+	u64 ncomps = end->comps - start->comps;
+	u32 delta_us;
+
+	if (elapsed_us <= 0 || !am->nr_events) {
+		*curr_stats = am->prev_stats;
+		return;
+	}
+
+	/* the divisor is kept to 32 bits: u32 holds up to 71 minutes */
+	delta_us = min_t(s64, elapsed_us, U32_MAX);
+
+	curr_stats->cps = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL(ncomps * USEC_PER_SEC, delta_us));
+	curr_stats->eps = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL((u64)am->nr_events * USEC_PER_SEC,
+					 delta_us));
+	curr_stats->cpe = min_t(u64, U32_MAX,
+			DIV_ROUND_UP_ULL(ncomps, am->nr_events));
+}
+
+/*
+ * irq_am_add_event - account one event and advance the monitor
+ * @am:        moderation monitor
+ *
+ * While measuring, once nr_events events were seen since the start sample,
+ * the stats are computed and a decision is taken. If the decision reports
+ * a change, the moderation work is scheduled and events are ignored until
+ * the monitor leaves the program-moderation state. Otherwise a new
+ * measurement is started right away.
+ */
+void irq_am_add_event(struct irq_am *am)
+{
+	struct irq_am_sample_stats stats;
+	struct irq_am_sample now;
+	enum irq_am_state state;
+	u16 seen;
+
+	am->am_stats.events++;
+
+	state = am->state;
+	if (state == IRQ_AM_MEASURING) {
+		seen = am->am_stats.events - am->start_sample.events;
+		if (seen < am->nr_events)
+			return;
+
+		irq_am_sample(am, &now);
+		irq_am_calc_stats(am, &am->start_sample, &now, &stats);
+		if (irq_am_decision(am, &stats)) {
+			am->state = IRQ_AM_PROGRAM_MODERATION;
+			schedule_work(&am->work);
+			return;
+		}
+	} else if (state != IRQ_AM_START_MEASURING) {
+		/* a moderation update is pending: only count the event */
+		return;
+	}
+
+	/* open a new measurement window */
+	irq_am_sample(am, &am->start_sample);
+	am->state = IRQ_AM_MEASURING;
+}
+EXPORT_SYMBOL_GPL(irq_am_add_event);
+
+/*
+ * irq_am_program_moderation_work - program the current level into the device
+ * @w:         work item embedded in the moderation monitor
+ *
+ * Hands the current level to the consumer's program callback, warns once
+ * if the callback returns non-zero, and restarts measuring either way.
+ */
+static void irq_am_program_moderation_work(struct work_struct *w)
+{
+	struct irq_am *am = container_of(w, struct irq_am, work);
+	int ret;
+
+	ret = am->program(am, am->curr_level);
+	WARN_ON_ONCE(ret);
+	am->state = IRQ_AM_START_MEASURING;
+}
+
+
+/*
+ * irq_am_cleanup - flush the monitor's moderation work item
+ * @am:        moderation monitor
+ */
+void irq_am_cleanup(struct irq_am *am)
+{
+	struct work_struct *pending = &am->work;
+
+	flush_work(pending);
+}
+EXPORT_SYMBOL_GPL(irq_am_cleanup);
+
+/*
+ * irq_am_init - initialize an adaptive moderation monitor
+ * @am:            monitor to initialize
+ * @nr_events:     number of events between samples
+ * @nr_levels:     number of moderation levels
+ * @start_level:   level the monitor starts at
+ * @fn:            callback that programs a level into the device
+ *
+ * The monitor keeps the sampling interval in 16 bits and divides by it, so
+ * @nr_events is clamped to 1..U16_MAX instead of being silently truncated
+ * (65536 used to end up as 0). @start_level is capped to the last valid
+ * level so that stepping cannot start outside the level range.
+ */
+void irq_am_init(struct irq_am *am, unsigned int nr_events,
+	unsigned short nr_levels, unsigned short start_level, irq_am_fn *fn)
+{
+	memset(am, 0, sizeof(*am));
+	am->state = IRQ_AM_START_MEASURING;
+	am->tune_state = IRQ_AM_GOING_UP;
+	am->nr_levels = nr_levels;
+	am->nr_events = clamp_t(unsigned int, nr_events, 1, U16_MAX);
+	am->curr_level = nr_levels ?
+		min_t(unsigned short, start_level, nr_levels - 1) : 0;
+	am->program = fn;
+	INIT_WORK(&am->work, irq_am_program_moderation_work);
+}
+EXPORT_SYMBOL_GPL(irq_am_init);
-- 
2.14.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

  reply	other threads:[~2018-02-05 22:03 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-05 22:03 [PATCH rfc 0/5] generic adaptive IRQ moderation library for I/O devices Sagi Grimberg
2018-02-05 22:03 ` Sagi Grimberg [this message]
2018-02-05 22:03   ` [PATCH rfc 1/5] irq-am: Introduce library implementing generic adaptive moderation Sagi Grimberg
2018-02-06  7:43   ` Or Gerlitz
2018-02-06  7:43     ` Or Gerlitz
2018-02-05 22:03 ` [PATCH rfc 2/5] irq-am: add some debugfs exposure on tuning state Sagi Grimberg
2018-02-05 22:03   ` Sagi Grimberg
2018-02-06 16:04   ` kbuild test robot
2018-02-06 17:38   ` kbuild test robot
2018-02-08  1:24   ` Bart Van Assche
2018-02-08  1:24     ` Bart Van Assche
2018-02-12 19:42     ` Sagi Grimberg
2018-02-05 22:03 ` [PATCH rfc 3/5] irq_poll: wire up irq_am Sagi Grimberg
2018-02-05 22:03   ` Sagi Grimberg
2018-02-08  1:28   ` Bart Van Assche
2018-02-08  1:28     ` Bart Van Assche
2018-02-12 19:40     ` Sagi Grimberg
2018-02-12 19:40       ` Sagi Grimberg
2018-02-05 22:03 ` [PATCH rfc 4/5] IB/cq: add adaptive moderation support Sagi Grimberg
2018-02-05 22:03   ` Sagi Grimberg
2018-02-05 22:03 ` [PATCH rfc 5/5] IB/cq: wire up adaptive moderation to workqueue based completion queues Sagi Grimberg
2018-02-06  6:56 ` [PATCH rfc 0/5] generic adaptive IRQ moderation library for I/O devices Or Gerlitz
2018-02-06  6:56   ` Or Gerlitz
2018-02-06  9:25   ` Sagi Grimberg
2018-02-06 11:34     ` Or Gerlitz
2018-02-06 11:34       ` Or Gerlitz
2018-02-06  8:54 ` Or Gerlitz
2018-02-06  8:54   ` Or Gerlitz
2018-02-06  9:02   ` Tal Gilboa
2018-02-06  9:02     ` Tal Gilboa
2018-02-06  9:34     ` Sagi Grimberg
2018-02-06  9:45       ` Tal Gilboa
2018-02-13  9:30         ` Or Gerlitz
2018-02-13  9:30           ` Or Gerlitz
2018-02-13 21:46           ` Tal Gilboa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180205220316.30236-2-sagi@grimberg.me \
    --to=sagi@grimberg.me \
    --cc=axboe@kernel.dk \
    --cc=bart.vanassche@wdc.com \
    --cc=hch@lst.de \
    --cc=idanb@mellanox.com \
    --cc=jgg@ziepe.ca \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.