linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] [idled]: Idle Cycle Injector for power capping
@ 2010-04-14  0:08 Salman
  2010-04-14  0:08 ` [PATCH 1/3] [kidled]: introduce kidled Salman
                   ` (4 more replies)
  0 siblings, 5 replies; 22+ messages in thread
From: Salman @ 2010-04-14  0:08 UTC (permalink / raw)
  To: peterz, mingo, linux-kernel, akpm, svaidy, linux-pm, arjan
  Cc: csadler, ranjitm, kenchen, dawnchen

As we discussed earlier this year, Google has an implementation that it
would like to share.  I have finally gotten around to porting it to
v2.6.33 and cleaning up the interfaces.  It is provided in the following
messages for your review.  I realize that when we first discussed this
idea, a lot of ideas were presented for enhancing it.  Thanks a lot for
your suggestions.  I haven't gotten around to implementing any of them.

The ones that I still find appealing are:

0. Providing approximate synchronization between cores, regardless
of their independent settings in order to improve power savings.  We have
to balance this with eager injection (i.e. avoiding injection when
an interactive task needs to run).

A stricter synchronization between cores is needed to make the idle cycle
injector work on hyperthreaded systems.  This is a somewhat separate issue, as
there should only be one idle cycle injector minimum idle setting per
physical core.

1. It's not possible to directly use hard limits to implement the
type of assurance that we need.  However, something similar to CPU hard
limits could be used to implement a global power cap.  It is not strictly necessary for
Google's purposes.  The outcome of the trade offs is not immediately clear to
me.  I need to do some prototyping.

Now, back to the current set of patches.

Testing:

The patches were tested using the following program.  The output was:

# /export/hda3/kidled_test /dev/cgroup/
Latency Test:

Count without injection: 9441
Count with 80% injection (batch) 1805 (idle 8099305661)
Count with 80% injection (interactive): 9439 (idle 8054796135)
Lost wake ups (batch): 7636
Lost wake ups (interactive): 2
Priority Test:

Low priority got:  26197453ns
High priority got: 1971369919ns
Idle Time:         8021629325ns

Test program follows:


/*
 *  A set of tests for the idle cycle injector.
 */

#define _GNU_SOURCE	/* sched_setaffinity() and the CPU_* macros */

#include <assert.h>
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

/* Directory where the cpu cgroup is mounted; set from argv[1] in main(). */
char *cpu_cgroup_dir;

/* Length, in seconds, of each measurement run. */
#define NUM_SECONDS	10
/* Unit conversion factors used for the timing arithmetic and sleeps. */
#define NSEC_PER_SEC	1000000000L
#define USEC_PER_MSEC	1000
#define USEC_PER_SEC	1000000L

/*
 * Fork an antagonist: a child process that does nothing but spin.
 *
 * Returns the child's pid to the parent.  The child never returns from
 * this function.  If fork() fails, a message is printed and the program
 * exits with EXIT_FAILURE.
 */
int start_while_one(void)
{
	int child = fork();

	if (child < 0) {
		printf("Antagonist fork failed\n");
		exit(EXIT_FAILURE);
	}

	if (child == 0) {
		/* Child: burn CPU until somebody kills us. */
		for (;;)
			;
	}

	return child;
}

/*
 * write_file(filename, fmt, ...): printf-style write to the file named by
 * `filename`, which is opened with mode "w".
 *
 * At least one argument must follow `fmt`.  `filename` is evaluated once.
 * If the file cannot be opened, the error is reported and the program exits
 * with EXIT_FAILURE rather than passing a NULL stream to fprintf().  A
 * failed fprintf() or fclose() is reported on stderr but is not fatal; the
 * fclose() result is checked because stdio buffers the data, so a write
 * error may only be reported once the stream is flushed on close.
 */
#define write_file(filename, fmt, ...)					\
	do {								\
		const char *wf_path_ = (filename);			\
		FILE *wf_stream_ = fopen(wf_path_, "w");		\
		int wf_failed_;						\
		if (!wf_stream_) {					\
			perror(wf_path_);				\
			exit(EXIT_FAILURE);				\
		}							\
		wf_failed_ = fprintf(wf_stream_, fmt, __VA_ARGS__) < 0;	\
		if (fclose(wf_stream_) != 0)				\
			wf_failed_ = 1;					\
		if (wf_failed_)						\
			perror(wf_path_);				\
	} while (0)

/*
 * read_file(filename, fmt, ...): scanf-style read from the file named by
 * `filename`.
 *
 * At least one pointer argument must follow `fmt`.  `filename` is evaluated
 * once.  If the file cannot be opened, or if fscanf() does not convert at
 * least one value, the problem is reported and the program exits with
 * EXIT_FAILURE: continuing would leave the caller's output variables
 * unset.  A partially matched format (some, but not all, conversions) is
 * not detected.
 */
#define read_file(filename, fmt, ...)					\
	do {								\
		const char *rf_path_ = (filename);			\
		FILE *rf_stream_ = fopen(rf_path_, "r");		\
		if (!rf_stream_) {					\
			perror(rf_path_);				\
			exit(EXIT_FAILURE);				\
		}							\
		if (fscanf(rf_stream_, fmt, __VA_ARGS__) < 1) {		\
			fprintf(stderr, "%s: no value could be parsed\n", \
				rf_path_);				\
			fclose(rf_stream_);				\
			exit(EXIT_FAILURE);				\
		}							\
		fclose(rf_stream_);					\
	} while (0)


/*
 * Run the latency protagonist for NUM_SECONDS and count its wakeups.
 *
 * The calling process moves itself into the "protogonist" cgroup under
 * cpu_cgroup_dir, writing `interactive` to cpu.power_interactive and 0 to
 * both cpuset.mems and cpuset.cpus.  It then sleeps for one millisecond at
 * a time, counting completed sleeps until more than NUM_SECONDS of
 * CLOCK_MONOTONIC time have passed.
 *
 * interactive: value written to the cgroup's cpu.power_interactive file.
 * total_idle:  if non-NULL, receives the change over the run of the first
 *              field of /proc/sys/kernel/kidled/cpu/0/stats.
 *
 * Returns the number of completed sleeps.  Exits with EXIT_FAILURE if the
 * cgroup path does not fit its buffer or the cgroup cannot be created.
 */
int do_latency_protagonist(int interactive, long *total_idle)
{
	char my_cgroup[200];
	/* Big enough for my_cgroup plus the longest file name used below. */
	char file[sizeof(my_cgroup) + 32];
	int count = 0;
	int len;
	struct timespec start;
	struct timespec now;
	long long elapsed_ns;
	long idle = 0, busy = 0, lazy = 0, eager = 0;

	/* Put ourselves in the protagonist cgroup */
	len = snprintf(my_cgroup, sizeof(my_cgroup), "%s/protogonist",
		       cpu_cgroup_dir);
	if (len < 0 || (size_t)len >= sizeof(my_cgroup)) {
		fprintf(stderr, "cgroup directory path too long: %s\n",
			cpu_cgroup_dir);
		exit(EXIT_FAILURE);
	}

	/*
	 * Start from a fresh group when possible.  The rmdir() is best
	 * effort; if the directory survives (e.g. from an earlier call),
	 * mkdir() reports EEXIST and the existing group is reused.
	 */
	rmdir(my_cgroup);
	if (mkdir(my_cgroup, 0755) != 0 && errno != EEXIST) {
		perror(my_cgroup);
		exit(EXIT_FAILURE);
	}

	snprintf(file, sizeof(file), "%s/cpu.power_interactive", my_cgroup);
	write_file(file, "%d\n", interactive);
	snprintf(file, sizeof(file), "%s/cpuset.mems", my_cgroup);
	write_file(file, "%d\n", 0);
	snprintf(file, sizeof(file), "%s/cpuset.cpus", my_cgroup);
	write_file(file, "%d\n", 0);
	snprintf(file, sizeof(file), "%s/tasks", my_cgroup);
	write_file(file, "%d\n", getpid());

	if (total_idle) {
		read_file("/proc/sys/kernel/kidled/cpu/0/stats",
			"%ld %ld %ld %ld\n",
			&idle, &busy, &lazy, &eager);
		*total_idle = idle;
	}

	/*
	 * Work with the elapsed time rather than absolute nanoseconds, and
	 * in long long, so the arithmetic does not overflow a 32-bit long.
	 */
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		usleep(USEC_PER_MSEC);
		count++;
		clock_gettime(CLOCK_MONOTONIC, &now);
		elapsed_ns = (long long)(now.tv_sec - start.tv_sec) *
				NSEC_PER_SEC +
			     (now.tv_nsec - start.tv_nsec);
	} while (elapsed_ns <= (long long)NUM_SECONDS * NSEC_PER_SEC);

	if (total_idle) {
		read_file("/proc/sys/kernel/kidled/cpu/0/stats",
			"%ld %ld %ld %ld\n",
			&idle, &busy, &lazy, &eager);
		*total_idle = idle - *total_idle;
	}

	return count;
}

/*
 * Latency test: the eager injection case of power capping.
 *
 * Protagonist: a thread that wakes up frequently and does little work.
 * Antagonist:  a batch thread that never stops running.
 *
 * The protagonist's wakeups are counted three times: with no injection,
 * with 80% injection while marked interactive, and with 80% injection
 * while not marked interactive.  The difference from the first count is
 * reported as lost wakeups.
 */
void latency_test(void)
{
	int antagonist;
	int baseline;
	int interactive;
	int batch;
	long idle_interactive;
	long idle_batch;

	printf("Latency Test:\n\n");

	antagonist = start_while_one();

	/* Baseline: injection disabled. */
	write_file("/proc/sys/kernel/kidled/cpu/0/min_idle_percent",
		   "%d\n", 0);
	write_file("/proc/sys/kernel/kidled/cpu/0/interval",
		   "%d\n", 100);
	baseline = do_latency_protagonist(0, NULL);

	/* 80% injection: interactive run first, then the batch run. */
	write_file("/proc/sys/kernel/kidled/cpu/0/min_idle_percent",
		   "%d\n", 80);
	interactive = do_latency_protagonist(1, &idle_interactive);
	batch = do_latency_protagonist(0, &idle_batch);

	kill(antagonist, SIGKILL);

	printf("Count without injection: %d\n", baseline);
	printf("Count with 80%% injection (batch) %d (idle %ld)\n",
	       batch, idle_batch);
	printf("Count with 80%% injection (interactive): %d (idle %ld)\n",
	       interactive, idle_interactive);
	printf("Lost wake ups (batch): %d\n", baseline - batch);
	printf("Lost wake ups (interactive): %d\n", baseline - interactive);

}

/*
 * Create the cgroup `container_name` under cpu_cgroup_dir and move `pid`
 * into it.
 *
 * The group gets cpu.power_capping_priority set to `priority`,
 * cpu.power_interactive set to 1, and 0 written to both cpuset.mems and
 * cpuset.cpus.
 *
 * Exits with EXIT_FAILURE if the cgroup path does not fit its buffer or
 * the cgroup directory cannot be created.
 */
void make_prio_container(char *container_name, int priority, int pid)
{
	char my_cgroup[200];
	/* Big enough for my_cgroup plus the longest file name used below. */
	char file[sizeof(my_cgroup) + 32];
	int len;

	len = snprintf(my_cgroup, sizeof(my_cgroup), "%s/%s",
		       cpu_cgroup_dir, container_name);
	if (len < 0 || (size_t)len >= sizeof(my_cgroup)) {
		fprintf(stderr, "cgroup path too long: %s/%s\n",
			cpu_cgroup_dir, container_name);
		exit(EXIT_FAILURE);
	}

	/*
	 * Start from a fresh group when possible.  The rmdir() is best
	 * effort; if the directory survives, mkdir() reports EEXIST and
	 * the existing group is reused.
	 */
	rmdir(my_cgroup);
	if (mkdir(my_cgroup, 0755) != 0 && errno != EEXIST) {
		perror(my_cgroup);
		exit(EXIT_FAILURE);
	}

	snprintf(file, sizeof(file), "%s/cpu.power_capping_priority",
		 my_cgroup);
	write_file(file, "%d\n", priority);
	snprintf(file, sizeof(file), "%s/cpu.power_interactive", my_cgroup);
	write_file(file, "%d\n", 1);
	snprintf(file, sizeof(file), "%s/cpuset.mems", my_cgroup);
	write_file(file, "%d\n", 0);
	snprintf(file, sizeof(file), "%s/cpuset.cpus", my_cgroup);
	write_file(file, "%d\n", 0);
	snprintf(file, sizeof(file), "%s/tasks", my_cgroup);
	write_file(file, "%d\n", pid);
}

/* If there are two processes with different power capping priorities, and
 * the enforcement interval is sufficiently small, the task with the
 * smaller priority should approximately receive its fair share minus the
 * idle cycles injected, and the task with the larger priority should just
 * receive its fair share.  Once the amount of idle cycles exceeds the lower
 * priority task's fair share, the higher priority task's throughput is
 * impacted.
 *
 * This test starts two spinning processes, places them in the "high_prio"
 * (priority 14) and "low_prio" (priority 0) cgroups, lets them run for
 * NUM_SECONDS with 80% minimum idle and an interval of 30, and prints the
 * change in each group's cpuacct.usage and in the first field of the
 * kidled stats file.
 */
void priority_test(void)
{
	char high_usage[200];
	char low_usage[200];
	int len;
	int pid1;
	int pid2;
	long low_prio_cpu = 0;
	long high_prio_cpu = 0;
	long low_prio_cpu_base = 0;
	long high_prio_cpu_base = 0;
	long idle = 0, busy = 0, lazy = 0, eager = 0, old_idle = 0;

	printf("Priority Test:\n\n");

	/*
	 * Build both accounting paths up front, so that an over-long cgroup
	 * directory is rejected before any child process has been started.
	 */
	len = snprintf(high_usage, sizeof(high_usage),
		       "%s/high_prio/cpuacct.usage", cpu_cgroup_dir);
	if (len < 0 || (size_t)len >= sizeof(high_usage)) {
		fprintf(stderr, "cgroup directory path too long: %s\n",
			cpu_cgroup_dir);
		exit(EXIT_FAILURE);
	}
	len = snprintf(low_usage, sizeof(low_usage),
		       "%s/low_prio/cpuacct.usage", cpu_cgroup_dir);
	if (len < 0 || (size_t)len >= sizeof(low_usage)) {
		fprintf(stderr, "cgroup directory path too long: %s\n",
			cpu_cgroup_dir);
		exit(EXIT_FAILURE);
	}

	write_file("/proc/sys/kernel/kidled/cpu/0/min_idle_percent",
			"%d\n", 80);
	write_file("/proc/sys/kernel/kidled/cpu/0/interval",
			"%d\n", 30);

	pid1 = start_while_one();
	pid2 = start_while_one();

	make_prio_container("high_prio", 14, pid1);
	make_prio_container("low_prio", 0, pid2);

	read_file(high_usage, "%ld\n", &high_prio_cpu_base);
	read_file(low_usage, "%ld\n", &low_prio_cpu_base);
	read_file("/proc/sys/kernel/kidled/cpu/0/stats",
		  "%ld %ld %ld %ld\n",
		  &old_idle, &busy, &lazy, &eager);

	/*
	 * sleep() rather than usleep(): POSIX only requires usleep() to
	 * accept intervals shorter than one second.
	 */
	sleep(NUM_SECONDS);

	read_file(high_usage, "%ld\n", &high_prio_cpu);
	read_file(low_usage, "%ld\n", &low_prio_cpu);
	read_file("/proc/sys/kernel/kidled/cpu/0/stats",
		"%ld %ld %ld %ld\n",
		&idle, &busy, &lazy, &eager);
	printf("Low priority got:  %ldns\n", low_prio_cpu - low_prio_cpu_base);
	printf("High priority got: %ldns\n",
		high_prio_cpu - high_prio_cpu_base);
	printf("Idle Time:         %ldns\n", idle - old_idle);
	kill(pid1, SIGKILL);
	kill(pid2, SIGKILL);
}

/*
 * Arguments: directory where cpu cgroup is mounted.
 *
 * Pins the process to CPU 0, then runs the latency test followed by the
 * priority test.  Exits with EXIT_FAILURE if the argument is missing or
 * the process cannot be pinned.
 */
int main(int argc, char **argv)
{
	cpu_set_t cpus;

	if (argc < 2) {
		printf("Required argument 'cpu cgroup directory' missing\n");
		exit(EXIT_FAILURE);
	}

	/*
	 * Pin everything to CPU 0, so that one idle cycle injector applies.
	 * The tests read and configure CPU 0's kidled files only, so running
	 * unpinned would make their numbers meaningless; treat a failure to
	 * pin as fatal.
	 */
	CPU_ZERO(&cpus);
	CPU_SET(0, &cpus);
	if (sched_setaffinity(0, sizeof(cpus), &cpus) != 0) {
		perror("sched_setaffinity");
		exit(EXIT_FAILURE);
	}

	cpu_cgroup_dir = argv[1];

	latency_test();
	priority_test();

	return 0;
}

---

Salman Qazi (3):
      [kidled]: introduce kidled.
      [kidled]: Add eager injection.
      [kidled]: Introduce power capping priority and LB awareness.


 Documentation/kidled.txt     |   89 +++++
 arch/x86/Kconfig             |    1 
 arch/x86/include/asm/idle.h  |    1 
 arch/x86/kernel/process_64.c |    2 
 drivers/misc/Gconfig.ici     |    1 
 include/linux/kidled.h       |   83 +++++
 include/linux/sched.h        |    3 
 kernel/Kconfig.ici           |    6 
 kernel/Makefile              |    1 
 kernel/kidled.c              |  693 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c               |  155 +++++++++
 kernel/sched_fair.c          |   77 +++++
 kernel/softirq.c             |   15 +
 kernel/sysctl.c              |   11 +
 14 files changed, 1127 insertions(+), 11 deletions(-)
 create mode 100644 Documentation/kidled.txt
 create mode 100644 drivers/misc/Gconfig.ici
 create mode 100644 include/linux/kidled.h
 create mode 100644 kernel/Kconfig.ici
 create mode 100644 kernel/kidled.c

-- 
Salman Qazi

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2010-04-22 19:02 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-14  0:08 [PATCH 0/3] [idled]: Idle Cycle Injector for power capping Salman
2010-04-14  0:08 ` [PATCH 1/3] [kidled]: introduce kidled Salman
2010-04-14  9:49   ` Andi Kleen
2010-04-14 15:41     ` Salman Qazi
2010-04-15  7:46       ` Peter Zijlstra
2010-04-14  0:08 ` [PATCH 2/3] [kidled]: Add eager injection Salman
2010-04-14  0:08 ` [PATCH 3/3] [kidled]: Introduce power capping priority and LB awareness Salman
2010-04-15  7:51 ` [PATCH 0/3] [idled]: Idle Cycle Injector for power capping Peter Zijlstra
2010-04-17 17:08   ` tytso
2010-04-17 17:57     ` Arjan van de Ven
2010-04-17 19:51     ` Peter Zijlstra
2010-04-19 17:20   ` Salman Qazi
2010-04-19 19:01     ` Peter Zijlstra
2010-04-20  1:00       ` Arjan van de Ven
2010-04-20  5:00         ` Vaidyanathan Srinivasan
2010-04-20  4:50       ` Vaidyanathan Srinivasan
2010-04-20 17:52         ` Salman Qazi
2010-04-21  5:08           ` Arjan van de Ven
2010-04-22  1:32   ` Mike Chan
2010-04-22  8:21     ` Peter Zijlstra
2010-04-22 19:02     ` Vaidyanathan Srinivasan
2010-04-17 16:40 ` Arjan van de Ven

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).