All of lore.kernel.org
 help / color / mirror / Atom feed
* Perf Oops on 3.14-rc2
@ 2014-02-10 22:17 Drew Richardson
  2014-02-18 10:18 ` Will Deacon
  0 siblings, 1 reply; 12+ messages in thread
From: Drew Richardson @ 2014-02-10 22:17 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Arnaldo, Will Deacon, Pawel Moll, Wade Cherry

While adding CPU on/offlining support during perf captures I get an
Oops both on ARM as well as my desktop x86_64. Below is a small
program that duplicates the issue.

Here's the oops from an ARM Versatile Express TC2 board running a
vanilla 3.14-rc2 kernel.

[  119.176648] Unable to handle kernel NULL pointer dereference at virtual address 00000040
[  119.203448] pgd = ec178000
[  119.211562] [00000040] *pgd=adcee831, *pte=00000000, *ppte=00000000
[  119.230399] Internal error: Oops: 17 [#1] SMP THUMB2
[  119.245263] Modules linked in:
[  119.254409] CPU: 1 PID: 2268 Comm: perf_fail Not tainted 3.14.0-rc2 #1
[  119.273962] task: ee2c1540 ti: ed6b8000 task.ti: ed6b8000
[  119.290133] PC is at perf_event_aux_ctx+0x36/0x5c
[  119.304216] LR is at perf_event_aux_ctx+0x4b/0x5c
[  119.318299] pc : [<c008c62a>]    lr : [<c008c63f>]    psr: 00000033
[  119.318299] sp : ed6b9dd0  ip : ee2c1a80  fp : ee3cefe0
[  119.352701] r10: ee252420  r9 : ed6b8000  r8 : c00910b9
[  119.368346] r7 : ed6b9e48  r6 : 00000001  r5 : eefc7180  r4 : 00000000
[  119.387898] r3 : 00000000  r2 : 00000002  r1 : ed6b9e48  r0 : 00000000
[  119.407452] Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA Thumb  Segment user
[  119.429352] Control: 50c5387d  Table: ac17806a  DAC: 00000015
[  119.446562] Process perf_fail (pid: 2268, stack limit = 0xed6b8240)
[  119.465333] Stack: (0xed6b9dd0 to 0xed6ba000)
[  119.478374] 9dc0:                                     edb11f34 00000000 ed6b8000 ee923880
[  119.502880] 9de0: ed6b8000 00000000 ed6b9e48 c00910b9 c06bd43c c008c9d1 00000001 00000000
[  119.527385] 9e00: c008c930 00000000 00000001 ee923880 edc25c80 00000000 00000000 ee3ce000
[  119.551890] 9e20: 00000008 000014a5 00000000 c0091ebd ed6b8000 00000000 00000080 00000000
[  119.576394] 9e40: c00b1a97 00000000 ee252420 ee3cefe0 00000018 00000000 00000008 00000000
[  119.600899] 9e60: 000014a5 00000000 00000000 00000000 00000001 00402002 00000000 00000000
[  119.625404] 9e80: b1daa000 00000000 00101000 00000000 00000000 00000000 ee6c4a14 ee2520c8
[  119.649910] 9ea0: b1daa000 ee2520c0 edc25c80 edc18d80 040600fb ed55db00 ed6b8000 c00b32cb
[  119.674414] 9ec0: ee2520c0 00000000 edc25c80 00000000 00000000 00101000 00000000 ee252420
[  119.698924] 9ee0: 00000101 edc25c80 b1daa000 00000000 b1daa000 ed6b8000 edc25c80 edc18d80
[  119.723430] 9f00: 00101000 00000101 c06ad7e4 c00b37e5 00000000 edc18df8 edc18dd4 000000fb
[  119.747934] 9f20: 00100100 ed6b9f5c 00000001 00000003 00101000 00000000 edc25c80 edc18dd4
[  119.772439] 9f40: 00000000 c00a723b 00000001 00000000 ed6b9f5c c00d4bdd 00000001 00000000
[  119.796944] 9f60: 00000001 00000003 00101000 00000000 00000000 edc25c80 00000000 c00b275d
[  119.821449] 9f80: 00000001 00000000 ffffffff 00000003 00000000 be823718 000000c0 c000cfc4
[  119.845954] 9fa0: ed6b8000 c000ce01 00000003 00000000 00000000 00101000 00000003 00000001
[  119.870459] 9fc0: 00000003 00000000 be823718 000000c0 00000000 00000000 b6fd5000 00000000
[  119.894965] 9fe0: 00000000 be823664 00008bab b6f39588 40000010 00000000 00afbc1e 00000000
[  119.919477] [<c008c62a>] (perf_event_aux_ctx) from [<c008c9d1>] (perf_event_aux+0xa1/0xd4)
[  119.944251] [<c008c9d1>] (perf_event_aux) from [<c0091ebd>] (perf_event_mmap+0xf9/0x190)
[  119.968506] [<c0091ebd>] (perf_event_mmap) from [<c00b32cb>] (mmap_region+0xd7/0x418)
[  119.991973] [<c00b32cb>] (mmap_region) from [<c00b37e5>] (do_mmap_pgoff+0x1d9/0x244)
[  120.015184] [<c00b37e5>] (do_mmap_pgoff) from [<c00a723b>] (vm_mmap_pgoff+0x5b/0x74)
[  120.038389] [<c00a723b>] (vm_mmap_pgoff) from [<c00b275d>] (SyS_mmap_pgoff+0x61/0xa4)
[  120.061861] [<c00b275d>] (SyS_mmap_pgoff) from [<c000ce01>] (ret_fast_syscall+0x1/0x44)
[  120.085847] Code: 9301 9c01 42ac d00e (6c23) 2b00
[  120.100239] ---[ end trace c41e3da6a7630bd4 ]---
[  120.114104] note: perf_fail[2268] exited with preempt_count 2

Drew

--->8

#include <assert.h>
#include <fcntl.h>
#include <linux/perf_event.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#define NR_CPUS 16
#define BUF_SIZE (1<<20)
#define MASK (BUF_SIZE - 1)

static void *bufs[NR_CPUS];
static int fds[NR_CPUS][3];
static long page_size;
static int nr_cpu_ids;

/* Thin wrapper around the perf_event_open(2) syscall (no glibc stub exists).
 * Returns the new event fd on success, -1 with errno set on failure. */
static int sys_perf_event_open(struct perf_event_attr *const attr, const pid_t pid, const int cpu, const int group_fd, const unsigned long flags) {
	const long ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);

	return (int)ret;
}

/* Read and parse a single long integer from the file at @path (e.g. a
 * sysfs/debugfs attribute).  Aborts via assert() if the file cannot be
 * opened or is empty. */
static long read_long(const char *const path)
{
	char text[32];
	ssize_t count;
	const int fd = open(path, O_RDONLY);

	assert(fd >= 0);
	count = read(fd, text, sizeof(text) - 1);
	assert(count > 0);
	close(fd);
	text[count] = '\0';

	return strtol(text, NULL, 0);
}

/* Write @online ('0' or '1') to cpu1's sysfs hotplug control file.
 * Returns non-zero when the full byte was written, 0 otherwise; aborts
 * via assert() if the file cannot be opened (e.g. not running as root). */
static int write_cpu_online(const char online)
{
	ssize_t written;
	const int fd = open("/sys/devices/system/cpu/cpu1/online", O_WRONLY);

	assert(fd >= 0);
	written = write(fd, &online, sizeof(online));
	close(fd);

	return written == sizeof(online);
}

/* Thread body: spin at 100% CPU forever to keep the scheduler busy.
 * Never returns; the trailing return exists only to satisfy the
 * pthread start-routine signature. */
static void *busy_loop(void *arg)
{
	(void)arg;

	while (1)
		continue;

	return NULL;
}

/* Spawn twice as many busy-spinning threads as there are CPUs so every
 * core stays loaded and sched_switch events keep firing.  The thread
 * handles are deliberately discarded: the threads run until process exit. */
static void create_threads(void)
{
	const int nthreads = 2 * nr_cpu_ids;
	int i;

	for (i = 0; i < nthreads; ++i) {
		pthread_t tid;
		const int err = pthread_create(&tid, NULL, busy_loop, NULL);

		assert(err == 0);
	}
}

// Create and enable a three-event perf group on every CPU: a pinned
// sched_switch tracepoint leader plus a cpu-clock and a cpu-cycles event
// that redirect their samples into the leader's ring buffer.  Aborts via
// assert() on any setup failure (requires root and debugfs mounted).
static void start_perf(void)
{
	// Shared attr template: group reads with IDs, events start disabled,
	// wake the reader once 3/4 of the buffer has filled.
	struct perf_event_attr pea = {
		.size = sizeof(pea),
		.read_format = PERF_FORMAT_ID | PERF_FORMAT_GROUP,
		.disabled = 1,
		.watermark = 1,
		.wakeup_watermark = 3 * BUF_SIZE / 4,
	};
	long sched_switch_id = read_long("/sys/kernel/debug/tracing/events/sched/sched_switch/id");
	int cpu;
	int i;
	int result;

	assert(sched_switch_id >= 0);

	// Setup perf
	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
		// Group leader: sched_switch tracepoint, pinned, and the only
		// event emitting mmap/comm/task sideband records.
		pea.type = PERF_TYPE_TRACEPOINT;
		pea.config = sched_switch_id;
		pea.sample_period = 1;
		// Bug fix: this statement previously ended in a comma,
		// silently chaining the next assignment via the comma operator.
		pea.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID | PERF_SAMPLE_RAW;
		pea.pinned = 1;
		pea.mmap = 1;
		pea.comm = 1;
		pea.task = 1;
		pea.sample_id_all = 1;
		fds[cpu][0] = sys_perf_event_open(&pea, -1, cpu, -1, 0);
		assert(fds[cpu][0] >= 0);
		// One metadata page followed by BUF_SIZE bytes of sample data.
		bufs[cpu] = mmap(NULL, page_size + BUF_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fds[cpu][0], 0);
		assert(bufs[cpu] != MAP_FAILED);

		// Member events inherit the template minus the leader-only flags.
		pea.pinned = 0;
		pea.mmap = 0;
		pea.comm = 0;
		pea.task = 0;
		pea.sample_id_all = 0;

		// cpu-clock sampler with callchains, 1ms period.
		pea.type = PERF_TYPE_SOFTWARE;
		pea.config = PERF_COUNT_SW_CPU_CLOCK;
		pea.sample_period = 1000000;
		pea.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_CALLCHAIN;
		fds[cpu][1] = sys_perf_event_open(&pea, -1, cpu, fds[cpu][0], PERF_FLAG_FD_OUTPUT);
		assert(fds[cpu][1] >= 0);
		result = ioctl(fds[cpu][1], PERF_EVENT_IOC_SET_OUTPUT, fds[cpu][0]);
		assert(result == 0);

		// cpu-cycles counter, read via the group (sample_period == 0).
		pea.type = PERF_TYPE_HARDWARE;
		pea.config = PERF_COUNT_HW_CPU_CYCLES;
		pea.sample_period = 0;
		pea.sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_READ | PERF_SAMPLE_ID;
		fds[cpu][2] = sys_perf_event_open(&pea, -1, cpu, fds[cpu][0], PERF_FLAG_FD_OUTPUT);
		assert(fds[cpu][2] >= 0);
		result = ioctl(fds[cpu][2], PERF_EVENT_IOC_SET_OUTPUT, fds[cpu][0]);
		assert(result == 0);
	}

	// Start perf.  perf_event_open(2) documents an explicit 0 argument
	// for PERF_EVENT_IOC_ENABLE (enable this event only).
	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
		for (i = 0; i < (int)(sizeof(fds[cpu])/sizeof(fds[cpu][0])); ++i) {
			result = ioctl(fds[cpu][i], PERF_EVENT_IOC_ENABLE, 0);
			assert(result == 0);
		}
	}
}

// Drain (well, acknowledge) each per-CPU ring buffer: snapshot the
// kernel's write position, report whether new data arrived, and publish
// the consumed position back via data_tail.
static void read_perf(void)
{
	int cpu;

	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
		// Bug fix: bufs[] is a zero-initialized static array, so a slot
		// that was never mmap'd is NULL, not MAP_FAILED; checking only
		// `!= MAP_FAILED` dereferenced NULL for such CPUs.
		if (bufs[cpu] == NULL || bufs[cpu] == MAP_FAILED)
			continue;

		// Take a snapshot of the positions from the metadata page at
		// the start of the mapping.
		struct perf_event_mmap_page *pemp = (struct perf_event_mmap_page *)bufs[cpu];
		const __u64 head = pemp->data_head;
		__u64 tail = pemp->data_tail;

		if (head > tail) {
			printf("cpu %i has data\n", cpu);
			/*
			 * Record parser kept for reference.  NOTE(review): if
			 * revived, wrap with `tail & MASK` (not `% MASK` —
			 * MASK is 2^n - 1, so `%` computes the wrong offset)
			 * and cast bufs[cpu] to char * before the arithmetic.
			int header_print_count = 5;
			while (head > tail) {
				struct perf_event_header *const peh = (struct perf_event_header *)((char *)bufs[cpu] + page_size + (tail & MASK));
				if (header_print_count > 0) {
					printf("header = {type = %i, misc = %i, size = %i}\n", peh->type, peh->misc, peh->size);
					--header_print_count;
				}
				if (peh->size <= 0) {
					printf("Found odd header\n");
					tail = head;
					break;
				}
				if (tail + peh->size > head) {
					break;
				}
				tail += peh->size;
			}
			*/

			// Update tail with the data read so the kernel can
			// reuse that part of the buffer.
			pemp->data_tail = tail;
		}
	}
}

static void stop_perf(void)
{
	int cpu;
	int i;
	int result;

	// Stop perf
	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
		for (i = 0; i < (int)(sizeof(fds[cpu])/sizeof(fds[cpu][0])); ++i) {
			result = ioctl(fds[cpu][i], PERF_EVENT_IOC_DISABLE);
			assert(result == 0);
		}
	}

	// Cleanup perf
	for (cpu = 0; cpu < nr_cpu_ids; ++cpu) {
		munmap(bufs[cpu], page_size + BUF_SIZE);
		for (i = 0; i < (int)(sizeof(fds[cpu])/sizeof(fds[cpu][0])); ++i) {
			close(fds[cpu][i]);
		}
	}
}

/* Reproducer driver: start per-CPU perf captures, offline cpu1 while
 * they run, then read the buffers and tear down.  Triggers the
 * perf_event_aux_ctx oops on affected kernels. */
int main(void)
{
	page_size = sysconf(_SC_PAGE_SIZE);
	assert(page_size > 0);

	nr_cpu_ids = sysconf(_SC_NPROCESSORS_CONF);
	assert(nr_cpu_ids > 0 && nr_cpu_ids <= NR_CPUS);

	/* Best effort: make sure cpu1 starts out online. */
	write_cpu_online('1');
	create_threads();

	printf("Starting perf\n");
	start_perf();
	sleep(10);

	printf("Offlining cpu1\n");
	{
		const int result = write_cpu_online('0');

		assert(result);
	}
	sleep(1);

	read_perf();
	sleep(10);

	read_perf();
	stop_perf();

	/* Leave the machine as we found it. */
	write_cpu_online('1');

	return 0;
}

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-10 22:17 Perf Oops on 3.14-rc2 Drew Richardson
@ 2014-02-18 10:18 ` Will Deacon
  2014-02-18 10:52   ` Peter Zijlstra
                     ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Will Deacon @ 2014-02-18 10:18 UTC (permalink / raw)
  To: Drew Richardson
  Cc: linux-kernel, Peter Zijlstra, Arnaldo, Pawel Moll, Wade Cherry

On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> While adding CPU on/offlining support during perf captures I get an
> Oops both on ARM as well as my desktop x86_64. Below is a small
> program that duplicates the issue.

[...]

FWIW I can reproduce this easily with -rc3 on my x86 laptop running
hackbench in parallel with a tweaked version of your test (using
_SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
both CPU2 and CPU3).

I've included part of the log below, but I also saw a WARN which I
unfortunately only managed to get a photo of.

  http://www.willdeacon.ukfsn.org/bitbucket/oopsen/x86/

Will

--->8

BUG: unable to handle kernel NULL pointer dereference at 0000000000000078
IP: [<ffffffff811195fe>] perf_event_aux_ctx+0x4e/0x80
PGD 0
Oops: 0000 [#1608] SMP
Modules linked in:
CPU: 1 PID: 2270 Comm: hackbench Tainted: G      D W    3.14.0-rc3 #2
Hardware name: System76, Inc. Lemur Ultra/Lemur Ultra, BIOS 4.6.4 10/07/2011
task: ffff8800d1ec6180 ti: ffff8800d1fb4000 task.ti: ffff8800d1fb4000
RIP: 0010:[<ffffffff811195fe>]  [<ffffffff811195fe>] perf_event_aux_ctx+0x4e/0x80
RSP: 0018:ffff8800d1fb5d98  EFLAGS: 00010207
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000fffd8
RDX: 0000000000000001 RSI: 00000000000fffd8 RDI: ffff8800d1fb5c98
RBP: ffff88021fa55e60 R08: 00000000000fffd8 R09: 00000000df1aa3e9
R10: ffffffff8108790e R11: ffffea000347dec0 R12: ffff8800d1fb5e18
R13: ffffffff8111f580 R14: ffff8800d1fb5e18 R15: ffff8800d1ec6180
FS:  0000000000000000(0000) GS:ffff88021fa40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000078 CR3: 0000000001ad5000 CR4: 00000000000407e0
Stack:
 00007f8618b68cc0 0000000000000000 ffff8800d1ec6180 ffffffff81ae17e0
 0000000000000000 ffffffff8111f580 ffff8800d1ec6180 ffffffff811196cb
 0000000000000064 ffffffff81ae17e0 0000000000000004 0000000000000000
Call Trace:
 [<ffffffff8111f580>] ? perf_event_task_tick+0xe0/0xe0
 [<ffffffff811196cb>] ? perf_event_aux+0x9b/0xd0
 [<ffffffff81119e48>] ? perf_event_task+0x78/0xa0
 [<ffffffff81122c80>] ? perf_event_exit_task+0xb0/0x200
 [<ffffffff81087928>] ? do_exit+0x2b8/0xa00
 [<ffffffff8117798c>] ? vfs_write+0x17c/0x1e0
 [<ffffffff81088178>] ? do_group_exit+0x38/0xa0
 [<ffffffff810881f2>] ? SyS_exit_group+0x12/0x20
 [<ffffffff817be922>] ? system_call_fastpath+0x16/0x1b
Code: 39 eb 75 27 eb 47 0f 1f 80 00 00 00 00 65 8b 14 25 1c b0 00 00 39 d0 74 26 48 8b 03 48 89 44 24 08 48 8b 5c 24 08 48 39 eb 74 22 <44> 8b 4b 78 45 85 c9 78 e5 8b 83 3c 02 00 00 83 f8 ff 75 ce 4c 
RIP  [<ffffffff811195fe>] perf_event_aux_ctx+0x4e/0x80
 RSP <ffff8800d1fb5d98>
CR2: 0000000000000078
---[ end trace 0e9345db7c92edc0 ]---
Fixing recursive fault but reboot is needed!

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-18 10:18 ` Will Deacon
@ 2014-02-18 10:52   ` Peter Zijlstra
  2014-02-18 11:03     ` Will Deacon
  2014-02-18 10:54   ` Peter Zijlstra
  2014-02-19 16:28   ` Peter Zijlstra
  2 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2014-02-18 10:52 UTC (permalink / raw)
  To: Will Deacon
  Cc: Drew Richardson, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry

On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> I've included part of the log below, but I also saw a WARN which I
> unfortunately only managed to get a photo of.
> 
>   http://www.willdeacon.ukfsn.org/bitbucket/oopsen/x86/
> 

Known issue that; we've been chasing it for a while. The most recent
thread can be found here:

  lkml.kernel.org/r/20140130190253.GA11819@redhat.com

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-18 10:18 ` Will Deacon
  2014-02-18 10:52   ` Peter Zijlstra
@ 2014-02-18 10:54   ` Peter Zijlstra
  2014-02-18 11:01     ` Will Deacon
  2014-02-19 16:28   ` Peter Zijlstra
  2 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2014-02-18 10:54 UTC (permalink / raw)
  To: Will Deacon
  Cc: Drew Richardson, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry

On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> > While adding CPU on/offlining support during perf captures I get an
> > Oops both on ARM as well as my desktop x86_64. Below is a small
> > program that duplicates the issue.
> 
> [...]
> 
> FWIW I can reproduce this easily with -rc3 on my x86 laptop running
> hackbench in parallel with a tweaked version of your test (using
> _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
> both CPU2 and CPU3).

-ENOTEST

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-18 10:54   ` Peter Zijlstra
@ 2014-02-18 11:01     ` Will Deacon
  0 siblings, 0 replies; 12+ messages in thread
From: Will Deacon @ 2014-02-18 11:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Drew Richardson, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry

On Tue, Feb 18, 2014 at 10:54:34AM +0000, Peter Zijlstra wrote:
> On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> > On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> > > While adding CPU on/offlining support during perf captures I get an
> > > Oops both on ARM as well as my desktop x86_64. Below is a small
> > > program that duplicates the issue.
> > 
> > [...]
> > 
> > FWIW I can reproduce this easily with -rc3 on my x86 laptop running
> > hackbench in parallel with a tweaked version of your test (using
> > _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
> > both CPU2 and CPU3).
> 
> -ENOTEST

Sorry, I trimmed it from my reply since it's not especially concise. It's
inlined in the original post:

  http://lkml.kernel.org/r/20140210221758.GB11542@dreric01-Precision-T1600

Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-18 10:52   ` Peter Zijlstra
@ 2014-02-18 11:03     ` Will Deacon
  0 siblings, 0 replies; 12+ messages in thread
From: Will Deacon @ 2014-02-18 11:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Drew Richardson, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry

On Tue, Feb 18, 2014 at 10:52:00AM +0000, Peter Zijlstra wrote:
> On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> > I've included part of the log below, but I also saw a WARN which I
> > unfortunately only managed to get a photo of.
> > 
> >   http://www.willdeacon.ukfsn.org/bitbucket/oopsen/x86/
> > 
> 
> Known issue that; we've been chasing it for a while. The most recent
> thread can be found here:
> 
>   lkml.kernel.org/r/20140130190253.GA11819@redhat.com

Thanks for the pointer, Peter.

Will

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-18 10:18 ` Will Deacon
  2014-02-18 10:52   ` Peter Zijlstra
  2014-02-18 10:54   ` Peter Zijlstra
@ 2014-02-19 16:28   ` Peter Zijlstra
  2014-02-19 18:03     ` Stephane Eranian
  2014-02-19 19:37     ` Drew Richardson
  2 siblings, 2 replies; 12+ messages in thread
From: Peter Zijlstra @ 2014-02-19 16:28 UTC (permalink / raw)
  To: Will Deacon
  Cc: Drew Richardson, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry,
	Stephane Eranian

On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> > While adding CPU on/offlining support during perf captures I get an
> > Oops both on ARM as well as my desktop x86_64. Below is a small
> > program that duplicates the issue.
> 
> [...]
> 
> FWIW I can reproduce this easily with -rc3 on my x86 laptop running
> hackbench in parallel with a tweaked version of your test (using
> _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
> both CPU2 and CPU3).
> 

OK; found it, or at least, I stopped being able to make my box explode.

Drew, can you try the below and confirm? Its still got all the debug goo
in too, but *shees* took me long enough.

Stephane, there's two task_function_call()s in the CGROUP_PERF code that
don't check return codes; can you please audit those?

---
 kernel/events/core.c | 128 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 101 insertions(+), 27 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2067cbb378eb..7c378fc21bff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -78,7 +78,7 @@ static void remote_function(void *data)
  *	    -ESRCH  - when the process isn't running
  *	    -EAGAIN - when the process moved away
  */
-static int
+static int __must_check
 task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
 {
 	struct remote_function_call data = {
@@ -103,7 +103,8 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
  *
  * returns: @func return value or -ENXIO when the cpu is offline
  */
-static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+static int __must_check
+cpu_function_call(int cpu, int (*func) (void *info), void *info)
 {
 	struct remote_function_call data = {
 		.p	= NULL,
@@ -1500,14 +1501,23 @@ static void perf_remove_from_context(struct perf_event *event)
 
 	if (!task) {
 		/*
-		 * Per cpu events are removed via an smp call and
-		 * the removal is always successful.
+		 * Per cpu events are removed via an smp call.
 		 */
-		cpu_function_call(event->cpu, __perf_remove_from_context, event);
+retry1:
+		if (!cpu_function_call(event->cpu, __perf_remove_from_context, event))
+			return;
+
+		raw_spin_lock_irq(&ctx->lock);
+		if (ctx->is_active) {
+			raw_spin_unlock_irq(&ctx->lock);
+			goto retry1;
+		}
+		list_del_event(event, ctx);
+		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
 
-retry:
+retry2:
 	if (!task_function_call(task, __perf_remove_from_context, event))
 		return;
 
@@ -1518,7 +1528,7 @@ static void perf_remove_from_context(struct perf_event *event)
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
-		goto retry;
+		goto retry2;
 	}
 
 	/*
@@ -1592,11 +1602,24 @@ void perf_event_disable(struct perf_event *event)
 		/*
 		 * Disable the event on the cpu that it's on
 		 */
-		cpu_function_call(event->cpu, __perf_event_disable, event);
+retry1:
+		if (!cpu_function_call(event->cpu, __perf_event_disable, event))
+			return;
+
+		raw_spin_lock_irq(&ctx->lock);
+		if (event->state == PERF_EVENT_STATE_ACTIVE) {
+			raw_spin_unlock_irq(&ctx->lock);
+			goto retry1;
+		}
+		if (event->state == PERF_EVENT_STATE_INACTIVE) {
+			update_group_times(event);
+			event->state = PERF_EVENT_STATE_OFF;
+		}
+		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
 
-retry:
+retry2:
 	if (!task_function_call(task, __perf_event_disable, event))
 		return;
 
@@ -1611,7 +1634,7 @@ void perf_event_disable(struct perf_event *event)
 		 * a concurrent perf_event_context_sched_out().
 		 */
 		task = ctx->task;
-		goto retry;
+		goto retry2;
 	}
 
 	/*
@@ -1939,14 +1962,22 @@ perf_install_in_context(struct perf_event_context *ctx,
 
 	if (!task) {
 		/*
-		 * Per cpu events are installed via an smp call and
-		 * the install is always successful.
+		 * Per cpu events are installed via an smp call.
 		 */
-		cpu_function_call(cpu, __perf_install_in_context, event);
+retry1:
+		if (!cpu_function_call(cpu, __perf_install_in_context, event))
+			return;
+		raw_spin_lock_irq(&ctx->lock);
+		if (ctx->is_active) {
+			raw_spin_unlock_irq(&ctx->lock);
+			goto retry1;
+		}
+		add_event_to_ctx(event, ctx);
+		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
 
-retry:
+retry2:
 	if (!task_function_call(task, __perf_install_in_context, event))
 		return;
 
@@ -1957,7 +1988,7 @@ perf_install_in_context(struct perf_event_context *ctx,
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
-		goto retry;
+		goto retry2;
 	}
 
 	/*
@@ -2086,7 +2117,15 @@ void perf_event_enable(struct perf_event *event)
 		/*
 		 * Enable the event on the cpu that it's on
 		 */
-		cpu_function_call(event->cpu, __perf_event_enable, event);
+retry1:
+		if (!cpu_function_call(event->cpu, __perf_event_enable, event))
+			return;
+		raw_spin_lock_irq(&ctx->lock);
+		if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+			raw_spin_unlock_irq(&ctx->lock);
+			goto retry1;
+		}
+		raw_spin_unlock_irq(&ctx->lock);
 		return;
 	}
 
@@ -2104,7 +2143,7 @@ void perf_event_enable(struct perf_event *event)
 	if (event->state == PERF_EVENT_STATE_ERROR)
 		event->state = PERF_EVENT_STATE_OFF;
 
-retry:
+retry2:
 	if (!ctx->is_active) {
 		__perf_event_mark_enabled(event);
 		goto out;
@@ -2127,7 +2166,7 @@ void perf_event_enable(struct perf_event *event)
 		 * perf_event_context_sched_out()
 		 */
 		task = ctx->task;
-		goto retry;
+		goto retry2;
 	}
 
 out:
@@ -3243,6 +3282,7 @@ static void __free_event(struct perf_event *event)
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
+
 static void free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending);
@@ -3271,6 +3311,8 @@ static void free_event(struct perf_event *event)
 	if (is_cgroup_event(event))
 		perf_detach_cgroup(event);
 
+	printk("destroying event (%p) : %d:%lx for cpu: %d\n",
+			event, event->attr.type, event->attr.config, event->cpu);
 
 	__free_event(event);
 }
@@ -4831,6 +4873,28 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+		if (!event) {
+			struct perf_event *tmp;
+			printk("on cpu: %d, %p:(%p,%p)\n", smp_processor_id(),
+					&ctx->event_list,
+					ctx->event_list.prev,
+					ctx->event_list.next);
+
+			tmp = ctx->event_list.next;
+			while (tmp) {
+				printk("%02d: event(%p): %d:%lx %d %d\n", 
+						smp_processor_id(), tmp, 
+						tmp ? tmp->attr.type : -1, 
+						tmp ? tmp->attr.config : 0,
+						tmp ? tmp->state : 0,
+						tmp ? tmp->cpu : 0);
+				printk("%02d:  ->next (%p)\n", 
+						smp_processor_id(),
+						tmp->event_entry.next);
+				tmp = tmp->event_entry.next;
+			}
+		}
+
 		if (event->state < PERF_EVENT_STATE_INACTIVE)
 			continue;
 		if (!event_filter_match(event))
@@ -6722,6 +6786,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	event->state		= PERF_EVENT_STATE_INACTIVE;
 
+	printk("creating event (%p) : %d:%lx for cpu: %d\n",
+			event, attr->type, attr->config, cpu);
+
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
 
@@ -7869,29 +7936,35 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
-	struct perf_event_context *ctx = __info;
-	struct perf_event *event, *tmp;
+	struct perf_cpu_context *cpuctx = __info;
+	struct perf_event_context *ctx = &cpuctx->ctx;
+	struct perf_event *event;
 
 	perf_pmu_rotate_stop(ctx->pmu);
 
-	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-		__perf_remove_from_context(event);
-	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
-		__perf_remove_from_context(event);
+	raw_spin_lock(&ctx->lock);
+	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+
+	list_for_each_entry(event, &ctx->event_list, event_entry)
+		event->state = PERF_EVENT_STATE_off;
+
+	raw_spin_unlock(&ctx->lock);
 }
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
 	int idx;
 
 	idx = srcu_read_lock(&pmus_srcu);
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
 
 		mutex_lock(&ctx->mutex);
-		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		smp_call_function_single(cpu, __perf_event_exit_context, cpuctx, 1);
 		mutex_unlock(&ctx->mutex);
 	}
 	srcu_read_unlock(&pmus_srcu, idx);
@@ -7901,11 +7974,12 @@ static void perf_event_exit_cpu(int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
+	perf_event_exit_cpu_context(cpu);
+
 	mutex_lock(&swhash->hlist_mutex);
 	swevent_hlist_release(swhash);
 	mutex_unlock(&swhash->hlist_mutex);
 
-	perf_event_exit_cpu_context(cpu);
 }
 #else
 static inline void perf_event_exit_cpu(int cpu) { }

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-19 16:28   ` Peter Zijlstra
@ 2014-02-19 18:03     ` Stephane Eranian
  2014-02-19 18:36       ` Peter Zijlstra
  2014-02-19 19:37     ` Drew Richardson
  1 sibling, 1 reply; 12+ messages in thread
From: Stephane Eranian @ 2014-02-19 18:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Will Deacon, Drew Richardson, linux-kernel, Arnaldo, Pawel Moll,
	Wade Cherry

Peter,

On Wed, Feb 19, 2014 at 5:28 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
>> On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
>> > While adding CPU on/offlining support during perf captures I get an
>> > Oops both on ARM as well as my desktop x86_64. Below is a small
>> > program that duplicates the issue.
>>
>> [...]
>>
>> FWIW I can reproduce this easily with -rc3 on my x86 laptop running
>> hackbench in parallel with a tweaked version of your test (using
>> _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
>> both CPU2 and CPU3).
>>
>
> OK; found it, or at least, I stopped being able to make my box explode.
>
> Drew, can you try the below and confirm? It's still got all the debug goo
> in too, but *sheesh* took me long enough.
>
> Stephane, there's two task_function_call()s in the CGROUP_PERF code that
> don't check return codes; can you please audit those?
>
I am trying to understand the context here.
Are you saying, we may call an offline CPU?
I saw that sometimes you retry, sometimes you don't.

For perf_cgroup_attach(), we invoke task_function_call()
to force a PMU context switch on the task which is now monitored in cgroup mode.
If the CPU is offline, then the task is switched out and monitoring
has been stopped;
 no need to retry or do anything more.

For perf_cgroup_exit(), this is pretty much the same logic.

am I missing anything else?

> ---
>  kernel/events/core.c | 128 ++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 101 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 2067cbb378eb..7c378fc21bff 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -78,7 +78,7 @@ static void remote_function(void *data)
>   *         -ESRCH  - when the process isn't running
>   *         -EAGAIN - when the process moved away
>   */
> -static int
> +static int __must_check
>  task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
>  {
>         struct remote_function_call data = {
> @@ -103,7 +103,8 @@ task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
>   *
>   * returns: @func return value or -ENXIO when the cpu is offline
>   */
> -static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
> +static int __must_check
> +cpu_function_call(int cpu, int (*func) (void *info), void *info)
>  {
>         struct remote_function_call data = {
>                 .p      = NULL,
> @@ -1500,14 +1501,23 @@ static void perf_remove_from_context(struct perf_event *event)
>
>         if (!task) {
>                 /*
> -                * Per cpu events are removed via an smp call and
> -                * the removal is always successful.
> +                * Per cpu events are removed via an smp call.
>                  */
> -               cpu_function_call(event->cpu, __perf_remove_from_context, event);
> +retry1:
> +               if (!cpu_function_call(event->cpu, __perf_remove_from_context, event))
> +                       return;
> +
> +               raw_spin_lock_irq(&ctx->lock);
> +               if (ctx->is_active) {
> +                       raw_spin_unlock_irq(&ctx->lock);
> +                       goto retry1;
> +               }
> +               list_del_event(event, ctx);
> +               raw_spin_unlock_irq(&ctx->lock);
>                 return;
>         }
>
> -retry:
> +retry2:
>         if (!task_function_call(task, __perf_remove_from_context, event))
>                 return;
>
> @@ -1518,7 +1528,7 @@ static void perf_remove_from_context(struct perf_event *event)
>          */
>         if (ctx->is_active) {
>                 raw_spin_unlock_irq(&ctx->lock);
> -               goto retry;
> +               goto retry2;
>         }
>
>         /*
> @@ -1592,11 +1602,24 @@ void perf_event_disable(struct perf_event *event)
>                 /*
>                  * Disable the event on the cpu that it's on
>                  */
> -               cpu_function_call(event->cpu, __perf_event_disable, event);
> +retry1:
> +               if (!cpu_function_call(event->cpu, __perf_event_disable, event))
> +                       return;
> +
> +               raw_spin_lock_irq(&ctx->lock);
> +               if (event->state == PERF_EVENT_STATE_ACTIVE) {
> +                       raw_spin_unlock_irq(&ctx->lock);
> +                       goto retry1;
> +               }
> +               if (event->state == PERF_EVENT_STATE_INACTIVE) {
> +                       update_group_times(event);
> +                       event->state = PERF_EVENT_STATE_OFF;
> +               }
> +               raw_spin_unlock_irq(&ctx->lock);
>                 return;
>         }
>
> -retry:
> +retry2:
>         if (!task_function_call(task, __perf_event_disable, event))
>                 return;
>
> @@ -1611,7 +1634,7 @@ void perf_event_disable(struct perf_event *event)
>                  * a concurrent perf_event_context_sched_out().
>                  */
>                 task = ctx->task;
> -               goto retry;
> +               goto retry2;
>         }
>
>         /*
> @@ -1939,14 +1962,22 @@ perf_install_in_context(struct perf_event_context *ctx,
>
>         if (!task) {
>                 /*
> -                * Per cpu events are installed via an smp call and
> -                * the install is always successful.
> +                * Per cpu events are installed via an smp call.
>                  */
> -               cpu_function_call(cpu, __perf_install_in_context, event);
> +retry1:
> +               if (!cpu_function_call(cpu, __perf_install_in_context, event))
> +                       return;
> +               raw_spin_lock_irq(&ctx->lock);
> +               if (ctx->is_active) {
> +                       raw_spin_unlock_irq(&ctx->lock);
> +                       goto retry1;
> +               }
> +               add_event_to_ctx(event, ctx);
> +               raw_spin_unlock_irq(&ctx->lock);
>                 return;
>         }
>
> -retry:
> +retry2:
>         if (!task_function_call(task, __perf_install_in_context, event))
>                 return;
>
> @@ -1957,7 +1988,7 @@ perf_install_in_context(struct perf_event_context *ctx,
>          */
>         if (ctx->is_active) {
>                 raw_spin_unlock_irq(&ctx->lock);
> -               goto retry;
> +               goto retry2;
>         }
>
>         /*
> @@ -2086,7 +2117,15 @@ void perf_event_enable(struct perf_event *event)
>                 /*
>                  * Enable the event on the cpu that it's on
>                  */
> -               cpu_function_call(event->cpu, __perf_event_enable, event);
> +retry1:
> +               if (!cpu_function_call(event->cpu, __perf_event_enable, event))
> +                       return;
> +               raw_spin_lock_irq(&ctx->lock);
> +               if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
> +                       raw_spin_unlock_irq(&ctx->lock);
> +                       goto retry1;
> +               }
> +               raw_spin_unlock_irq(&ctx->lock);
>                 return;
>         }
>
> @@ -2104,7 +2143,7 @@ void perf_event_enable(struct perf_event *event)
>         if (event->state == PERF_EVENT_STATE_ERROR)
>                 event->state = PERF_EVENT_STATE_OFF;
>
> -retry:
> +retry2:
>         if (!ctx->is_active) {
>                 __perf_event_mark_enabled(event);
>                 goto out;
> @@ -2127,7 +2166,7 @@ void perf_event_enable(struct perf_event *event)
>                  * perf_event_context_sched_out()
>                  */
>                 task = ctx->task;
> -               goto retry;
> +               goto retry2;
>         }
>
>  out:
> @@ -3243,6 +3282,7 @@ static void __free_event(struct perf_event *event)
>
>         call_rcu(&event->rcu_head, free_event_rcu);
>  }
> +
>  static void free_event(struct perf_event *event)
>  {
>         irq_work_sync(&event->pending);
> @@ -3271,6 +3311,8 @@ static void free_event(struct perf_event *event)
>         if (is_cgroup_event(event))
>                 perf_detach_cgroup(event);
>
> +       printk("destroying event (%p) : %d:%lx for cpu: %d\n",
> +                       event, event->attr.type, event->attr.config, event->cpu);
>
>         __free_event(event);
>  }
> @@ -4831,6 +4873,28 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
>         struct perf_event *event;
>
>         list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
> +               if (!event) {
> +                       struct perf_event *tmp;
> +                       printk("on cpu: %d, %p:(%p,%p)\n", smp_processor_id(),
> +                                       &ctx->event_list,
> +                                       ctx->event_list.prev,
> +                                       ctx->event_list.next);
> +
> +                       tmp = ctx->event_list.next;
> +                       while (tmp) {
> +                               printk("%02d: event(%p): %d:%lx %d %d\n",
> +                                               smp_processor_id(), tmp,
> +                                               tmp ? tmp->attr.type : -1,
> +                                               tmp ? tmp->attr.config : 0,
> +                                               tmp ? tmp->state : 0,
> +                                               tmp ? tmp->cpu : 0);
> +                               printk("%02d:  ->next (%p)\n",
> +                                               smp_processor_id(),
> +                                               tmp->event_entry.next);
> +                               tmp = tmp->event_entry.next;
> +                       }
> +               }
> +
>                 if (event->state < PERF_EVENT_STATE_INACTIVE)
>                         continue;
>                 if (!event_filter_match(event))
> @@ -6722,6 +6786,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>
>         event->state            = PERF_EVENT_STATE_INACTIVE;
>
> +       printk("creating event (%p) : %d:%lx for cpu: %d\n",
> +                       event, attr->type, attr->config, cpu);
> +
>         if (task) {
>                 event->attach_state = PERF_ATTACH_TASK;
>
> @@ -7869,29 +7936,35 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
>
>  static void __perf_event_exit_context(void *__info)
>  {
> -       struct perf_event_context *ctx = __info;
> -       struct perf_event *event, *tmp;
> +       struct perf_cpu_context *cpuctx = __info;
> +       struct perf_event_context *ctx = &cpuctx->ctx;
> +       struct perf_event *event;
>
>         perf_pmu_rotate_stop(ctx->pmu);
>
> -       list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
> -               __perf_remove_from_context(event);
> -       list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
> -               __perf_remove_from_context(event);
> +       raw_spin_lock(&ctx->lock);
> +       ctx_sched_out(ctx, cpuctx, EVENT_ALL);
> +
> +       list_for_each_entry(event, &ctx->event_list, event_entry)
> +               event->state = PERF_EVENT_STATE_off;
> +
> +       raw_spin_unlock(&ctx->lock);
>  }
>
>  static void perf_event_exit_cpu_context(int cpu)
>  {
> +       struct perf_cpu_context *cpuctx;
>         struct perf_event_context *ctx;
>         struct pmu *pmu;
>         int idx;
>
>         idx = srcu_read_lock(&pmus_srcu);
>         list_for_each_entry_rcu(pmu, &pmus, entry) {
> -               ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
> +               cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
> +               ctx = &cpuctx->ctx;
>
>                 mutex_lock(&ctx->mutex);
> -               smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
> +               smp_call_function_single(cpu, __perf_event_exit_context, cpuctx, 1);
>                 mutex_unlock(&ctx->mutex);
>         }
>         srcu_read_unlock(&pmus_srcu, idx);
> @@ -7901,11 +7974,12 @@ static void perf_event_exit_cpu(int cpu)
>  {
>         struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
>
> +       perf_event_exit_cpu_context(cpu);
> +
>         mutex_lock(&swhash->hlist_mutex);
>         swevent_hlist_release(swhash);
>         mutex_unlock(&swhash->hlist_mutex);
>
> -       perf_event_exit_cpu_context(cpu);
>  }
>  #else
>  static inline void perf_event_exit_cpu(int cpu) { }

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-19 18:03     ` Stephane Eranian
@ 2014-02-19 18:36       ` Peter Zijlstra
  2014-02-19 19:59         ` Stephane Eranian
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2014-02-19 18:36 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Will Deacon, Drew Richardson, linux-kernel, Arnaldo, Pawel Moll,
	Wade Cherry

On Wed, Feb 19, 2014 at 07:03:13PM +0100, Stephane Eranian wrote:
> I am trying to understand the context here.
> Are you saying, we may call an offline CPU?

Yes, that is what's happening.

> I saw that sometimes you retry, sometimes you don't.

I tried to do exactly what we do for the task case which is far more
likely to fail. Could be I messed up.

I should probably write the function differently and have a common retry
path instead of duplicating everything.

> For perf_cgroup_attach(), we invoke task_function_call()
> to force a PMU context switch on the task which is now monitored in cgroup mode.
> If the CPU is offline, then the task is switched out and monitoring
> has been stopped;
>  no need to retry or do anything more.
> 
> For perf_cgroup_exit(), this is pretty much the same logic.
> 
> am I missing anything else?

Don't think so; I'll add a comment there. I was just too tired to make
sense of things.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-19 16:28   ` Peter Zijlstra
  2014-02-19 18:03     ` Stephane Eranian
@ 2014-02-19 19:37     ` Drew Richardson
  1 sibling, 0 replies; 12+ messages in thread
From: Drew Richardson @ 2014-02-19 19:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Will Deacon, linux-kernel, Arnaldo, Pawel Moll, Wade Cherry,
	Stephane Eranian

On Wed, Feb 19, 2014 at 04:28:19PM +0000, Peter Zijlstra wrote:
> On Tue, Feb 18, 2014 at 10:18:31AM +0000, Will Deacon wrote:
> > On Mon, Feb 10, 2014 at 10:17:59PM +0000, Drew Richardson wrote:
> > > While adding CPU on/offlining support during perf captures I get an
> > > Oops both on ARM as well as my desktop x86_64. Below is a small
> > > program that duplicates the issue.
> > 
> > [...]
> > 
> > FWIW I can reproduce this easily with -rc3 on my x86 laptop running
> > hackbench in parallel with a tweaked version of your test (using
> > _SC_NPROCESSORS_ONLN instead of _SC_NPROCESSORS_CONF and hotplugging off
> > both CPU2 and CPU3).
> > 
> 
> OK; found it, or at least, I stopped being able to make my box explode.
> 
> Drew, can you try the below and confirm? It's still got all the debug goo
> in too, but *sheesh* took me long enough.

This works great for me. No Oops or garbage data when a core is
offlined.

> +	raw_spin_lock(&ctx->lock);
> +	ctx_sched_out(ctx, cpuctx, EVENT_ALL);
> +
> +	list_for_each_entry(event, &ctx->event_list, event_entry)
> +		event->state = PERF_EVENT_STATE_off;
> +
> +	raw_spin_unlock(&ctx->lock);

I changed PERF_EVENT_STATE_off to PERF_EVENT_STATE_OFF.

Drew

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-19 18:36       ` Peter Zijlstra
@ 2014-02-19 19:59         ` Stephane Eranian
  2014-02-19 20:24           ` Peter Zijlstra
  0 siblings, 1 reply; 12+ messages in thread
From: Stephane Eranian @ 2014-02-19 19:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Will Deacon, Drew Richardson, linux-kernel, Arnaldo, Pawel Moll,
	Wade Cherry

On Wed, Feb 19, 2014 at 7:36 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Feb 19, 2014 at 07:03:13PM +0100, Stephane Eranian wrote:
>> I am trying to understand the context here.
>> Are you saying, we may call an offline CPU?
>
> Yes, that is what's happening.
>
>> I saw that sometimes you retry, sometimes you don't.
>
> I tried to do exactly what we do for the task case which is far more
> likely to fail. Could be I messed up.
>
I am not sure why you need to retry. If the CPU is offline, it is offline.
Or are you saying, you get an error, but you don't know the exact
reason, thus you keep trying? But how do you get out of this if
the CPU stays offline?


> I should probably write the function differently and have a common retry
> path instead of duplicating everything.
>
>> For perf_cgroup_attach(), we invoke task_function_call()
>> to force a PMU context switch on the task which is now monitored in cgroup mode.
>> If the CPU is offline, then the task is switched out and monitoring
>> has been stopped;
>>  no need to retry or do anything more.
>>
>> For perf_cgroup_exit(), this is pretty much the same logic.
>>
>> am I missing anything else?
>
> Don't think so; I'll add a comment there. I was just too tired to make
> sense of things.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: Perf Oops on 3.14-rc2
  2014-02-19 19:59         ` Stephane Eranian
@ 2014-02-19 20:24           ` Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: Peter Zijlstra @ 2014-02-19 20:24 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Will Deacon, Drew Richardson, linux-kernel, Arnaldo, Pawel Moll,
	Wade Cherry

On Wed, Feb 19, 2014 at 08:59:08PM +0100, Stephane Eranian wrote:
> On Wed, Feb 19, 2014 at 7:36 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> > On Wed, Feb 19, 2014 at 07:03:13PM +0100, Stephane Eranian wrote:
> >> I am trying to understand the context here.
> >> Are you saying, we may call an offline CPU?
> >
> > Yes, that is what's happening.
> >
> >> I saw that sometimes you retry, sometimes you don't.
> >
> > I tried to do exactly what we do for the task case which is far more
> > likely to fail. Could be I messed up.
> >
> I am not sure why you need to retry. If the CPU is offline, it is offline.
> Or are you saying, you get an error, but you don't know the exact
> reason, thus you keep trying? But how do you get out of this if
> the CPU stays offline?

Ah, so take perf_remove_from_context() as before the patch; if the
cpu_function_call() fails because the CPU is offline, it doesn't call
list_del_event().

Now the offline function is supposed to take them off the list, but it
doesn't actually in case they're grouped.

This leaves a free()d event on the offline cpu's context list.

After that things quickly go downwards.

But before I got there I was led down a few too many rabbit holes trying
to figure out wtf happened.


We could probably fix it differently though. But by the time I more or
less understood things I was too tired to make something pretty.

Anyway: if you do something when cpu_function_call() fails, you also
have to check whether the CPU came back up since you tried — at which
point you've got the same pattern as we have for task_function_call().

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-02-19 20:24 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-02-10 22:17 Perf Oops on 3.14-rc2 Drew Richardson
2014-02-18 10:18 ` Will Deacon
2014-02-18 10:52   ` Peter Zijlstra
2014-02-18 11:03     ` Will Deacon
2014-02-18 10:54   ` Peter Zijlstra
2014-02-18 11:01     ` Will Deacon
2014-02-19 16:28   ` Peter Zijlstra
2014-02-19 18:03     ` Stephane Eranian
2014-02-19 18:36       ` Peter Zijlstra
2014-02-19 19:59         ` Stephane Eranian
2014-02-19 20:24           ` Peter Zijlstra
2014-02-19 19:37     ` Drew Richardson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.