linux-edac.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-11-01 16:47 Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2017-11-01 16:47 UTC (permalink / raw)
  To: Laura Abbott, Luck, Tony
  Cc: Andi Kleen, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel

On Wed, Nov 01, 2017 at 07:56:20AM -0700, Laura Abbott wrote:
> Did these get queued up somewhere? I know last week was OSSEU/Ksummit
> so people may still be playing catch-up.

Ah, thanks for the reminder.

/me goes and dusts it off.

Ok, here it is, injecting some MCEs in a guest seems to work fine,
mcelog logs them.

Tony?
---
From: Borislav Petkov <bp@suse.de>
Date: Wed, 11 Oct 2017 15:49:19 +0200
Subject: [PATCH] x86/mcelog: Get rid of RCU remnants

/dev/mcelog is called in process context now as part of the notifier
chain and doesn't need any of the fancy RCU and lockless accesses which
it did in atomic context.

Axe it all in favor of a simple mutex synchronization.

Reported-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1498969
---
 arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 121 +++++++-------------------------
 1 file changed, 27 insertions(+), 94 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..7f85b76f43bc 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
-
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
-
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
-
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return NOTIFY_OK;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+	unsigned int entry;
+
+	mutex_lock(&mce_chrdev_read_mutex);
+
+	entry = mcelog.next;
+
+	/*
+	 * When the buffer fills up discard new entries. Assume that the
+	 * earlier errors are the more interesting ones:
+	 */
+	if (entry >= MCE_LOG_LEN) {
+		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+		goto unlock;
 	}
+
+	mcelog.next = entry + 1;
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
 
+unlock:
+	mutex_unlock(&mce_chrdev_read_mutex);
+
 	return NOTIFY_OK;
 }
 
@@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
 static int mce_apei_read_done;
 
 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 				size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
+	unsigned next;
 	int i, err;
 
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	if (!mce_apei_read_done) {
@@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
-	next = mce_log_get_idx_check(mcelog.next);
-
 	/* Only supports full reads right now */
 	err = -EINVAL;
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
 		goto out;
 
+	next = mcelog.next;
 	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
 
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
+	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
+		err |= copy_to_user(buf, m, sizeof(*m));
+		buf += sizeof(*m);
 	}
 
+	memset(mcelog.entry, 0, next * sizeof(struct mce));
+	mcelog.next = 0;
+
 	if (err)
 		err = -EFAULT;
 
 out:
 	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-11-01 20:07 Luck, Tony
  0 siblings, 0 replies; 13+ messages in thread
From: Luck, Tony @ 2017-11-01 20:07 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Laura Abbott, Andi Kleen, Jeremy Cline, Thomas Gleixner,
	Ingo Molnar, H. Peter Anvin, x86, linux-edac, linux-kernel

On Wed, Nov 01, 2017 at 05:47:54PM +0100, Borislav Petkov wrote:
> On Wed, Nov 01, 2017 at 07:56:20AM -0700, Laura Abbott wrote:
> > Did these get queued up somewhere? I know last week was OSSEU/Ksummit
> > so people may still be playing catch-up.
> 
> Ah, thanks for the reminder.
> 
> /me goes and dusts it off.
> 
> Ok, here it is, injecting some MCEs in a guest seems to work fine,
> mcelog logs them.
> 
> Tony?

Yup. I did a bit more testing on bare metal with injection:

1) Just one error
	Worked
2) "kill -STOP `pgrep mcelog`" ; inject 5 errors ; "kill -CONT `pgrep mcelog`"
	Worked (all five errors reported correctly)
3) "kill -STOP `pgrep mcelog`" ; inject 35 errors ; "kill -CONT `pgrep mcelog`"
	mcelog reported Warning: MCE buffer is overflowed.
	First 32 (MCE_LOG_LEN) errors reported correctly.

Reviewed-and-tested-by: Tony Luck <tony.luck@intel.com>

-Tony
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-11-01 14:56 Laura Abbott
  0 siblings, 0 replies; 13+ messages in thread
From: Laura Abbott @ 2017-11-01 14:56 UTC (permalink / raw)
  To: Luck, Tony, Borislav Petkov
  Cc: Andi Kleen, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel

On 10/16/2017 11:28 AM, Luck, Tony wrote:
> On Sun, Oct 15, 2017 at 11:40:46AM +0200, Borislav Petkov wrote:
>> On Wed, Oct 11, 2017 at 09:34:22PM +0000, Luck, Tony wrote:
>>>> here's a second attempt at a more rigorous simplification: RCU stuff is
>>>> gone and only a single loop scans through the elements.
>>>
>>> The dev_mce_log() changes look good now.
>>>
>>> You can apply the axe to more bits of mce_chrdev_read() though. Like that
>>
>> That provoked a very serious axing. Please check whether I went too far. Hunk
>> below is ontop of what got axed already:
> 
> I think a few more lines can go.  Almost everything relating to the "finished"
> element. dev_mce_log() must still set it (because the user mode mcelog(8)
> daemon will grumble if we give it records that don't have it set). But
> since everything is protected by mce_chrdev_read_mutex we can't have
> "Old left over entries" to skip. Nor is there any way that finished can't
> be set for an entry in 0..mcelog.next when it comes to mce_chrdev_read().
> 
> This patch on top of your two???
> 

Did these get queued up somewhere? I know last week was OSSEU/Ksummit
so people may still be playing catch-up.

Thanks,
Laura

> -Tony
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
> index 17d2bab25720..7f85b76f43bc 100644
> --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
> +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
> @@ -49,11 +49,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
>  
>  	mutex_lock(&mce_chrdev_read_mutex);
>  
> -	for (entry = mcelog.next; entry < MCE_LOG_LEN; entry++) {
> -		/* Old left over entry. Skip: */
> -		if (mcelog.entry[entry].finished)
> -			continue;
> -	}
> +	entry = mcelog.next;
>  
>  	/*
>  	 * When the buffer fills up discard new entries. Assume that the
> @@ -231,9 +227,6 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
>  	for (i = 0; i < next; i++) {
>  		struct mce *m = &mcelog.entry[i];
>  
> -		if (!m->finished)
> -			continue;
> -
>  		err |= copy_to_user(buf, m, sizeof(*m));
>  		buf += sizeof(*m);
>  	}
>
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-16 18:28 Luck, Tony
  0 siblings, 0 replies; 13+ messages in thread
From: Luck, Tony @ 2017-10-16 18:28 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Andi Kleen, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

On Sun, Oct 15, 2017 at 11:40:46AM +0200, Borislav Petkov wrote:
> On Wed, Oct 11, 2017 at 09:34:22PM +0000, Luck, Tony wrote:
> > > here's a second attempt at a more rigorous simplification: RCU stuff is
> > > gone and only a single loop scans through the elements.
> > 
> > The dev_mce_log() changes look good now.
> > 
> > You can apply the axe to more bits of mce_chrdev_read() though. Like that
> 
> That provoked a very serious axing. Please check whether I went too far. Hunk
> below is ontop of what got axed already:

I think a few more lines can go.  Almost everything relating to the "finished"
element. dev_mce_log() must still set it (because the user mode mcelog(8)
daemon will grumble if we give it records that don't have it set). But
since everything is protected by mce_chrdev_read_mutex we can't have
"Old left over entries" to skip. Nor is there any way that finished can't
be set for an entry in 0..mcelog.next when it comes to mce_chrdev_read().

This patch on top of your two???

-Tony
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 17d2bab25720..7f85b76f43bc 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -49,11 +49,7 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 
 	mutex_lock(&mce_chrdev_read_mutex);
 
-	for (entry = mcelog.next; entry < MCE_LOG_LEN; entry++) {
-		/* Old left over entry. Skip: */
-		if (mcelog.entry[entry].finished)
-			continue;
-	}
+	entry = mcelog.next;
 
 	/*
 	 * When the buffer fills up discard new entries. Assume that the
@@ -231,9 +227,6 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (!m->finished)
-			continue;
-
 		err |= copy_to_user(buf, m, sizeof(*m));
 		buf += sizeof(*m);
 	}

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-15  9:40 Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2017-10-15  9:40 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Andi Kleen, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

On Wed, Oct 11, 2017 at 09:34:22PM +0000, Luck, Tony wrote:
> > here's a second attempt at a more rigorous simplification: RCU stuff is
> > gone and only a single loop scans through the elements.
> 
> The dev_mce_log() changes look good now.
> 
> You can apply the axe to more bits of mce_chrdev_read() though. Like that

That provoked a very serious axing. Please check whether I went too far. Hunk
below is ontop of what got axed already:

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 1e1c6d22c93e..17d2bab25720 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -162,13 +162,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
 static int mce_apei_read_done;
 
 /* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -216,14 +209,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 				size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
+	unsigned next;
 	int i, err;
 
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
-
 	mutex_lock(&mce_chrdev_read_mutex);
 
 	if (!mce_apei_read_done) {
@@ -232,63 +220,32 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
-	next = mcelog.next;
-
 	/* Only supports full reads right now */
 	err = -EINVAL;
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
 		goto out;
 
+	next = mcelog.next;
 	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
 
-	for (i = next; i < MCE_LOG_LEN; i++) {
+	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
+		if (!m->finished)
+			continue;
+
+		err |= copy_to_user(buf, m, sizeof(*m));
+		buf += sizeof(*m);
 	}
 
+	memset(mcelog.entry, 0, next * sizeof(struct mce));
+	mcelog.next = 0;
+
 	if (err)
 		err = -EFAULT;
 
 out:
 	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-12 22:13 Andi Kleen
  0 siblings, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2017-10-12 22:13 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

On Thu, Oct 12, 2017 at 11:02:52AM +0200, Borislav Petkov wrote:
> On Wed, Oct 11, 2017 at 04:11:37PM -0700, Andi Kleen wrote:
> > Sorry I take back what I wrote earlier. This RCU is actually still needed,
> > otherwise the reader could see partially written entries.
> 
> Why? All the readers and writers take the mutex now.

Ok fair enough.
-Andi
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-12  9:02 Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2017-10-12  9:02 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Luck, Tony, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

On Wed, Oct 11, 2017 at 04:11:37PM -0700, Andi Kleen wrote:
> Sorry I take back what I wrote earlier. This RCU is actually still needed,
> otherwise the reader could see partially written entries.

Why? All the readers and writers take the mutex now.

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-11 23:11 Andi Kleen
  0 siblings, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2017-10-11 23:11 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

> -	next = mce_log_get_idx_check(mcelog.next);
> +	next = mcelog.next;
>  
>  	/* Only supports full reads right now */
>  	err = -EINVAL;
> @@ -281,8 +266,6 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
>  		next = cmpxchg(&mcelog.next, prev, 0);
>  	} while (next != prev);
>  
> -	synchronize_sched();

Sorry I take back what I wrote earlier. This RCU is actually still needed,
otherwise the reader could see partially written entries.

So rather have to keep that, and change the read code to run with rcu_read_lock()

-Andi
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-11 21:34 Luck, Tony
  0 siblings, 0 replies; 13+ messages in thread
From: Luck, Tony @ 2017-10-11 21:34 UTC (permalink / raw)
  To: Borislav Petkov, Andi Kleen
  Cc: Jeremy Cline, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	linux-edac, linux-kernel, Laura Abbott

> here's a second attempt at a more rigorous simplification: RCU stuff is
> gone and only a single loop scans through the elements.

The dev_mce_log() changes look good now.

You can apply the axe to more bits of mce_chrdev_read() though. Like that

	while (!m->finished) {

we hold the mutex, the writer of that holds the mutex ... the spin loop is going to always
time out.  

-Tony

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-11 11:50 Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2017-10-11 11:50 UTC (permalink / raw)
  To: Andi Kleen, Luck, Tony
  Cc: Jeremy Cline, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	linux-edac, linux-kernel, Laura Abbott

Ok,

here's a second attempt at a more rigorous simplification: RCU stuff is
gone and only a single loop scans through the elements.

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..1e1c6d22c93e 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
-
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -53,43 +45,36 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
-
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
-
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return NOTIFY_OK;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+	unsigned int entry;
+
+	mutex_lock(&mce_chrdev_read_mutex);
+
+	for (entry = mcelog.next; entry < MCE_LOG_LEN; entry++) {
+		/* Old left over entry. Skip: */
+		if (mcelog.entry[entry].finished)
+			continue;
+	}
+
+	/*
+	 * When the buffer fills up discard new entries. Assume that the
+	 * earlier errors are the more interesting ones:
+	 */
+	if (entry >= MCE_LOG_LEN) {
+		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+		goto unlock;
 	}
+
+	mcelog.next = entry + 1;
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
 
+unlock:
+	mutex_unlock(&mce_chrdev_read_mutex);
+
 	return NOTIFY_OK;
 }
 
@@ -247,7 +232,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 			goto out;
 	}
 
-	next = mce_log_get_idx_check(mcelog.next);
+	next = mcelog.next;
 
 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -281,8 +266,6 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		next = cmpxchg(&mcelog.next, prev, 0);
 	} while (next != prev);
 
-	synchronize_sched();
-
 	/*
 	 * Collect entries that were still getting written before the
 	 * synchronize.

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-10 20:13 Andi Kleen
  0 siblings, 0 replies; 13+ messages in thread
From: Andi Kleen @ 2017-10-10 20:13 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Borislav Petkov, Jeremy Cline, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, x86, linux-edac, linux-kernel, Laura Abbott

"Luck, Tony" <tony.luck@intel.com> writes:

>>  	for (;;) {
>>  		entry = mce_log_get_idx_check(mcelog.next);
>
> Can't this get even simpler? Do we need the loop? The mutex
> will now protect us while we check to see if there is a slot
> to stash this new entry. Also just say:

IMHO the warning is just bogus. There's nothing here that actually
uses RCU. I would just remove it.

>>  			if (entry >= MCE_LOG_LEN) {
>> -				set_bit(MCE_OVERFLOW,
>> -					(unsigned long *)&mcelog.flags);
>> +				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
>
> Need to mutex_unlock(&mce_chrdev_read_mutex); here.

And yes that too.

-Andi
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-10 20:08 Luck, Tony
  0 siblings, 0 replies; 13+ messages in thread
From: Luck, Tony @ 2017-10-10 20:08 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Jeremy Cline, Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86,
	linux-edac, linux-kernel, Laura Abbott

>  	for (;;) {
>  		entry = mce_log_get_idx_check(mcelog.next);

Can't this get even simpler? Do we need the loop? The mutex
will now protect us while we check to see if there is a slot
to stash this new entry. Also just say:

		entry = mcelog.next;

>  		for (;;) {
> @@ -66,10 +67,10 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
>  			 * interesting ones:
>  			 */
>  			if (entry >= MCE_LOG_LEN) {
> -				set_bit(MCE_OVERFLOW,
> -					(unsigned long *)&mcelog.flags);
> +				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);

Need to mutex_unlock(&mce_chrdev_read_mutex); here.

>  				return NOTIFY_OK;
>  			}
> +
>  			/* Old left over entry. Skip: */
>  			if (mcelog.entry[entry].finished) {
>  				entry++;
> @@ -77,15 +78,13 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
>  			}
>  			break;
>  		}
> -		smp_rmb();
> -		next = entry + 1;
> -		if (cmpxchg(&mcelog.next, entry, next) == entry)
> -			break;

Ummm. Without this "break" how will we exit the loop (more fuel
for getting rid of the loop.

> +		mcelog.next = entry + 1;
>  	}

-Tony
---
To unsubscribe from this list: send the line "unsubscribe linux-edac" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread
* x86/mce: suspicious RCU usage in 4.13.4
@ 2017-10-10 19:44 Borislav Petkov
  0 siblings, 0 replies; 13+ messages in thread
From: Borislav Petkov @ 2017-10-10 19:44 UTC (permalink / raw)
  To: Jeremy Cline, Tony Luck
  Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, linux-edac,
	linux-kernel, Laura Abbott

On Tue, Oct 10, 2017 at 03:00:09PM -0400, Jeremy Cline wrote:
> Hello,
> 
> A Fedora user has reported an issue about suspicious RCU usage in
> dev-mcelog. It looks like perhaps the notifier call chain is not
> acquiring the mce_chrdev_read_mutex? The traceback is
> 
> [36915.633804] =============================
> [36915.633805] WARNING: suspicious RCU usage
> [36915.633808] 4.13.4-301.fc27.x86_64+debug #1 Not tainted
> [36915.633809] -----------------------------
> [36915.633811] arch/x86/kernel/cpu/mcheck/dev-mcelog.c:60 suspicious
> mce_log_get_idx_check() usage!
> [36915.633812]
>                other info that might help us debug this:
> 
> [36915.633813]
>                rcu_scheduler_active = 2, debug_locks = 1
> [36915.633815] 3 locks held by kworker/1:2/14637:
> [36915.633816]  #0:  ("events"){.+.+.+}, at: [<ffffffffaa0d2ac0>]
> process_one_work+0x1d0/0x6a0
> [36915.633827]  #1:  ((&mce_work)){+.+...}, at: [<ffffffffaa0d2ac0>]
> process_one_work+0x1d0/0x6a0
> [36915.633833]  #2:  ((x86_mce_decoder_chain).rwsem){++++..}, at:
> [<ffffffffaa0dc92f>] blocking_notifier_call_chain+0x2f/0x70
> [36915.633840]
>                stack backtrace:
> [36915.633843] CPU: 1 PID: 14637 Comm: kworker/1:2 Not tainted
> 4.13.4-301.fc27.x86_64+debug #1
> [36915.633844] Hardware name: Gigabyte Technology Co., Ltd.
> Z87M-D3H/Z87M-D3H, BIOS F11 08/12/2014
> [36915.633847] Workqueue: events mce_gen_pool_process
> [36915.633849] Call Trace:
> [36915.633854]  dump_stack+0x8e/0xd6
> [36915.633858]  lockdep_rcu_suspicious+0xc5/0x100
> [36915.633862]  dev_mce_log+0xf6/0x1e0
> [36915.633865]  notifier_call_chain+0x39/0x90
> [36915.633869]  blocking_notifier_call_chain+0x49/0x70
> [36915.633873]  mce_gen_pool_process+0x41/0x70

Right, so dev_mce_log() is called in process context now and thus can be
greatly simplified by removing all those memory barriers and cmpxchg()
fun which was for atomic context back then. And simply grab the mutex
instead.

IOW, something like this totally untested hunk. Tony?

diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43aac38..1dacebb6a23b 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -53,9 +53,10 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 				void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
+	unsigned int entry;
+
+	mutex_lock(&mce_chrdev_read_mutex);
 
-	wmb();
 	for (;;) {
 		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
@@ -66,10 +67,10 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 			 * interesting ones:
 			 */
 			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
+				set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
 				return NOTIFY_OK;
 			}
+
 			/* Old left over entry. Skip: */
 			if (mcelog.entry[entry].finished) {
 				entry++;
@@ -77,15 +78,13 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 			}
 			break;
 		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+		mcelog.next = entry + 1;
 	}
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
+
+	mutex_unlock(&mce_chrdev_read_mutex);
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-11-01 20:07 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-01 16:47 x86/mce: suspicious RCU usage in 4.13.4 Borislav Petkov
  -- strict thread matches above, loose matches on Subject: below --
2017-11-01 20:07 Luck, Tony
2017-11-01 14:56 Laura Abbott
2017-10-16 18:28 Luck, Tony
2017-10-15  9:40 Borislav Petkov
2017-10-12 22:13 Andi Kleen
2017-10-12  9:02 Borislav Petkov
2017-10-11 23:11 Andi Kleen
2017-10-11 21:34 Luck, Tony
2017-10-11 11:50 Borislav Petkov
2017-10-10 20:13 Andi Kleen
2017-10-10 20:08 Luck, Tony
2017-10-10 19:44 Borislav Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).