* [PATCH] mce: fix warning messages about static struct mce_device
@ 2012-01-16 22:40 Greg KH
2012-01-17 0:14 ` Djalal Harouni
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Greg KH @ 2012-01-16 22:40 UTC (permalink / raw)
To: Linus Torvalds
Cc: Rafael J. Wysocki, Srivatsa S. Bhat, Sergei Trofimovich,
linux-kernel, Kay Sievers, Linux PM mailing list, Tony Luck,
mingo, Borislav Petkov, tglx, prasad, Ming Lei, Djalal Harouni,
Borislav Petkov, Hidetoshi Seto, Andi Kleen, gouders,
Marcos Souza, justinmattock, Jeff Chua
From: Greg Kroah-Hartman <gregkh@suse.de>
When suspending, there was a large list of warnings going something like:
Device 'machinecheck1' does not have a release() function, it is broken and must be fixed
This patch turns the static mce_devices into dynamically allocated, and
properly frees them when they are removed from the system. It solves
the warning messages on my laptop here.
Reported-by: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Borislav Petkov <bp@amd64.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
Linus, this should fix the warnings on your laptop, it does on mine.
include/asm/mce.h | 2 +-
kernel/cpu/mcheck/mce.c | 18 ++++++++++++++----
kernel/cpu/mcheck/mce_amd.c | 18 +++++++++++-------
3 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f35ce43..6aefb14 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct device, mce_device);
+extern struct device *mce_device[CONFIG_NR_CPUS];
/*
* Maximum banks number.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29ba329..5a11ae2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1859,7 +1859,7 @@ static struct bus_type mce_subsys = {
.dev_name = "machinecheck",
};
-DEFINE_PER_CPU(struct device, mce_device);
+struct device *mce_device[CONFIG_NR_CPUS];
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2001,19 +2001,27 @@ static struct device_attribute *mce_device_attrs[] = {
static cpumask_var_t mce_device_initialized;
+static void mce_device_release(struct device *dev)
+{
+ kfree(dev);
+}
+
/* Per cpu device init. All of the cpus still share the same ctrl bank: */
static __cpuinit int mce_device_create(unsigned int cpu)
{
- struct device *dev = &per_cpu(mce_device, cpu);
+ struct device *dev;
int err;
int i, j;
if (!mce_available(&boot_cpu_data))
return -EIO;
- memset(dev, 0, sizeof(struct device));
+ dev = kzalloc(sizeof *dev, GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
dev->id = cpu;
dev->bus = &mce_subsys;
+ dev->release = &mce_device_release;
err = device_register(dev);
if (err)
@@ -2030,6 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
goto error2;
}
cpumask_set_cpu(cpu, mce_device_initialized);
+ mce_device[cpu] = dev;
return 0;
error2:
@@ -2046,7 +2055,7 @@ error:
static __cpuinit void mce_device_remove(unsigned int cpu)
{
- struct device *dev = &per_cpu(mce_device, cpu);
+ struct device *dev = mce_device[cpu];
int i;
if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2060,6 +2069,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
+ mce_device[cpu] = NULL;
}
/* Make sure there are no machine checks on offlined CPUs. */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ba0b94a..786e76a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
+ struct device *dev = mce_device[cpu];
char name[32];
sprintf(name, "threshold_bank%i", bank);
@@ -543,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (!b)
goto out;
- err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
- b->kobj, name);
+ err = sysfs_create_link(&dev->kobj, b->kobj, name);
if (err)
goto out;
@@ -565,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
goto out;
}
- b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
+ b->kobj = kobject_create_and_add(name, &dev->kobj);
if (!b->kobj)
goto out_free;
@@ -585,8 +585,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;
- err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
- b->kobj, name);
+ dev = mce_device[i];
+ if (dev)
+ err = sysfs_create_link(&dev->kobj,b->kobj, name);
if (err)
goto out;
@@ -649,6 +650,7 @@ static void deallocate_threshold_block(unsigned int cpu,
static void threshold_remove_bank(unsigned int cpu, int bank)
{
struct threshold_bank *b;
+ struct device *dev;
char name[32];
int i = 0;
@@ -663,7 +665,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&per_cpu(mce_device, cpu).kobj, name);
+ sysfs_remove_link(&mce_device[cpu]->kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
@@ -675,7 +677,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;
- sysfs_remove_link(&per_cpu(mce_device, i).kobj, name);
+ dev = mce_device[i];
+ if (dev)
+ sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
}
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-16 22:40 [PATCH] mce: fix warning messages about static struct mce_device Greg KH
@ 2012-01-17 0:14 ` Djalal Harouni
2012-01-17 0:15 ` Greg KH
2012-01-17 8:38 ` Ingo Molnar
2012-01-17 12:36 ` [PATCH] mce: fix warning messages about static struct mce_device Srivatsa S. Bhat
2 siblings, 1 reply; 27+ messages in thread
From: Djalal Harouni @ 2012-01-17 0:14 UTC (permalink / raw)
To: Greg KH
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, mingo, Borislav Petkov, tglx,
prasad, Ming Lei, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Mon, Jan 16, 2012 at 02:40:28PM -0800, Greg KH wrote:
> From: Greg Kroah-Hartman <gregkh@suse.de>
>
> When suspending, there was a large list of warnings going something like:
Not only suspend, during poweroff we'll also see these annoying warnings
as reported here: http://lkml.org/lkml/2012/1/10/504
> Device 'machinecheck1' does not have a release() function, it is broken and must be fixed
>
> This patch turns the static mce_devices into dynamically allocated, and
> properly frees them when they are removed from the system. It solves
> the warning messages on my laptop here.
>
> Reported-by: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
> Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: Kay Sievers <kay.sievers@vrfy.org>
> Cc: Tony Luck <tony.luck@intel.com>
> Cc: Borislav Petkov <bp@amd64.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
>
> ---
>
> Linus, this should fix the warnings on your laptop, it does on mine.
Patch tested.
Thanks, this also fixed the warnings for my KVM/Qemu guests.
--
tixxdz
http://opendz.org
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 0:14 ` Djalal Harouni
@ 2012-01-17 0:15 ` Greg KH
2012-01-17 0:21 ` Linus Torvalds
0 siblings, 1 reply; 27+ messages in thread
From: Greg KH @ 2012-01-17 0:15 UTC (permalink / raw)
To: Djalal Harouni
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, mingo, Borislav Petkov, tglx,
prasad, Ming Lei, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Tue, Jan 17, 2012 at 01:14:03AM +0100, Djalal Harouni wrote:
> On Mon, Jan 16, 2012 at 02:40:28PM -0800, Greg KH wrote:
> > From: Greg Kroah-Hartman <gregkh@suse.de>
> >
> > When suspending, there was a large list of warnings going something like:
> Not only suspend, during poweroff we'll also see these annoying warnings
> as reported here: http://lkml.org/lkml/2012/1/10/504
Ah, yes, I forgot about that case.
> > Device 'machinecheck1' does not have a release() function, it is broken and must be fixed
> >
> > This patch turns the static mce_devices into dynamically allocated, and
> > properly frees them when they are removed from the system. It solves
> > the warning messages on my laptop here.
> >
> > Reported-by: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
> > Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Cc: Kay Sievers <kay.sievers@vrfy.org>
> > Cc: Tony Luck <tony.luck@intel.com>
> > Cc: Borislav Petkov <bp@amd64.org>
> > Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
> >
> > ---
> >
> > Linus, this should fix the warnings on your laptop, it does on mine.
> Patch tested.
>
> Thanks, this also fixed the warnings for my KVM/Qemu guests.
Wonderful, thanks for testing.
Linus, do you want to take this directly from the email, or do you want
me to put it in a tree to pull from?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 0:15 ` Greg KH
@ 2012-01-17 0:21 ` Linus Torvalds
2012-01-17 1:00 ` Greg KH
0 siblings, 1 reply; 27+ messages in thread
From: Linus Torvalds @ 2012-01-17 0:21 UTC (permalink / raw)
To: Greg KH
Cc: Djalal Harouni, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, mingo, Borislav Petkov, tglx,
prasad, Ming Lei, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Mon, Jan 16, 2012 at 4:15 PM, Greg KH <gregkh@suse.de> wrote:
>
> Linus, do you want to take this directly from the email, or do you want
> me to put it in a tree to pull from?
Do you have anything else pending before I close the merge window? If
so, put it in a tree with those.
If not, let me know, and I'll just take the patch as-is.
Linus
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 0:21 ` Linus Torvalds
@ 2012-01-17 1:00 ` Greg KH
0 siblings, 0 replies; 27+ messages in thread
From: Greg KH @ 2012-01-17 1:00 UTC (permalink / raw)
To: Linus Torvalds
Cc: Djalal Harouni, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, mingo, Borislav Petkov, tglx,
prasad, Ming Lei, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Mon, Jan 16, 2012 at 04:21:20PM -0800, Linus Torvalds wrote:
> On Mon, Jan 16, 2012 at 4:15 PM, Greg KH <gregkh@suse.de> wrote:
> >
> > Linus, do you want to take this directly from the email, or do you want
> > me to put it in a tree to pull from?
>
> Do you have anything else pending before I close the merge window? If
> so, put it in a tree with those.
>
> If not, let me know, and I'll just take the patch as-is.
Nope, nothing else from me for the big 3.3-rc1 merge, so please take it
as-is.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-16 22:40 [PATCH] mce: fix warning messages about static struct mce_device Greg KH
2012-01-17 0:14 ` Djalal Harouni
@ 2012-01-17 8:38 ` Ingo Molnar
2012-01-17 15:51 ` Greg KH
2012-01-17 12:36 ` [PATCH] mce: fix warning messages about static struct mce_device Srivatsa S. Bhat
2 siblings, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2012-01-17 8:38 UTC (permalink / raw)
To: Greg KH
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
* Greg KH <gregkh@suse.de> wrote:
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index f35ce43..6aefb14 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
>
> void mce_setup(struct mce *m);
> void mce_log(struct mce *m);
> -DECLARE_PER_CPU(struct device, mce_device);
> +extern struct device *mce_device[CONFIG_NR_CPUS];
Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
pattern in the kernel.
This should be something like:
DECLARE_PER_CPU(struct device *, mce_device);
Or the pointer should be attached to the CPU info structure.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-16 22:40 [PATCH] mce: fix warning messages about static struct mce_device Greg KH
2012-01-17 0:14 ` Djalal Harouni
2012-01-17 8:38 ` Ingo Molnar
@ 2012-01-17 12:36 ` Srivatsa S. Bhat
2012-01-17 15:52 ` Greg KH
2 siblings, 1 reply; 27+ messages in thread
From: Srivatsa S. Bhat @ 2012-01-17 12:36 UTC (permalink / raw)
To: Greg KH
Cc: Linus Torvalds, Rafael J. Wysocki, Sergei Trofimovich,
linux-kernel, Kay Sievers, Linux PM mailing list, Tony Luck,
mingo, Borislav Petkov, tglx, prasad, Ming Lei, Djalal Harouni,
Borislav Petkov, Hidetoshi Seto, Andi Kleen, gouders,
Marcos Souza, justinmattock, Jeff Chua, neilb
On 01/17/2012 04:10 AM, Greg KH wrote:
> From: Greg Kroah-Hartman <gregkh@suse.de>
>
> When suspending, there was a large list of warnings going something like:
>
> Device 'machinecheck1' does not have a release() function, it is broken and must be fixed
>
> This patch turns the static mce_devices into dynamically allocated, and
> properly frees them when they are removed from the system. It solves
> the warning messages on my laptop here.
>
...
> /* Per cpu device init. All of the cpus still share the same ctrl bank: */
> static __cpuinit int mce_device_create(unsigned int cpu)
> {
> - struct device *dev = &per_cpu(mce_device, cpu);
> + struct device *dev;
> int err;
> int i, j;
>
> if (!mce_available(&boot_cpu_data))
> return -EIO;
>
> - memset(dev, 0, sizeof(struct device));
> + dev = kzalloc(sizeof *dev, GFP_KERNEL);
> + if (!dev)
> + return -ENOMEM;
> dev->id = cpu;
> dev->bus = &mce_subsys;
> + dev->release = &mce_device_release;
>
> err = device_register(dev);
> if (err)
> @@ -2030,6 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
> goto error2;
> }
> cpumask_set_cpu(cpu, mce_device_initialized);
> + mce_device[cpu] = dev;
>
> return 0;
> error2:
> @@ -2046,7 +2055,7 @@ error:
>
> /* Make sure there are no machine checks on offlined CPUs. */
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index ba0b94a..786e76a 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -523,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> {
> int i, err = 0;
> struct threshold_bank *b = NULL;
> + struct device *dev = mce_device[cpu];
> char name[32];
>
> sprintf(name, "threshold_bank%i", bank);
> @@ -543,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> if (!b)
> goto out;
>
> - err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
> - b->kobj, name);
> + err = sysfs_create_link(&dev->kobj, b->kobj, name);
I don't think dereferencing 'dev' like this is safe when booting up.
(See below for another such instance of dereferencing dev.)
Both mcheck_init_device() and threshold_init_device() are device_initcalls.
And the latter depends on the former, because the former dynamically
allocates and fills the 'mce_device' array of pointers.
So, what guarantees that this ordering is preserved? IOW, what ensures that
mcheck_init_device() is completed before running threshold_init_device()?
Or am I missing something?
> if (err)
> goto out;
>
> @@ -565,7 +565,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> goto out;
> }
>
> - b->kobj = kobject_create_and_add(name, &per_cpu(mce_device, cpu).kobj);
> + b->kobj = kobject_create_and_add(name, &dev->kobj);
Same here.
> if (!b->kobj)
> goto out_free;
>
> @@ -585,8 +585,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> if (i == cpu)
> continue;
>
> - err = sysfs_create_link(&per_cpu(mce_device, i).kobj,
> - b->kobj, name);
> + dev = mce_device[i];
> + if (dev)
> + err = sysfs_create_link(&dev->kobj,b->kobj, name);
> if (err)
> goto out;
>
Regards,
Srivatsa S. Bhat
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 8:38 ` Ingo Molnar
@ 2012-01-17 15:51 ` Greg KH
2012-01-17 16:28 ` Greg KH
2012-01-18 9:31 ` Ingo Molnar
0 siblings, 2 replies; 27+ messages in thread
From: Greg KH @ 2012-01-17 15:51 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
On Tue, Jan 17, 2012 at 09:38:43AM +0100, Ingo Molnar wrote:
>
> * Greg KH <gregkh@suse.de> wrote:
>
> > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > index f35ce43..6aefb14 100644
> > --- a/arch/x86/include/asm/mce.h
> > +++ b/arch/x86/include/asm/mce.h
> > @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
> >
> > void mce_setup(struct mce *m);
> > void mce_log(struct mce *m);
> > -DECLARE_PER_CPU(struct device, mce_device);
> > +extern struct device *mce_device[CONFIG_NR_CPUS];
>
> Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> pattern in the kernel.
>
> This should be something like:
>
> DECLARE_PER_CPU(struct device *, mce_device);
That is what we used to have, but with just a static struct device. We
really don't need this to be in the per-cpu area, a flat array should be
just fine, why can't we use the CONFIG_NR_CPUS value? Should we use
something else?
> Or the pointer should be attached to the CPU info structure.
Ok, I have no objection to that, do you want me to make a patch doing
that, now that this is already in Linus's tree?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 12:36 ` [PATCH] mce: fix warning messages about static struct mce_device Srivatsa S. Bhat
@ 2012-01-17 15:52 ` Greg KH
0 siblings, 0 replies; 27+ messages in thread
From: Greg KH @ 2012-01-17 15:52 UTC (permalink / raw)
To: Srivatsa S. Bhat
Cc: Linus Torvalds, Rafael J. Wysocki, Sergei Trofimovich,
linux-kernel, Kay Sievers, Linux PM mailing list, Tony Luck,
mingo, Borislav Petkov, tglx, prasad, Ming Lei, Djalal Harouni,
Borislav Petkov, Hidetoshi Seto, Andi Kleen, gouders,
Marcos Souza, justinmattock, Jeff Chua, neilb
On Tue, Jan 17, 2012 at 06:06:35PM +0530, Srivatsa S. Bhat wrote:
> On 01/17/2012 04:10 AM, Greg KH wrote:
>
> > From: Greg Kroah-Hartman <gregkh@suse.de>
> >
> > When suspending, there was a large list of warnings going something like:
> >
> > Device 'machinecheck1' does not have a release() function, it is broken and must be fixed
> >
> > This patch turns the static mce_devices into dynamically allocated, and
> > properly frees them when they are removed from the system. It solves
> > the warning messages on my laptop here.
> >
>
> ...
>
> > /* Per cpu device init. All of the cpus still share the same ctrl bank: */
> > static __cpuinit int mce_device_create(unsigned int cpu)
> > {
> > - struct device *dev = &per_cpu(mce_device, cpu);
> > + struct device *dev;
> > int err;
> > int i, j;
> >
> > if (!mce_available(&boot_cpu_data))
> > return -EIO;
> >
> > - memset(dev, 0, sizeof(struct device));
> > + dev = kzalloc(sizeof *dev, GFP_KERNEL);
> > + if (!dev)
> > + return -ENOMEM;
> > dev->id = cpu;
> > dev->bus = &mce_subsys;
> > + dev->release = &mce_device_release;
> >
> > err = device_register(dev);
> > if (err)
> > @@ -2030,6 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
> > goto error2;
> > }
> > cpumask_set_cpu(cpu, mce_device_initialized);
> > + mce_device[cpu] = dev;
> >
> > return 0;
> > error2:
> > @@ -2046,7 +2055,7 @@ error:
> >
> > /* Make sure there are no machine checks on offlined CPUs. */
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > index ba0b94a..786e76a 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > @@ -523,6 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> > {
> > int i, err = 0;
> > struct threshold_bank *b = NULL;
> > + struct device *dev = mce_device[cpu];
> > char name[32];
> >
> > sprintf(name, "threshold_bank%i", bank);
> > @@ -543,8 +544,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
> > if (!b)
> > goto out;
> >
> > - err = sysfs_create_link(&per_cpu(mce_device, cpu).kobj,
> > - b->kobj, name);
> > + err = sysfs_create_link(&dev->kobj, b->kobj, name);
>
>
> I don't think dereferencing 'dev' like this is safe when booting up.
> (See below for another such instance of dereferencing dev.)
>
> Both mcheck_init_device() and threshold_init_device() are device_initcalls.
> And the latter depends on the former, because the former dynamically
> allocates and fills the 'mce_device' array of pointers.
>
> So, what guarantees that this ordering is preserved? IOW, what ensures that
> mcheck_init_device() is completed before running threshold_init_device()?
>
> Or am I missing something?
You must be, as this is the way it is always works, and somehow your
machine boots properly, with the symlinks setup :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 15:51 ` Greg KH
@ 2012-01-17 16:28 ` Greg KH
2012-01-18 9:31 ` Ingo Molnar
1 sibling, 0 replies; 27+ messages in thread
From: Greg KH @ 2012-01-17 16:28 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
On Tue, Jan 17, 2012 at 07:51:25AM -0800, Greg KH wrote:
> On Tue, Jan 17, 2012 at 09:38:43AM +0100, Ingo Molnar wrote:
> >
> > * Greg KH <gregkh@suse.de> wrote:
> >
> > > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > > index f35ce43..6aefb14 100644
> > > --- a/arch/x86/include/asm/mce.h
> > > +++ b/arch/x86/include/asm/mce.h
> > > @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
> > >
> > > void mce_setup(struct mce *m);
> > > void mce_log(struct mce *m);
> > > -DECLARE_PER_CPU(struct device, mce_device);
> > > +extern struct device *mce_device[CONFIG_NR_CPUS];
> >
> > Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> > pattern in the kernel.
> >
> > This should be something like:
> >
> > DECLARE_PER_CPU(struct device *, mce_device);
>
> That is what we used to have, but with just a static struct device. We
> really don't need this to be in the per-cpu area, a flat array should be
> just fine, why can't we use the CONFIG_NR_CPUS value? Should we use
> something else?
>
> > Or the pointer should be attached to the CPU info structure.
>
> Ok, I have no objection to that, do you want me to make a patch doing
> that, now that this is already in Linus's tree?
Wait, isn't that variable also used in head.S, so do you really want a
struct device * in there?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-17 15:51 ` Greg KH
2012-01-17 16:28 ` Greg KH
@ 2012-01-18 9:31 ` Ingo Molnar
2012-01-18 14:42 ` Greg KH
1 sibling, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2012-01-18 9:31 UTC (permalink / raw)
To: Greg KH
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
* Greg KH <gregkh@suse.de> wrote:
> On Tue, Jan 17, 2012 at 09:38:43AM +0100, Ingo Molnar wrote:
> >
> > * Greg KH <gregkh@suse.de> wrote:
> >
> > > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > > index f35ce43..6aefb14 100644
> > > --- a/arch/x86/include/asm/mce.h
> > > +++ b/arch/x86/include/asm/mce.h
> > > @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
> > >
> > > void mce_setup(struct mce *m);
> > > void mce_log(struct mce *m);
> > > -DECLARE_PER_CPU(struct device, mce_device);
> > > +extern struct device *mce_device[CONFIG_NR_CPUS];
> >
> > Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> > pattern in the kernel.
> >
> > This should be something like:
> >
> > DECLARE_PER_CPU(struct device *, mce_device);
>
> That is what we used to have, but with just a static struct
> device. [...]
Which was fine in itself for a per CPU data structure - wouldnt
the warning be fixed by memset()-ing before registering the
device or such, if device registry absolutely needs a pre-zeroed
buffer?
I still think there must be some bug/assumption lurking in the
device layer - do you require all device allocations to be one
via zalloc()? Seems like a weird and unrobust requirement.
I don't object to the quick fix that gets rid of the warnings,
but that quick fix came at the price of leaving the real bug
unfixed and at the price of introducing a new ugliness ;-)
> [...] We really don't need this to be in the per-cpu area, a
> flat array should be just fine, why can't we use the
> CONFIG_NR_CPUS value? Should we use something else?
By that argument we don't really need PER_CPU() areas to begin
with, a flat [CONFIG_NR_CPUS] array is just fine, right?
Amongst other things we use PER_CPU to have an array of just 2
elements on a dual core system, even if it boots a
CONFIG_NR_CPUS=512 distro kernel. That saves RAM, and with
higher CONFIG_NR_CPUS values it adds up quickly.
> > Or the pointer should be attached to the CPU info structure.
>
> Ok, I have no objection to that, do you want me to make a
> patch doing that, now that this is already in Linus's tree?
Would be nice if you could do that or some other equivalent
solution, i'd really not like to see the [CONFIG_NR_CPUS]
pattern to spread in the kernel, we spent a lot of time getting
rid of such uses ;-)
Thanks,
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 9:31 ` Ingo Molnar
@ 2012-01-18 14:42 ` Greg KH
2012-01-18 15:51 ` Alan Stern
2012-01-19 12:28 ` Ingo Molnar
0 siblings, 2 replies; 27+ messages in thread
From: Greg KH @ 2012-01-18 14:42 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
On Wed, Jan 18, 2012 at 10:31:38AM +0100, Ingo Molnar wrote:
>
> * Greg KH <gregkh@suse.de> wrote:
>
> > On Tue, Jan 17, 2012 at 09:38:43AM +0100, Ingo Molnar wrote:
> > >
> > > * Greg KH <gregkh@suse.de> wrote:
> > >
> > > > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > > > index f35ce43..6aefb14 100644
> > > > --- a/arch/x86/include/asm/mce.h
> > > > +++ b/arch/x86/include/asm/mce.h
> > > > @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
> > > >
> > > > void mce_setup(struct mce *m);
> > > > void mce_log(struct mce *m);
> > > > -DECLARE_PER_CPU(struct device, mce_device);
> > > > +extern struct device *mce_device[CONFIG_NR_CPUS];
> > >
> > > Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> > > pattern in the kernel.
> > >
> > > This should be something like:
> > >
> > > DECLARE_PER_CPU(struct device *, mce_device);
> >
> > That is what we used to have, but with just a static struct
> > device. [...]
>
> Which was fine in itself for a per CPU data structure - wouldnt
> the warning be fixed by memset()-ing before registering the
> device or such, if device registry absolutely needs a pre-zeroed
> buffer?
It was already fixed that way, but the problem is that you can not have
statically allocated 'struct device' objects in the system. That's what
my add-on patch fixed, also resolving the syslog messages saying there
was no release function for the device as well.
> I still think there must be some bug/assumption lurking in the
> device layer - do you require all device allocations to be one
> via zalloc()? Seems like a weird and unrobust requirement.
Yes, that's always been the requirement.
> I don't object to the quick fix that gets rid of the warnings,
> but that quick fix came at the price of leaving the real bug
> unfixed and at the price of introducing a new ugliness ;-)
Nope, all of the bugs are now fixed :)
> > [...] We really don't need this to be in the per-cpu area, a
> > flat array should be just fine, why can't we use the
> > CONFIG_NR_CPUS value? Should we use something else?
>
> By that argument we don't really need PER_CPU() areas to begin
> with, a flat [CONFIG_NR_CPUS] array is just fine, right?
I never said that, only for this type of variable.
> Amongst other things we use PER_CPU to have an array of just 2
> elements on a dual core system, even if it boots a
> CONFIG_NR_CPUS=512 distro kernel. That saves RAM, and with
> higher CONFIG_NR_CPUS values it adds up quickly.
>
> > > Or the pointer should be attached to the CPU info structure.
> >
> > Ok, I have no objection to that, do you want me to make a
> > patch doing that, now that this is already in Linus's tree?
>
> Would be nice if you could do that or some other equivalent
> solution, i'd really not like to see the [CONFIG_NR_CPUS]
> pattern to spread in the kernel, we spent a lot of time getting
> rid of such uses ;-)
Ok, I'll work on resolving this.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 14:42 ` Greg KH
@ 2012-01-18 15:51 ` Alan Stern
2012-01-18 17:28 ` Luck, Tony
2012-01-19 12:28 ` Ingo Molnar
1 sibling, 1 reply; 27+ messages in thread
From: Alan Stern @ 2012-01-18 15:51 UTC (permalink / raw)
To: Greg KH
Cc: Ingo Molnar, Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
On Wed, 18 Jan 2012, Greg KH wrote:
> > > > Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> > > > pattern in the kernel.
> > > >
> > > > This should be something like:
> > > >
> > > > DECLARE_PER_CPU(struct device *, mce_device);
> > >
> > > That is what we used to have, but with just a static struct
> > > device. [...]
Ingo's suggestion is fine. The difference is that instead of a static
array of struct device's, this is a static pre-cpu pointer to a dynamic
struct device (effectively, an array of pointers rather than an array
of structures).
> > Which was fine in itself for a per CPU data structure - wouldnt
> > the warning be fixed by memset()-ing before registering the
> > device or such, if device registry absolutely needs a pre-zeroed
> > buffer?
>
> It was already fixed that way, but the problem is that you can not have
> statically allocated 'struct device' objects in the system. That's what
> my add-on patch fixed, also resolving the syslog messages saying there
> was no release function for the device as well.
>
> > I still think there must be some bug/assumption lurking in the
> > device layer - do you require all device allocations to be one
> > via zalloc()? Seems like a weird and unrobust requirement.
>
> Yes, that's always been the requirement.
There's an additional requirement: Device structures may not be reused.
Not even if the caller clears all the fields to 0 in between. That was
the real bug in the original code -- and adding a dummy release routine
wouldn't fix it.
> > Amongst other things we use PER_CPU to have an array of just 2
> > elements on a dual core system, even if it boots a
> > CONFIG_NR_CPUS=512 distro kernel. That saves RAM, and with
> > higher CONFIG_NR_CPUS values it adds up quickly.
> >
> > > > Or the pointer should be attached to the CPU info structure.
> > >
> > > Ok, I have no objection to that, do you want me to make a
> > > patch doing that, now that this is already in Linus's tree?
> >
> > Would be nice if you could do that or some other equivalent
> > solution, i'd really not like to see the [CONFIG_NR_CPUS]
> > pattern to spread in the kernel, we spent a lot of time getting
> > rid of such uses ;-)
>
> Ok, I'll work on resolving this.
A static per-cpu pointer to struct device should work perfectly.
Alan Stern
^ permalink raw reply [flat|nested] 27+ messages in thread
* RE: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 15:51 ` Alan Stern
@ 2012-01-18 17:28 ` Luck, Tony
2012-01-18 17:54 ` Srivatsa S. Bhat
2012-01-18 18:10 ` Alan Stern
0 siblings, 2 replies; 27+ messages in thread
From: Luck, Tony @ 2012-01-18 17:28 UTC (permalink / raw)
To: Alan Stern, Greg KH
Cc: Ingo Molnar, Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
Greg said:
> It was already fixed that way, but the problem is that you can not have
> statically allocated 'struct device' objects in the system.
and then Alan said:
> There's an additional requirement: Device structures may not be reused.
> Not even if the caller clears all the fields to 0 in between. That was
> the real bug in the original code -- and adding a dummy release routine
> wouldn't fix it.
There seems to be some curious logic happening here which I don't understand
at all. How can the code that deals with 'struct device' tell whether it was
statically declared or dynamically allocated? Why would it care?
What happens if we play by the rules using a dynamic structure and do
device_register() + device_create_file(dev)
...
device_remove_file(dev) + device_unregister()
then later come back to re-add this and by pure random fluke kzalloc()
gives us back the exact same block of memory that we used for dev before?
By Alan's logic we are screwed - we are re-using the same device structure
(unless kfree() + kzalloc() does some magic pixie dust thing so that this
same block of memory is now not the same device structure we had before, even
though it has the same address).
In summary - I can totally buy the argument that you must start with a zeroed
struct device (and that it is just fine that device_unregister() doesn't waste
cpu cycles cleaning up the structure just in case someone will re-use it, because
that isn't going to be the common case).
I just don't understand the magical difference between a static structure that
has been memset() to all zero, and a dynamic block returned from kzalloc().
-Tony
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 17:28 ` Luck, Tony
@ 2012-01-18 17:54 ` Srivatsa S. Bhat
2012-01-18 18:10 ` Alan Stern
1 sibling, 0 replies; 27+ messages in thread
From: Srivatsa S. Bhat @ 2012-01-18 17:54 UTC (permalink / raw)
To: Luck, Tony
Cc: Alan Stern, Greg KH, Ingo Molnar, Linus Torvalds,
Rafael J. Wysocki, Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On 01/18/2012 10:58 PM, Luck, Tony wrote:
> Greg said:
>> It was already fixed that way, but the problem is that you can not have
>> statically allocated 'struct device' objects in the system.
>
> and then Alan said:
>> There's an additional requirement: Device structures may not be reused.
>> Not even if the caller clears all the fields to 0 in between. That was
>> the real bug in the original code -- and adding a dummy release routine
>> wouldn't fix it.
>
> There seems to be some curious logic happening here which I don't understand
> at all. How can the code that deals with 'struct device' tell whether it was
> statically declared or dynamically allocated? Why would it care?
>
> What happens if we play by the rules using a dynamic structure and do
>
> device_register() + device_create_file(dev)
> ...
> device_remove_file(dev) + device_unregister()
>
> then later come back to re-add this and by pure random fluke kzalloc()
> gives us back the exact same block of memory that we used for dev before?
>
> By Alan's logic we are screwed - we are re-using the same device structure
> (unless kfree() + kzalloc() does some magic pixie dust thing so that this
> same block of memory is now not the same device structure we had before, even
> though it has the same address).
>
> In summary - I can totally buy the argument that you must start with a zeroed
> struct device (and that it is just fine that device_unregister() doesn't waste
> cpu cycles cleaning up the structure just in case someone will re-use it, because
> that isn't going to be the common case).
>
> I just don't understand the magical difference between a static structure that
> has been memset() to all zero, and a dynamic block returned from kzalloc().
>
I am in total agreement with what Tony said above. We have already seen that
my patch did a memset of the device structure and solved the suspend issue.
So, the suspend issue is no longer haunting us, which demonstrates that there
is really no difference between using a zeroed struct device vs using a
structure which is dynamically allocated using zalloc().
What Greg is trying to do with this patch is - get rid of the "Machinecheck
doesn't have release() function" warning in a proper way - something better
than having a dummy release function. Functionality-wise, that patch is not
fixing anything!
Regards,
Srivatsa S. Bhat
^ permalink raw reply [flat|nested] 27+ messages in thread
* RE: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 17:28 ` Luck, Tony
2012-01-18 17:54 ` Srivatsa S. Bhat
@ 2012-01-18 18:10 ` Alan Stern
2012-01-18 18:50 ` Kay Sievers
1 sibling, 1 reply; 27+ messages in thread
From: Alan Stern @ 2012-01-18 18:10 UTC (permalink / raw)
To: Luck, Tony
Cc: Greg KH, Ingo Molnar, Linus Torvalds, Rafael J. Wysocki,
Srivatsa S. Bhat, Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Wed, 18 Jan 2012, Luck, Tony wrote:
> Greg said:
> > It was already fixed that way, but the problem is that you can not have
> > statically allocated 'struct device' objects in the system.
>
> and then Alan said:
> > There's an additional requirement: Device structures may not be reused.
> > Not even if the caller clears all the fields to 0 in between. That was
> > the real bug in the original code -- and adding a dummy release routine
> > wouldn't fix it.
>
> There seems to be some curious logic happening here which I don't understand
> at all. How can the code that deals with 'struct device' tell whether it was
> statically declared or dynamically allocated? Why would it care?
>
> What happens if we play by the rules using a dynamic structure and do
>
> device_register() + device_create_file(dev)
> ...
> device_remove_file(dev) + device_unregister()
>
> then later come back to re-add this and by pure random fluke kzalloc()
> gives us back the exact same block of memory that we used for dev before?
Okay, that's a reasonable question.
> By Alan's logic we are screwed - we are re-using the same device structure
> (unless kfree() + kzalloc() does some magic pixie dust thing so that this
> same block of memory is now not the same device structure we had before, even
> though it has the same address).
No, it's a new structure that just happens to occupy the same address
as the old structure. :-) The real point is that kzalloc() won't give
you that address for the new structure before kfree() has deallocated
the old one.
Normally kfree() would be called by the release routine. Therefore if
kzalloc() gives you the same address, you can be sure that the release
routine has run. The problem with statically allocated device
structures is that they generally don't have release routines (as in
this MCE case).
Now if you had a static structure and you gave it a release routine
then yes -- you could reuse it _provided_ you waited for the release
routine to be called first. But that's not under your control; the
release routine won't be called until all the references have been
dropped, which can take an arbitrarily long time. It's better to avoid
the whole issue by not using static allocation.
> In summary - I can totally buy the argument that you must start with a zeroed
> struct device (and that it is just fine that device_unregister() doesn't waste
> cpu cycles cleaning up the structure just in case someone will re-use it, because
> that isn't going to be the common case).
>
> I just don't understand the magical difference between a static structure that
> has been memset() to all zero, and a dynamic block returned from kzalloc().
The difference is that a block returned from kzalloc() is _known_ not
to have any pre-existing references. A static structure that has been
reset to all zero may still have some; in general you can't know.
There's nothing special about the driver model code in this respect.
The same restriction applies wherever object lifetimes are controlled
by reference counting.
Alan Stern
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 18:10 ` Alan Stern
@ 2012-01-18 18:50 ` Kay Sievers
2012-01-18 19:00 ` Luck, Tony
` (2 more replies)
0 siblings, 3 replies; 27+ messages in thread
From: Kay Sievers @ 2012-01-18 18:50 UTC (permalink / raw)
To: Alan Stern
Cc: Luck, Tony, Greg KH, Ingo Molnar, Linus Torvalds,
Rafael J. Wysocki, Srivatsa S. Bhat, Sergei Trofimovich,
linux-kernel, Linux PM mailing list, Borislav Petkov, tglx,
prasad, Ming Lei, Djalal Harouni, Borislav Petkov,
Hidetoshi Seto, Andi Kleen, gouders, Marcos Souza, justinmattock,
Jeff Chua
On Wed, Jan 18, 2012 at 19:10, Alan Stern <stern@rowland.harvard.edu> wrote:
> There's nothing special about the driver model code in this respect.
> The same restriction applies wherever object lifetimes are controlled
> by reference counting.
Right. But it might not be obvious what 's the background here:
An allocated device object(memory) usually represents an actual
device(hardware). The object can have N users. Every of the users is
required to take a reference to the object, which pins the object's
memory as long as any of the N users might need to access it.
In a hotplug world, we deal with device-removal. On disconnect, we
usually just orphan the object, we remove it from visibility,
disconnect the device <-> object relation.
All of the N users with a reference can still access the memory, they
just do not talk to a real device anymore. The invalidated/orphaned
state is communicated otherwise by locks and flags in the device
object. Only after all of the N users left the object alone, the
memory of the orphan if free'd.
If in the time-window between disconnecting the object from the device
and freeing the orphaned object's memory, the same device comes back,
we allocate a new object which is associated with the device. It
usually has the same name and same properties as the original one.
This way, the new object is full functional, does not conflict with
the older one, and also all the users of the old memory are still fine
and can cleanup a lazy as they need without much synchronization.
Now, all that might not apply to machinecheck, and it might be that
machinecheck is fully able to handle all that just fine with the
statically allocated same memory -- allocating new device memory on
hotplug is still the model that should always be preferred over any
other, if possible.
It's usually the simplest safest and most flexible for anything that
can come and go at any time, and memory which might be in used by
other running code.
And hey, filesystems work successful with this model since ages.
Nobody invented anything new here. Device hotplug/unplug/cleanup is
not much different from create/unlink/release. Open/truncate is
usually much harder to manage in concurrency situations. :)
Kay
^ permalink raw reply [flat|nested] 27+ messages in thread
* RE: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 18:50 ` Kay Sievers
@ 2012-01-18 19:00 ` Luck, Tony
2012-01-18 19:31 ` Srivatsa S. Bhat
2012-01-19 12:32 ` Ingo Molnar
2 siblings, 0 replies; 27+ messages in thread
From: Luck, Tony @ 2012-01-18 19:00 UTC (permalink / raw)
To: Kay Sievers, Alan Stern
Cc: Greg KH, Ingo Molnar, Linus Torvalds, Rafael J. Wysocki,
Srivatsa S. Bhat, Sergei Trofimovich, linux-kernel,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="utf-8", Size: 424 bytes --]
> This way, the new object is full functional, does not conflict with
> the older one, and also all the users of the old memory are still fine
> and can cleanup a lazy as they need without much synchronization.
Thanks Alan & Kay ... makes sense to me now.
-Tony
ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 18:50 ` Kay Sievers
2012-01-18 19:00 ` Luck, Tony
@ 2012-01-18 19:31 ` Srivatsa S. Bhat
2012-01-19 12:32 ` Ingo Molnar
2 siblings, 0 replies; 27+ messages in thread
From: Srivatsa S. Bhat @ 2012-01-18 19:31 UTC (permalink / raw)
To: Kay Sievers
Cc: Alan Stern, Luck, Tony, Greg KH, Ingo Molnar, Linus Torvalds,
Rafael J. Wysocki, Sergei Trofimovich, linux-kernel,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On 01/19/2012 12:20 AM, Kay Sievers wrote:
> On Wed, Jan 18, 2012 at 19:10, Alan Stern <stern@rowland.harvard.edu> wrote:
>
>> There's nothing special about the driver model code in this respect.
>> The same restriction applies wherever object lifetimes are controlled
>> by reference counting.
>
> Right. But it might not be obvious what 's the background here:
>
> An allocated device object(memory) usually represents an actual
> device(hardware). The object can have N users. Every of the users is
> required to take a reference to the object, which pins the object's
> memory as long as any of the N users might need to access it.
>
> In a hotplug world, we deal with device-removal. On disconnect, we
> usually just orphan the object, we remove it from visibility,
> disconnect the device <-> object relation.
>
> All of the N users with a reference can still access the memory, they
> just do not talk to a real device anymore. The invalidated/orphaned
> state is communicated otherwise by locks and flags in the device
> object. Only after all of the N users left the object alone, the
> memory of the orphan if free'd.
>
> If in the time-window between disconnecting the object from the device
> and freeing the orphaned object's memory, the same device comes back,
> we allocate a new object which is associated with the device. It
> usually has the same name and same properties as the original one.
>
> This way, the new object is full functional, does not conflict with
> the older one, and also all the users of the old memory are still fine
> and can cleanup a lazy as they need without much synchronization.
>
> Now, all that might not apply to machinecheck, and it might be that
> machinecheck is fully able to handle all that just fine with the
> statically allocated same memory -- allocating new device memory on
> hotplug is still the model that should always be preferred over any
> other, if possible.
>
> It's usually the simplest safest and most flexible for anything that
> can come and go at any time, and memory which might be in used by
> other running code.
>
Thanks for the crystal clear explanations from both you and Alan. Now
I am convinced.. :-) And thanks to Tony for bringing this up!
Regards,
Srivatsa S. Bhat
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 14:42 ` Greg KH
2012-01-18 15:51 ` Alan Stern
@ 2012-01-19 12:28 ` Ingo Molnar
2012-01-26 23:49 ` MCE: convert static array of pointers to per-cpu variables Greg KH
1 sibling, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2012-01-19 12:28 UTC (permalink / raw)
To: Greg KH
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
* Greg KH <gregkh@suse.de> wrote:
> On Wed, Jan 18, 2012 at 10:31:38AM +0100, Ingo Molnar wrote:
> >
> > * Greg KH <gregkh@suse.de> wrote:
> >
> > > On Tue, Jan 17, 2012 at 09:38:43AM +0100, Ingo Molnar wrote:
> > > >
> > > > * Greg KH <gregkh@suse.de> wrote:
> > > >
> > > > > diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> > > > > index f35ce43..6aefb14 100644
> > > > > --- a/arch/x86/include/asm/mce.h
> > > > > +++ b/arch/x86/include/asm/mce.h
> > > > > @@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
> > > > >
> > > > > void mce_setup(struct mce *m);
> > > > > void mce_log(struct mce *m);
> > > > > -DECLARE_PER_CPU(struct device, mce_device);
> > > > > +extern struct device *mce_device[CONFIG_NR_CPUS];
> > > >
> > > > Minor nit, i don't think we have any other such [CONFIG_NR_CPUS]
> > > > pattern in the kernel.
> > > >
> > > > This should be something like:
> > > >
> > > > DECLARE_PER_CPU(struct device *, mce_device);
> > >
> > > That is what we used to have, but with just a static struct
> > > device. [...]
> >
> > Which was fine in itself for a per CPU data structure -
> > wouldnt the warning be fixed by memset()-ing before
> > registering the device or such, if device registry
> > absolutely needs a pre-zeroed buffer?
>
> It was already fixed that way, but the problem is that you can
> not have statically allocated 'struct device' objects in the
> system. [...]
Where does that limitation come from? Typically there's no
fundamental reason why there should be such restrictions in
place, but i might be missing something.
Although one could argue that *this* particular bug is evidence
why static allocations should be disallowed: reuse is way too
easy to mess up :-)
> > I don't object to the quick fix that gets rid of the
> > warnings, but that quick fix came at the price of leaving
> > the real bug unfixed and at the price of introducing a new
> > ugliness ;-)
>
> Nope, all of the bugs are now fixed :)
Okay :-)
> > > [...] We really don't need this to be in the per-cpu area,
> > > a flat array should be just fine, why can't we use the
> > > CONFIG_NR_CPUS value? Should we use something else?
> >
> > By that argument we don't really need PER_CPU() areas to
> > begin with, a flat [CONFIG_NR_CPUS] array is just fine,
> > right?
>
> I never said that, only for this type of variable.
There's nothing unusual about this: a percpu array of pointers
occurs in dozens of places in the kernel.
> > Would be nice if you could do that or some other equivalent
> > solution, i'd really not like to see the [CONFIG_NR_CPUS]
> > pattern to spread in the kernel, we spent a lot of time
> > getting rid of such uses ;-)
>
> Ok, I'll work on resolving this.
Thanks!
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-18 18:50 ` Kay Sievers
2012-01-18 19:00 ` Luck, Tony
2012-01-18 19:31 ` Srivatsa S. Bhat
@ 2012-01-19 12:32 ` Ingo Molnar
2012-01-19 13:29 ` Srivatsa S. Bhat
2 siblings, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2012-01-19 12:32 UTC (permalink / raw)
To: Kay Sievers
Cc: Alan Stern, Luck, Tony, Greg KH, Linus Torvalds,
Rafael J. Wysocki, Srivatsa S. Bhat, Sergei Trofimovich,
linux-kernel, Linux PM mailing list, Borislav Petkov, tglx,
prasad, Ming Lei, Djalal Harouni, Borislav Petkov,
Hidetoshi Seto, Andi Kleen, gouders, Marcos Souza, justinmattock,
Jeff Chua
* Kay Sievers <kay.sievers@vrfy.org> wrote:
> > There's nothing special about the driver model code in this
> > respect. The same restriction applies wherever object
> > lifetimes are controlled by reference counting.
>
> Right. But it might not be obvious what 's the background
> here:
>
> An allocated device object(memory) usually represents an
> actual device(hardware). The object can have N users. Every of
> the users is required to take a reference to the object, which
> pins the object's memory as long as any of the N users might
> need to access it.
>
> In a hotplug world, we deal with device-removal. On
> disconnect, we usually just orphan the object, we remove it
> from visibility, disconnect the device <-> object relation.
>
> All of the N users with a reference can still access the
> memory, they just do not talk to a real device anymore. The
> invalidated/orphaned state is communicated otherwise by locks
> and flags in the device object. Only after all of the N users
> left the object alone, the memory of the orphan if free'd.
But this is not what happened here - it's a special piece of
fundamental hardware that doesnt hot-plug separately from the
CPU and that has just a single "user".
So i'm curious, why wasn't the memset() enough? It should have
resolved the bug AFAICS.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-19 12:32 ` Ingo Molnar
@ 2012-01-19 13:29 ` Srivatsa S. Bhat
2012-01-19 15:13 ` Alan Stern
0 siblings, 1 reply; 27+ messages in thread
From: Srivatsa S. Bhat @ 2012-01-19 13:29 UTC (permalink / raw)
To: Ingo Molnar
Cc: Kay Sievers, Alan Stern, Luck, Tony, Greg KH, Linus Torvalds,
Rafael J. Wysocki, Sergei Trofimovich, linux-kernel,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On 01/19/2012 06:02 PM, Ingo Molnar wrote:
>
> * Kay Sievers <kay.sievers@vrfy.org> wrote:
>
>>> There's nothing special about the driver model code in this
>>> respect. The same restriction applies wherever object
>>> lifetimes are controlled by reference counting.
>>
>> Right. But it might not be obvious what 's the background
>> here:
>>
>> An allocated device object(memory) usually represents an
>> actual device(hardware). The object can have N users. Every of
>> the users is required to take a reference to the object, which
>> pins the object's memory as long as any of the N users might
>> need to access it.
>>
>> In a hotplug world, we deal with device-removal. On
>> disconnect, we usually just orphan the object, we remove it
>> from visibility, disconnect the device <-> object relation.
>>
>> All of the N users with a reference can still access the
>> memory, they just do not talk to a real device anymore. The
>> invalidated/orphaned state is communicated otherwise by locks
>> and flags in the device object. Only after all of the N users
>> left the object alone, the memory of the orphan if free'd.
>
> But this is not what happened here - it's a special piece of
> fundamental hardware that doesnt hot-plug separately from the
> CPU and that has just a single "user".
>
> So i'm curious, why wasn't the memset() enough? It should have
> resolved the bug AFAICS.
>
It did! The memset _did_ fix the bug.
See commit a3301b7 (x86/mce: Fix CPU hotplug and suspend regression
related to MCE).
Just to clarify: the bug was that a CPU offline + CPU online would
lead to usage of stale pointers in some device structure related
to MCE and hence, suspend-resume would not work on the second attempt
to suspend. And (as expected), the other symptom of this bug was: a
CPU offline + CPU online would cause the machine to oops because it
tried to dereference some invalid pointer.
And the memset() fixed this bug. Completely.
But what still remained after the memset, was only a harmless warning
about machinecheck not having a release() function. This was only a
reflection of the semantics that the driver-core imposed, but not
really a bug as such. (And as I mentioned in one of my earlier posts,
this warning existed in much older kernels too, but was hidden because
pr_debug() was used to print it. Now that the callpaths changed after
the change over from sysdev to struct device, we now started hitting
a WARN(), instead of a mild pr_debug(). But the message conveyed
by either of these was exactly the same.)
So, the discussion in this thread was about how best to get rid of
that warning, by playing by the rules of the driver-core instead of
circumventing it by having a dummy release function just to silence
the warning.
Regards,
Srivatsa S. Bhat
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-19 13:29 ` Srivatsa S. Bhat
@ 2012-01-19 15:13 ` Alan Stern
2012-01-19 19:38 ` Ingo Molnar
0 siblings, 1 reply; 27+ messages in thread
From: Alan Stern @ 2012-01-19 15:13 UTC (permalink / raw)
To: Srivatsa S. Bhat
Cc: Ingo Molnar, Kay Sievers, Luck, Tony, Greg KH, Linus Torvalds,
Rafael J. Wysocki, Sergei Trofimovich, linux-kernel,
Linux PM mailing list, Borislav Petkov, tglx, prasad, Ming Lei,
Djalal Harouni, Borislav Petkov, Hidetoshi Seto, Andi Kleen,
gouders, Marcos Souza, justinmattock, Jeff Chua
On Thu, 19 Jan 2012, Srivatsa S. Bhat wrote:
> On 01/19/2012 06:02 PM, Ingo Molnar wrote:
> > But this is not what happened here - it's a special piece of
> > fundamental hardware that doesnt hot-plug separately from the
> > CPU and that has just a single "user".
> >
> > So i'm curious, why wasn't the memset() enough? It should have
> > resolved the bug AFAICS.
> >
>
>
> It did! The memset _did_ fix the bug.
But will it continue to fix the bug in the future?
Or to put it another way, even though no code takes references to these
device structures (can you really guarantee that even now?), how do you
prevent references being taken in future versions of the kernel?
Calling memset while there still are outstanding references very
definitely _is_ a bug.
IIRC, it used to be completely impossible to prevent this from
happening because sysfs would take references whenever user tasks
opened attribute files. Sysfs no longer does this, but the basic
principle of defensive programming still applies.
Alan Stern
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-19 15:13 ` Alan Stern
@ 2012-01-19 19:38 ` Ingo Molnar
2012-01-19 20:52 ` Alan Stern
0 siblings, 1 reply; 27+ messages in thread
From: Ingo Molnar @ 2012-01-19 19:38 UTC (permalink / raw)
To: Alan Stern
Cc: Srivatsa S. Bhat, Kay Sievers, Luck, Tony, Greg KH,
Linus Torvalds, Rafael J. Wysocki, Sergei Trofimovich,
linux-kernel, Linux PM mailing list, Borislav Petkov, tglx,
prasad, Ming Lei, Djalal Harouni, Borislav Petkov,
Hidetoshi Seto, Andi Kleen, gouders, Marcos Souza, justinmattock,
Jeff Chua
* Alan Stern <stern@rowland.harvard.edu> wrote:
> Or to put it another way, even though no code takes references
> to these device structures (can you really guarantee that even
> now?), [...]
That's a good question. Do these devices get exposed to
user-space access via sysfs somehow, where such an extra
reference could materialize?
> [...] how do you prevent references being taken in future
> versions of the kernel? Calling memset while there still are
> outstanding references very definitely _is_ a bug.
Unless there's a reference i missed the 'refcounting' here is
very simple and does not exist at all.
IMHO you should not impose complexity and abstraction on code
just because. Now i don't care much about the extra pointer, but
the [NR_CPUS] array was doubly ugly (and Greg promised to fix
that, so no problems from me).
So the conclusion is that the reasons stated here don't really
apply. The one i mentioned *does* apply: the original bug was
due to sloppy life time rules of this driver buffer, and that
came back to bite us. Making it dynamic solves that kind of
problem. In that sense the warning is fine too.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH] mce: fix warning messages about static struct mce_device
2012-01-19 19:38 ` Ingo Molnar
@ 2012-01-19 20:52 ` Alan Stern
0 siblings, 0 replies; 27+ messages in thread
From: Alan Stern @ 2012-01-19 20:52 UTC (permalink / raw)
To: Ingo Molnar
Cc: Srivatsa S. Bhat, Kay Sievers, Luck, Tony, Greg KH,
Linus Torvalds, Rafael J. Wysocki, Sergei Trofimovich,
linux-kernel, Linux PM mailing list, Borislav Petkov, tglx,
prasad, Ming Lei, Djalal Harouni, Borislav Petkov,
Hidetoshi Seto, Andi Kleen, gouders, Marcos Souza, justinmattock,
Jeff Chua
On Thu, 19 Jan 2012, Ingo Molnar wrote:
>
> * Alan Stern <stern@rowland.harvard.edu> wrote:
>
> > Or to put it another way, even though no code takes references
> > to these device structures (can you really guarantee that even
> > now?), [...]
>
> That's a good question. Do these devices get exposed to
> user-space access via sysfs somehow, where such an extra
> reference could materialize?
_Every_ struct device that gets registered is exposed to userspace via
sysfs. Of course, that doesn't mean the exposure can create extra
references.
> > [...] how do you prevent references being taken in future
> > versions of the kernel? Calling memset while there still are
> > outstanding references very definitely _is_ a bug.
>
> Unless there's a reference i missed the 'refcounting' here is
> very simple and does not exist at all.
>
> IMHO you should not impose complexity and abstraction on code
> just because. Now i don't care much about the extra pointer, but
> the [NR_CPUS] array was doubly ugly (and Greg promised to fix
> that, so no problems from me).
>
> So the conclusion is that the reasons stated here don't really
> apply. The one i mentioned *does* apply: the original bug was
> due to sloppy life time rules of this driver buffer, and that
> came back to bite us. Making it dynamic solves that kind of
> problem. In that sense the warning is fine too.
Then it looks like we are in agreement. :-)
Alan Stern
^ permalink raw reply [flat|nested] 27+ messages in thread
* MCE: convert static array of pointers to per-cpu variables
2012-01-19 12:28 ` Ingo Molnar
@ 2012-01-26 23:49 ` Greg KH
2012-01-27 13:14 ` Srivatsa S. Bhat
0 siblings, 1 reply; 27+ messages in thread
From: Greg KH @ 2012-01-26 23:49 UTC (permalink / raw)
To: Ingo Molnar
Cc: Linus Torvalds, Rafael J. Wysocki, Srivatsa S. Bhat,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
From: Greg Kroah-Hartman <gregkh@suse.de>
When I previously fixed up the mce_device code, I used a static array of
the pointers. It was (rightfully) pointed out to me that I should be
using the per_cpu code instead.
This patch converts the code over to that structure, moving the variable
back into the per_cpu area, like it used to be for 3.2 and earlier.
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6aefb14..441520e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-extern struct device *mce_device[CONFIG_NR_CPUS];
+DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a11ae2..4979a5d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1859,7 +1859,7 @@ static struct bus_type mce_subsys = {
.dev_name = "machinecheck",
};
-struct device *mce_device[CONFIG_NR_CPUS];
+DEFINE_PER_CPU(struct device *, mce_device);
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2038,7 +2038,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
goto error2;
}
cpumask_set_cpu(cpu, mce_device_initialized);
- mce_device[cpu] = dev;
+ per_cpu(mce_device, cpu) = dev;
return 0;
error2:
@@ -2055,7 +2055,7 @@ error:
static __cpuinit void mce_device_remove(unsigned int cpu)
{
- struct device *dev = mce_device[cpu];
+ struct device *dev = per_cpu(mce_device, cpu);
int i;
if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2069,7 +2069,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
- mce_device[cpu] = NULL;
+ per_cpu(mce_device, cpu) = NULL;
}
/* Make sure there are no machine checks on offlined CPUs. */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 786e76a..a4bf9d2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,7 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
- struct device *dev = mce_device[cpu];
+ struct device *dev = per_cpu(mce_device, cpu);
char name[32];
sprintf(name, "threshold_bank%i", bank);
@@ -585,7 +585,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;
- dev = mce_device[i];
+ dev = per_cpu(mce_device, i);
if (dev)
err = sysfs_create_link(&dev->kobj,b->kobj, name);
if (err)
@@ -665,7 +665,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&mce_device[cpu]->kobj, name);
+ dev = per_cpu(mce_device, cpu);
+ sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
@@ -677,7 +678,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;
- dev = mce_device[i];
+ dev = per_cpu(mce_device, i);
if (dev)
sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: MCE: convert static array of pointers to per-cpu variables
2012-01-26 23:49 ` MCE: convert static array of pointers to per-cpu variables Greg KH
@ 2012-01-27 13:14 ` Srivatsa S. Bhat
0 siblings, 0 replies; 27+ messages in thread
From: Srivatsa S. Bhat @ 2012-01-27 13:14 UTC (permalink / raw)
To: Greg KH
Cc: Ingo Molnar, Linus Torvalds, Rafael J. Wysocki,
Sergei Trofimovich, linux-kernel, Kay Sievers,
Linux PM mailing list, Tony Luck, Borislav Petkov, tglx, prasad,
Ming Lei, Djalal Harouni, Borislav Petkov, Hidetoshi Seto,
Andi Kleen, gouders, Marcos Souza, justinmattock, Jeff Chua
On 01/27/2012 05:19 AM, Greg KH wrote:
> From: Greg Kroah-Hartman <gregkh@suse.de>
>
> When I previously fixed up the mce_device code, I used a static array of
> the pointers. It was (rightfully) pointed out to me that I should be
> using the per_cpu code instead.
>
> This patch converts the code over to that structure, moving the variable
> back into the per_cpu area, like it used to be for 3.2 and earlier.
>
> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Regards,
Srivatsa S. Bhat
^ permalink raw reply [flat|nested] 27+ messages in thread
end of thread, other threads:[~2012-01-27 13:14 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-16 22:40 [PATCH] mce: fix warning messages about static struct mce_device Greg KH
2012-01-17 0:14 ` Djalal Harouni
2012-01-17 0:15 ` Greg KH
2012-01-17 0:21 ` Linus Torvalds
2012-01-17 1:00 ` Greg KH
2012-01-17 8:38 ` Ingo Molnar
2012-01-17 15:51 ` Greg KH
2012-01-17 16:28 ` Greg KH
2012-01-18 9:31 ` Ingo Molnar
2012-01-18 14:42 ` Greg KH
2012-01-18 15:51 ` Alan Stern
2012-01-18 17:28 ` Luck, Tony
2012-01-18 17:54 ` Srivatsa S. Bhat
2012-01-18 18:10 ` Alan Stern
2012-01-18 18:50 ` Kay Sievers
2012-01-18 19:00 ` Luck, Tony
2012-01-18 19:31 ` Srivatsa S. Bhat
2012-01-19 12:32 ` Ingo Molnar
2012-01-19 13:29 ` Srivatsa S. Bhat
2012-01-19 15:13 ` Alan Stern
2012-01-19 19:38 ` Ingo Molnar
2012-01-19 20:52 ` Alan Stern
2012-01-19 12:28 ` Ingo Molnar
2012-01-26 23:49 ` MCE: convert static array of pointers to per-cpu variables Greg KH
2012-01-27 13:14 ` Srivatsa S. Bhat
2012-01-17 12:36 ` [PATCH] mce: fix warning messages about static struct mce_device Srivatsa S. Bhat
2012-01-17 15:52 ` Greg KH
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).