* [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 8:51 Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
` (3 more replies)
0 siblings, 4 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-21 8:51 UTC (permalink / raw)
To: Tejun Heo, Christoph Lameter
Cc: Nicholas Piggin, linux-kernel, linux-arch, linuxppc-dev
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it saves 2 loads:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/asm-generic/percpu.h | 54 +++++++++++++++++++++++++-------------------
1 file changed, 31 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..3fe18fe 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,18 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +149,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +159,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +176,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
@ 2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin
3 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:25 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 5055 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: blackfin-BF561-EZKIT-SMP_defconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=blackfin
All error/warnings (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/blackfin/include/generated/asm/percpu.h:1:0,
from include/linux/percpu.h:12,
from include/linux/percpu-rwsem.h:6,
from include/linux/fs.h:30,
from mm/vmstat.c:12:
>> include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 10251 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:25 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:25 UTC (permalink / raw)
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 5055 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: blackfin-BF561-EZKIT-SMP_defconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=blackfin
All error/warnings (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/blackfin/include/generated/asm/percpu.h:1:0,
from include/linux/percpu.h:12,
from include/linux/percpu-rwsem.h:6,
from include/linux/fs.h:30,
from mm/vmstat.c:12:
>> include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 10251 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:25 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:25 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, linux-kernel,
linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 5055 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: blackfin-BF561-EZKIT-SMP_defconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=blackfin
All error/warnings (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/blackfin/include/generated/asm/percpu.h:1:0,
from include/linux/percpu.h:12,
from include/linux/percpu-rwsem.h:6,
from include/linux/fs.h:30,
from mm/vmstat.c:12:
>> include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
>> include/asm-generic/percpu.h:382:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:323:24: note: in expansion of macro 'this_cpu_xchg_1'
case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \
^~~~
>> include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
>> mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 10251 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
@ 2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin
3 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 2873 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: tile-tilegx_defconfig (attached as .config)
compiler: tilegx-linux-gcc (GCC) 4.6.2
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=tile
All errors (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
mm/vmstat.c:476:1: note: each undeclared identifier is reported only once for each function it appears in
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 15729 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
@ 2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin
3 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 7000 bytes --]
Hi Nicholas,
[auto build test WARNING on asm-generic/master]
[also build test WARNING on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: i386-randconfig-s0-201638 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All warnings (new ones prefixed by >>):
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from arch/x86/kernel/nmi.c:13:
arch/x86/kernel/nmi.c: In function 'do_nmi':
include/asm-generic/percpu.h:138:17: warning: unused variable '__p' [-Wunused-variable]
typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
^
include/asm-generic/percpu.h:378:41: note: in expansion of macro 'this_cpu_generic_add_return'
#define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_add_return_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:499:39: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:511:34: note: in expansion of macro 'this_cpu_add_return'
#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
^~~~~~~~~~~~~~~~~~~
>> arch/x86/kernel/nmi.c:544:6: note: in expansion of macro 'this_cpu_dec_return'
if (this_cpu_dec_return(nmi_state))
^~~~~~~~~~~~~~~~~~~
--
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/wait.h:8,
from include/linux/fs.h:5,
from mm/vmstat.c:12:
include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
vim +/this_cpu_dec_return +544 arch/x86/kernel/nmi.c
1d48922c Don Zickus 2011-09-30 528 inc_irq_stat(__nmi_count);
1d48922c Don Zickus 2011-09-30 529
1d48922c Don Zickus 2011-09-30 530 if (!ignore_nmis)
1d48922c Don Zickus 2011-09-30 531 default_do_nmi(regs);
1d48922c Don Zickus 2011-09-30 532
1d48922c Don Zickus 2011-09-30 533 nmi_exit();
228bdaa9 Steven Rostedt 2011-12-09 534
9d050416 Andy Lutomirski 2015-07-15 535 #ifdef CONFIG_X86_64
9d050416 Andy Lutomirski 2015-07-15 536 if (unlikely(this_cpu_read(update_debug_stack))) {
9d050416 Andy Lutomirski 2015-07-15 537 debug_stack_reset();
9d050416 Andy Lutomirski 2015-07-15 538 this_cpu_write(update_debug_stack, 0);
9d050416 Andy Lutomirski 2015-07-15 539 }
9d050416 Andy Lutomirski 2015-07-15 540 #endif
9d050416 Andy Lutomirski 2015-07-15 541
9d050416 Andy Lutomirski 2015-07-15 542 if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
9d050416 Andy Lutomirski 2015-07-15 543 write_cr2(this_cpu_read(nmi_cr2));
9d050416 Andy Lutomirski 2015-07-15 @544 if (this_cpu_dec_return(nmi_state))
9d050416 Andy Lutomirski 2015-07-15 545 goto nmi_restart;
1d48922c Don Zickus 2011-09-30 546 }
9326638c Masami Hiramatsu 2014-04-17 547 NOKPROBE_SYMBOL(do_nmi);
1d48922c Don Zickus 2011-09-30 548
1d48922c Don Zickus 2011-09-30 549 void stop_nmi(void)
1d48922c Don Zickus 2011-09-30 550 {
1d48922c Don Zickus 2011-09-30 551 ignore_nmis++;
1d48922c Don Zickus 2011-09-30 552 }
:::::: The code at line 544 was first introduced by commit
:::::: 9d05041679904b12c12421cbcf9cb5f4860a8d7b x86/nmi: Enable nested do_nmi() handling for 64-bit kernels
:::::: TO: Andy Lutomirski <luto@kernel.org>
:::::: CC: Ingo Molnar <mingo@kernel.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28216 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:30 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 2873 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: tile-tilegx_defconfig (attached as .config)
compiler: tilegx-linux-gcc (GCC) 4.6.2
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=tile
All errors (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
mm/vmstat.c:476:1: note: each undeclared identifier is reported only once for each function it appears in
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 15729 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:30 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
Cc: kbuild-all, Tejun Heo, Christoph Lameter, Nicholas Piggin,
linux-kernel, linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 7000 bytes --]
Hi Nicholas,
[auto build test WARNING on asm-generic/master]
[also build test WARNING on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: i386-randconfig-s0-201638 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All warnings (new ones prefixed by >>):
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from arch/x86/kernel/nmi.c:13:
arch/x86/kernel/nmi.c: In function 'do_nmi':
include/asm-generic/percpu.h:138:17: warning: unused variable '__p' [-Wunused-variable]
typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
^
include/asm-generic/percpu.h:378:41: note: in expansion of macro 'this_cpu_generic_add_return'
#define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_add_return_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:499:39: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:511:34: note: in expansion of macro 'this_cpu_add_return'
#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
^~~~~~~~~~~~~~~~~~~
>> arch/x86/kernel/nmi.c:544:6: note: in expansion of macro 'this_cpu_dec_return'
if (this_cpu_dec_return(nmi_state))
^~~~~~~~~~~~~~~~~~~
--
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/wait.h:8,
from include/linux/fs.h:5,
from mm/vmstat.c:12:
include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
vim +/this_cpu_dec_return +544 arch/x86/kernel/nmi.c
1d48922c Don Zickus 2011-09-30 528 inc_irq_stat(__nmi_count);
1d48922c Don Zickus 2011-09-30 529
1d48922c Don Zickus 2011-09-30 530 if (!ignore_nmis)
1d48922c Don Zickus 2011-09-30 531 default_do_nmi(regs);
1d48922c Don Zickus 2011-09-30 532
1d48922c Don Zickus 2011-09-30 533 nmi_exit();
228bdaa9 Steven Rostedt 2011-12-09 534
9d050416 Andy Lutomirski 2015-07-15 535 #ifdef CONFIG_X86_64
9d050416 Andy Lutomirski 2015-07-15 536 if (unlikely(this_cpu_read(update_debug_stack))) {
9d050416 Andy Lutomirski 2015-07-15 537 debug_stack_reset();
9d050416 Andy Lutomirski 2015-07-15 538 this_cpu_write(update_debug_stack, 0);
9d050416 Andy Lutomirski 2015-07-15 539 }
9d050416 Andy Lutomirski 2015-07-15 540 #endif
9d050416 Andy Lutomirski 2015-07-15 541
9d050416 Andy Lutomirski 2015-07-15 542 if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
9d050416 Andy Lutomirski 2015-07-15 543 write_cr2(this_cpu_read(nmi_cr2));
9d050416 Andy Lutomirski 2015-07-15 @544 if (this_cpu_dec_return(nmi_state))
9d050416 Andy Lutomirski 2015-07-15 545 goto nmi_restart;
1d48922c Don Zickus 2011-09-30 546 }
9326638c Masami Hiramatsu 2014-04-17 547 NOKPROBE_SYMBOL(do_nmi);
1d48922c Don Zickus 2011-09-30 548
1d48922c Don Zickus 2011-09-30 549 void stop_nmi(void)
1d48922c Don Zickus 2011-09-30 550 {
1d48922c Don Zickus 2011-09-30 551 ignore_nmis++;
1d48922c Don Zickus 2011-09-30 552 }
:::::: The code at line 544 was first introduced by commit
:::::: 9d05041679904b12c12421cbcf9cb5f4860a8d7b x86/nmi: Enable nested do_nmi() handling for 64-bit kernels
:::::: TO: Andy Lutomirski <luto@kernel.org>
:::::: CC: Ingo Molnar <mingo@kernel.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28216 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:30 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, linux-kernel,
linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 2873 bytes --]
Hi Nicholas,
[auto build test ERROR on asm-generic/master]
[also build test ERROR on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: tile-tilegx_defconfig (attached as .config)
compiler: tilegx-linux-gcc (GCC) 4.6.2
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=tile
All errors (new ones prefixed by >>):
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
>> mm/vmstat.c:476:1: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
mm/vmstat.c:476:1: note: each undeclared identifier is reported only once for each function it appears in
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
vim +/raw_cpu_generic_xchg +476 mm/vmstat.c
ee99c71c KOSAKI Motohiro 2009-03-31 470 for_each_populated_zone(zone) {
fbc2edb0 Christoph Lameter 2013-09-11 471 struct per_cpu_pageset __percpu *p = zone->pageset;
2244b95a Christoph Lameter 2006-06-30 472
fbc2edb0 Christoph Lameter 2013-09-11 473 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
a7f75e25 Christoph Lameter 2008-02-04 474 int v;
a7f75e25 Christoph Lameter 2008-02-04 475
fbc2edb0 Christoph Lameter 2013-09-11 @476 v = this_cpu_xchg(p->vm_stat_diff[i], 0);
fbc2edb0 Christoph Lameter 2013-09-11 477 if (v) {
fbc2edb0 Christoph Lameter 2013-09-11 478
a7f75e25 Christoph Lameter 2008-02-04 479 atomic_long_add(v, &zone->vm_stat[i]);
:::::: The code at line 476 was first introduced by commit
:::::: fbc2edb05354480a88aa39db8a6acb5782fa1a1b vmstat: use this_cpu() to avoid irqon/off sequence in refresh_cpu_vm_stats
:::::: TO: Christoph Lameter <cl@linux.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 15729 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:30 ` kbuild test robot
0 siblings, 0 replies; 24+ messages in thread
From: kbuild test robot @ 2016-09-21 10:30 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, Tejun Heo, Christoph Lameter, linux-kernel,
linux-arch, linuxppc-dev
[-- Attachment #1: Type: text/plain, Size: 7000 bytes --]
Hi Nicholas,
[auto build test WARNING on asm-generic/master]
[also build test WARNING on v4.8-rc7 next-20160920]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/percpu-improve-generic-percpu-modify-return-implementation/20160921-170016
base: https://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git master
config: i386-randconfig-s0-201638 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All warnings (new ones prefixed by >>):
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from arch/x86/kernel/nmi.c:13:
arch/x86/kernel/nmi.c: In function 'do_nmi':
include/asm-generic/percpu.h:138:17: warning: unused variable '__p' [-Wunused-variable]
typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
^
include/asm-generic/percpu.h:378:41: note: in expansion of macro 'this_cpu_generic_add_return'
#define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_add_return_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:499:39: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val)
^~~~~~~~~~~~~~~~~~~~~~~~
include/linux/percpu-defs.h:511:34: note: in expansion of macro 'this_cpu_add_return'
#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1)
^~~~~~~~~~~~~~~~~~~
>> arch/x86/kernel/nmi.c:544:6: note: in expansion of macro 'this_cpu_dec_return'
if (this_cpu_dec_return(nmi_state))
^~~~~~~~~~~~~~~~~~~
--
mm/vmstat.c: In function 'refresh_cpu_vm_stats':
mm/vmstat.c:476:1: error: macro "raw_cpu_generic_xchg" requires 2 arguments, but only 1 given
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^ ~~~~~~~~~~~~
In file included from arch/x86/include/asm/percpu.h:551:0,
from arch/x86/include/asm/preempt.h:5,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/wait.h:8,
from include/linux/fs.h:5,
from mm/vmstat.c:12:
include/asm-generic/percpu.h:152:10: error: 'raw_cpu_generic_xchg' undeclared (first use in this function)
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
include/asm-generic/percpu.h:152:10: note: each undeclared identifier is reported only once for each function it appears in
__ret = raw_cpu_generic_xchg(pcp); \
^
include/asm-generic/percpu.h:391:36: note: in expansion of macro 'this_cpu_generic_xchg'
#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval)
^~~~~~~~~~~~~~~~~~~~~
>> include/linux/percpu-defs.h:326:24: note: in expansion of macro 'this_cpu_xchg_8'
case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \
^~~~
include/linux/percpu-defs.h:500:34: note: in expansion of macro '__pcpu_size_call_return2'
#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval)
^~~~~~~~~~~~~~~~~~~~~~~~
mm/vmstat.c:476:8: note: in expansion of macro 'this_cpu_xchg'
v = this_cpu_xchg(p->vm_stat_diff[i], 0);
^~~~~~~~~~~~~
vim +/this_cpu_dec_return +544 arch/x86/kernel/nmi.c
1d48922c Don Zickus 2011-09-30 528 inc_irq_stat(__nmi_count);
1d48922c Don Zickus 2011-09-30 529
1d48922c Don Zickus 2011-09-30 530 if (!ignore_nmis)
1d48922c Don Zickus 2011-09-30 531 default_do_nmi(regs);
1d48922c Don Zickus 2011-09-30 532
1d48922c Don Zickus 2011-09-30 533 nmi_exit();
228bdaa9 Steven Rostedt 2011-12-09 534
9d050416 Andy Lutomirski 2015-07-15 535 #ifdef CONFIG_X86_64
9d050416 Andy Lutomirski 2015-07-15 536 if (unlikely(this_cpu_read(update_debug_stack))) {
9d050416 Andy Lutomirski 2015-07-15 537 debug_stack_reset();
9d050416 Andy Lutomirski 2015-07-15 538 this_cpu_write(update_debug_stack, 0);
9d050416 Andy Lutomirski 2015-07-15 539 }
9d050416 Andy Lutomirski 2015-07-15 540 #endif
9d050416 Andy Lutomirski 2015-07-15 541
9d050416 Andy Lutomirski 2015-07-15 542 if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
9d050416 Andy Lutomirski 2015-07-15 543 write_cr2(this_cpu_read(nmi_cr2));
9d050416 Andy Lutomirski 2015-07-15 @544 if (this_cpu_dec_return(nmi_state))
9d050416 Andy Lutomirski 2015-07-15 545 goto nmi_restart;
1d48922c Don Zickus 2011-09-30 546 }
9326638c Masami Hiramatsu 2014-04-17 547 NOKPROBE_SYMBOL(do_nmi);
1d48922c Don Zickus 2011-09-30 548
1d48922c Don Zickus 2011-09-30 549 void stop_nmi(void)
1d48922c Don Zickus 2011-09-30 550 {
1d48922c Don Zickus 2011-09-30 551 ignore_nmis++;
1d48922c Don Zickus 2011-09-30 552 }
:::::: The code at line 544 was first introduced by commit
:::::: 9d05041679904b12c12421cbcf9cb5f4860a8d7b x86/nmi: Enable nested do_nmi() handling for 64-bit kernels
:::::: TO: Andy Lutomirski <luto@kernel.org>
:::::: CC: Ingo Molnar <mingo@kernel.org>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 28216 bytes --]
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
@ 2016-09-21 10:57 ` Nicholas Piggin
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin
3 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-21 10:57 UTC (permalink / raw)
To: Tejun Heo, Christoph Lameter; +Cc: linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 18:51:37 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
Sorry I picked up an old patch here. This one should be better.
>From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:57 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-21 10:57 UTC (permalink / raw)
To: Tejun Heo, Christoph Lameter; +Cc: linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 18:51:37 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
Sorry I picked up an old patch here. This one should be better.
From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 10:57 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-21 10:57 UTC (permalink / raw)
To: Tejun Heo, Christoph Lameter; +Cc: linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 18:51:37 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
Sorry I picked up an old patch here. This one should be better.
=46rom d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementatio=
ns
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++---------------=
----
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
=20
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
=20
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
+ \
+ *__p +=3D val; \
+ *__p; \
})
=20
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret =3D raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret =3D *__p; \
+ *__p =3D nval; \
__ret; \
})
=20
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret =3D raw_cpu_read(pcp); \
+ __ret =3D *__p; \
if (__ret =3D=3D (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p =3D nval; \
__ret; \
})
=20
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nv=
al2) \
({ \
+ typeof(&(pcp1)) __p1 =3D raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 =3D raw_cpu_ptr(&(pcp2)); \
int __ret =3D 0; \
- if (raw_cpu_read(pcp1) =3D=3D (oval1) && \
- raw_cpu_read(pcp2) =3D=3D (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 =3D=3D (oval1) && *__p2 =3D=3D (oval2)) { \
+ *__p1 =3D nval1; \
+ *__p2 =3D nval2; \
__ret =3D 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret =3D *this_cpu_ptr(&(pcp)); \
+ __ret =3D raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
=20
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret =3D raw_cpu_read(pcp); \
+ __ret =3D raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret =3D raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret =3D raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret =3D raw_cpu_read(pcp); \
- if (__ret =3D=3D (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret =3D raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
=20
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
=20
#ifndef raw_cpu_write_1
--=20
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 10:57 ` Nicholas Piggin
(?)
(?)
@ 2016-09-21 14:23 ` Tejun Heo
2016-09-21 20:16 ` Christoph Lameter
2016-09-22 4:35 ` Nicholas Piggin
-1 siblings, 2 replies; 24+ messages in thread
From: Tejun Heo @ 2016-09-21 14:23 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
Hello, Nick.
How have you been? :)
On Wed, Sep 21, 2016 at 08:57:11PM +1000, Nicholas Piggin wrote:
> On Wed, 21 Sep 2016 18:51:37 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
>
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
>
> Sorry I picked up an old patch here. This one should be better.
>
> From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> From: Nicholas Piggin <npiggin@gmail.com>
> Date: Wed, 21 Sep 2016 18:23:43 +1000
> Subject: [PATCH] percpu: improve generic percpu modify-return implementations
>
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
>
> Work around this by finding the pointer first, then operating on that.
>
> It's also possible to mark things as restrict and those kind of games,
> but that can require larger and arch specific changes.
>
> On powerpc, __this_cpu_inc_return compiles to:
>
> ld 10,48(13)
> ldx 9,3,10
> addi 9,9,1
> stdx 9,3,10
> ld 9,48(13)
> ldx 3,9,3
>
> With this patch it compiles to:
>
> ld 10,48(13)
> ldx 9,3,10
> addi 9,9,1
> stdx 9,3,10
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Patch looks good to me but seems QP encoded. Can you please resend?
Thanks and it's great to see you again!
--
tejun
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 14:23 ` Tejun Heo
@ 2016-09-21 20:16 ` Christoph Lameter
2016-09-22 4:35 ` Nicholas Piggin
1 sibling, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2016-09-21 20:16 UTC (permalink / raw)
To: Tejun Heo
Cc: Nicholas Piggin, linux-kernel, linux-arch, linux-mm, linuxppc-dev
On Wed, 21 Sep 2016, Tejun Heo wrote:
> Hello, Nick.
>
> How have you been? :)
>
He is baack. Are we getting SL!B? ;-)
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-21 20:16 ` Christoph Lameter
0 siblings, 0 replies; 24+ messages in thread
From: Christoph Lameter @ 2016-09-21 20:16 UTC (permalink / raw)
To: Tejun Heo
Cc: Nicholas Piggin, linux-kernel, linux-arch, linux-mm, linuxppc-dev
On Wed, 21 Sep 2016, Tejun Heo wrote:
> Hello, Nick.
>
> How have you been? :)
>
He is baack. Are we getting SL!B? ;-)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 14:23 ` Tejun Heo
2016-09-21 20:16 ` Christoph Lameter
2016-09-22 4:35 ` Nicholas Piggin
@ 2016-09-22 4:35 ` Nicholas Piggin
1 sibling, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:35 UTC (permalink / raw)
To: Tejun Heo; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 10:23:43 -0400
Tejun Heo <tj@kernel.org> wrote:
> Hello, Nick.
>
> How have you been? :)
Hey Tejun,
Well thank you, how about you?
> On Wed, Sep 21, 2016 at 08:57:11PM +1000, Nicholas Piggin wrote:
> > On Wed, 21 Sep 2016 18:51:37 +1000
> > Nicholas Piggin <npiggin@gmail.com> wrote:
> >
> > > Some architectures require an additional load to find the address of
> > > percpu pointers. In some implemenatations, the C aliasing rules do not
> > > allow the result of that load to be kept over the store that modifies
> > > the percpu variable, which causes additional loads.
> >
> > Sorry I picked up an old patch here. This one should be better.
> >
> > From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> > From: Nicholas Piggin <npiggin@gmail.com>
> > Date: Wed, 21 Sep 2016 18:23:43 +1000
> > Subject: [PATCH] percpu: improve generic percpu modify-return implementations
> >
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
> >
> > Work around this by finding the pointer first, then operating on that.
> >
> > It's also possible to mark things as restrict and those kind of games,
> > but that can require larger and arch specific changes.
> >
> > On powerpc, __this_cpu_inc_return compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> > ld 9,48(13)
> > ldx 3,9,3
> >
> > With this patch it compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Patch looks good to me but seems QP encoded. Can you please resend?
>
> Thanks and it's great to see you again!
>
Trying a new mail client, sorry. It *seems* to be working now, how's
this?
>From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>
To: Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-22 4:35 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:35 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-arch, Christoph Lameter, linuxppc-dev, linux-kernel
On Wed, 21 Sep 2016 10:23:43 -0400
Tejun Heo <tj@kernel.org> wrote:
> Hello, Nick.
>
> How have you been? :)
Hey Tejun,
Well thank you, how about you?
> On Wed, Sep 21, 2016 at 08:57:11PM +1000, Nicholas Piggin wrote:
> > On Wed, 21 Sep 2016 18:51:37 +1000
> > Nicholas Piggin <npiggin@gmail.com> wrote:
> >
> > > Some architectures require an additional load to find the address of
> > > percpu pointers. In some implemenatations, the C aliasing rules do not
> > > allow the result of that load to be kept over the store that modifies
> > > the percpu variable, which causes additional loads.
> >
> > Sorry I picked up an old patch here. This one should be better.
> >
> > From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> > From: Nicholas Piggin <npiggin@gmail.com>
> > Date: Wed, 21 Sep 2016 18:23:43 +1000
> > Subject: [PATCH] percpu: improve generic percpu modify-return implementations
> >
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
> >
> > Work around this by finding the pointer first, then operating on that.
> >
> > It's also possible to mark things as restrict and those kind of games,
> > but that can require larger and arch specific changes.
> >
> > On powerpc, __this_cpu_inc_return compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> > ld 9,48(13)
> > ldx 3,9,3
> >
> > With this patch it compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Patch looks good to me but seems QP encoded. Can you please resend?
>
> Thanks and it's great to see you again!
>
Trying a new mail client, sorry. It *seems* to be working now, how's
this?
From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>
To: Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-22 4:35 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:35 UTC (permalink / raw)
To: Tejun Heo; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 10:23:43 -0400
Tejun Heo <tj@kernel.org> wrote:
> Hello, Nick.
>
> How have you been? :)
Hey Tejun,
Well thank you, how about you?
> On Wed, Sep 21, 2016 at 08:57:11PM +1000, Nicholas Piggin wrote:
> > On Wed, 21 Sep 2016 18:51:37 +1000
> > Nicholas Piggin <npiggin@gmail.com> wrote:
> >
> > > Some architectures require an additional load to find the address of
> > > percpu pointers. In some implemenatations, the C aliasing rules do not
> > > allow the result of that load to be kept over the store that modifies
> > > the percpu variable, which causes additional loads.
> >
> > Sorry I picked up an old patch here. This one should be better.
> >
> > From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> > From: Nicholas Piggin <npiggin@gmail.com>
> > Date: Wed, 21 Sep 2016 18:23:43 +1000
> > Subject: [PATCH] percpu: improve generic percpu modify-return implementations
> >
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
> >
> > Work around this by finding the pointer first, then operating on that.
> >
> > It's also possible to mark things as restrict and those kind of games,
> > but that can require larger and arch specific changes.
> >
> > On powerpc, __this_cpu_inc_return compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> > ld 9,48(13)
> > ldx 3,9,3
> >
> > With this patch it compiles to:
> >
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> >
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>
> Patch looks good to me but seems QP encoded. Can you please resend?
>
> Thanks and it's great to see you again!
>
Trying a new mail client, sorry. It *seems* to be working now, how's
this?
From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>
To: Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-22 4:35 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:35 UTC (permalink / raw)
To: Tejun Heo; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
On Wed, 21 Sep 2016 10:23:43 -0400
Tejun Heo <tj@kernel.org> wrote:
> Hello, Nick.
>=20
> How have you been? :)
Hey Tejun,
Well thank you, how about you?
=20
> On Wed, Sep 21, 2016 at 08:57:11PM +1000, Nicholas Piggin wrote:
> > On Wed, 21 Sep 2016 18:51:37 +1000
> > Nicholas Piggin <npiggin@gmail.com> wrote:
> > =20
> > > Some architectures require an additional load to find the address of
> > > percpu pointers. In some implemenatations, the C aliasing rules do not
> > > allow the result of that load to be kept over the store that modifies
> > > the percpu variable, which causes additional loads. =20
> >=20
> > Sorry I picked up an old patch here. This one should be better.
> >=20
> > From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> > From: Nicholas Piggin <npiggin@gmail.com>
> > Date: Wed, 21 Sep 2016 18:23:43 +1000
> > Subject: [PATCH] percpu: improve generic percpu modify-return implement=
ations
> >=20
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
> >=20
> > Work around this by finding the pointer first, then operating on that.
> >=20
> > It's also possible to mark things as restrict and those kind of games,
> > but that can require larger and arch specific changes.
> >=20
> > On powerpc, __this_cpu_inc_return compiles to:
> >=20
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> > ld 9,48(13)
> > ldx 3,9,3
> >=20
> > With this patch it compiles to:
> >=20
> > ld 10,48(13)
> > ldx 9,3,10
> > addi 9,9,1
> > stdx 9,3,10
> >=20
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> =20
>=20
> Patch looks good to me but seems QP encoded. Can you please resend?
>=20
> Thanks and it's great to see you again!
>=20
Trying a new mail client, sorry. It *seems* to be working now, how's
this?
=46rom d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementatio=
ns
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>
To: Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++---------------=
----
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
=20
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
=20
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
+ \
+ *__p +=3D val; \
+ *__p; \
})
=20
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret =3D raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret =3D *__p; \
+ *__p =3D nval; \
__ret; \
})
=20
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p =3D raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret =3D raw_cpu_read(pcp); \
+ __ret =3D *__p; \
if (__ret =3D=3D (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p =3D nval; \
__ret; \
})
=20
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nv=
al2) \
({ \
+ typeof(&(pcp1)) __p1 =3D raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 =3D raw_cpu_ptr(&(pcp2)); \
int __ret =3D 0; \
- if (raw_cpu_read(pcp1) =3D=3D (oval1) && \
- raw_cpu_read(pcp2) =3D=3D (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 =3D=3D (oval1) && *__p2 =3D=3D (oval2)) { \
+ *__p1 =3D nval1; \
+ *__p2 =3D nval2; \
__ret =3D 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret =3D *this_cpu_ptr(&(pcp)); \
+ __ret =3D raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
=20
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret =3D raw_cpu_read(pcp); \
+ __ret =3D raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret =3D raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret =3D raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret =3D raw_cpu_read(pcp); \
- if (__ret =3D=3D (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret =3D raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
=20
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
=20
#ifndef raw_cpu_write_1
--=20
2.9.3
^ permalink raw reply related [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-21 20:16 ` Christoph Lameter
@ 2016-09-22 4:42 ` Nicholas Piggin
-1 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:42 UTC (permalink / raw)
To: Christoph Lameter
Cc: Tejun Heo, linux-kernel, linux-arch, linux-mm, linuxppc-dev
On Wed, 21 Sep 2016 15:16:25 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:
> On Wed, 21 Sep 2016, Tejun Heo wrote:
>
> > Hello, Nick.
> >
> > How have you been? :)
> >
>
> He is baack. Are we getting SL!B? ;-)
>
Hey Christoph. Sure, why not.
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
@ 2016-09-22 4:42 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-22 4:42 UTC (permalink / raw)
To: Christoph Lameter
Cc: Tejun Heo, linux-kernel, linux-arch, linux-mm, linuxppc-dev
On Wed, 21 Sep 2016 15:16:25 -0500 (CDT)
Christoph Lameter <cl@linux.com> wrote:
> On Wed, 21 Sep 2016, Tejun Heo wrote:
>
> > Hello, Nick.
> >
> > How have you been? :)
> >
>
> He is baack. Are we getting SL!B? ;-)
>
Hey Christoph. Sure, why not.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-22 4:35 ` Nicholas Piggin
` (2 preceding siblings ...)
(?)
@ 2016-09-22 16:07 ` Tejun Heo
2016-09-23 7:33 ` Nicholas Piggin
-1 siblings, 1 reply; 24+ messages in thread
From: Tejun Heo @ 2016-09-22 16:07 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
Hello,
On Thu, Sep 22, 2016 at 02:35:00PM +1000, Nicholas Piggin wrote:
> Well thank you, how about you?
Heh, can't complain. Hope to see you around sometime. It's been
forever.
> Trying a new mail client, sorry. It *seems* to be working now, how's
> this?
Hmm... Still encoded.
> From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> From: Nicholas Piggin <npiggin@gmail.com>
> Date: Wed, 21 Sep 2016 18:23:43 +1000
> Subject: [PATCH] percpu: improve generic percpu modify-return implementations
>
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
>
> Work around this by finding the pointer first, then operating on that.
>
> It's also possible to mark things as restrict and those kind of games,
> but that can require larger and arch specific changes.
QP-decoded and applied to percpu/for-4.9.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 24+ messages in thread
* Re: [PATCH] percpu: improve generic percpu modify-return implementation
2016-09-22 16:07 ` Tejun Heo
@ 2016-09-23 7:33 ` Nicholas Piggin
0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-09-23 7:33 UTC (permalink / raw)
To: Tejun Heo; +Cc: Christoph Lameter, linux-kernel, linux-arch, linuxppc-dev
On Thu, 22 Sep 2016 12:07:49 -0400
Tejun Heo <tj@kernel.org> wrote:
> Hello,
>
> On Thu, Sep 22, 2016 at 02:35:00PM +1000, Nicholas Piggin wrote:
> > Well thank you, how about you?
>
> Heh, can't complain. Hope to see you around sometime. It's been
> forever.
Yeah, it has been. Hopefully I'll see you around.
> > Trying a new mail client, sorry. It *seems* to be working now, how's
> > this?
>
> Hmm... Still encoded.
It looks to be a helpful surprise feature of claws. Any line starting with
the word "From" make it go q-p due to some mail servers treating that
differently. Sigh.
>
> > From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
> > From: Nicholas Piggin <npiggin@gmail.com>
> > Date: Wed, 21 Sep 2016 18:23:43 +1000
> > Subject: [PATCH] percpu: improve generic percpu modify-return implementations
> >
> > Some architectures require an additional load to find the address of
> > percpu pointers. In some implemenatations, the C aliasing rules do not
> > allow the result of that load to be kept over the store that modifies
> > the percpu variable, which causes additional loads.
> >
> > Work around this by finding the pointer first, then operating on that.
> >
> > It's also possible to mark things as restrict and those kind of games,
> > but that can require larger and arch specific changes.
>
> QP-decoded and applied to percpu/for-4.9.
>
> Thanks.
>
Thanks!
^ permalink raw reply [flat|nested] 24+ messages in thread
end of thread, other threads:[~2016-09-23 7:34 UTC | newest]
Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin
2016-09-21 10:57 ` Nicholas Piggin
2016-09-21 10:57 ` Nicholas Piggin
2016-09-21 14:23 ` Tejun Heo
2016-09-21 20:16 ` Christoph Lameter
2016-09-21 20:16 ` Christoph Lameter
2016-09-22 4:42 ` Nicholas Piggin
2016-09-22 4:42 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 16:07 ` Tejun Heo
2016-09-23 7:33 ` Nicholas Piggin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.