* v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
@ 2018-04-02 23:24 Luck, Tony
  2018-04-02 23:39 ` Luck, Tony
  2018-04-03  7:37 ` Peter Zijlstra
  0 siblings, 2 replies; 11+ messages in thread
From: Luck, Tony @ 2018-04-02 23:24 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Mel Gorman, Vincent Guittot, Patrick Bellasi, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

v4.16 boots cleanly. But with the first bunch of merges
(Linus HEAD = 46e0d28bdb8e6d00e27a0fe9e1d15df6098f0ffb)
I see a bunch of:

ia64_handle_unaligned: 4863 callbacks suppressed
kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
kernel unaligned access to 0xe00000033bdffbcc, ip=0xa0000001000f2370
kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
kernel unaligned access to 0xe00000033bdffbcc, ip=0xa0000001000f2370
kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0

The addresses are all 4-byte aligned, but not 8-byte aligned.
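
A quick standalone check (illustrative only, not kernel code) confirming
that both fault addresses are divisible by 4 but not by 8:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* The two fault addresses quoted above. */
	uint64_t addrs[] = { 0xe00000031660fd74ULL, 0xe00000033bdffbccULL };

	for (int i = 0; i < 2; i++)
		printf("%#llx: mod 4 = %llu, mod 8 = %llu\n",
		       (unsigned long long)addrs[i],
		       (unsigned long long)(addrs[i] % 4),
		       (unsigned long long)(addrs[i] % 8));
	return 0;
}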

Any guesses before I start to bisect?

-Tony


* RE: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-02 23:24 v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64 Luck, Tony
@ 2018-04-02 23:39 ` Luck, Tony
  2018-04-03  7:37 ` Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: Luck, Tony @ 2018-04-02 23:39 UTC (permalink / raw)
  To: Luck, Tony, Peter Zijlstra
  Cc: Mel Gorman, Vincent Guittot, Patrick Bellasi, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

> kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
> kernel unaligned access to 0xe00000033bdffbcc, ip=0xa0000001000f2370

Here's the disassembly of dequeue_task_fair() in case it would help to see
which two instructions are getting all the faults (the two faulting
bundles are marked with '*' below):

a0000001000f21c0 <dequeue_task_fair>:
a0000001000f21c0:	08 28 29 0e 80 05 	[MMI]       alloc r37=ar.pfs,10,7,0
a0000001000f21c6:	c0 00 33 7e 46 00 	            adds r12=-32,r12
a0000001000f21cc:	c2 08 85 84       	            adds r16=4236,r33
a0000001000f21d0:	09 58 40 ab 16 27 	[MMI]       addl r11=-685232,r1
a0000001000f21d6:	80 02 84 02 42 20 	            adds r40=128,r33
a0000001000f21dc:	05 10 01 84       	            mov r41=r34;;
a0000001000f21e0:	08 00 00 00 01 00 	[MMI]       nop.m 0x0
a0000001000f21e6:	f0 00 2c 00 42 80 	            mov r15=r11
a0000001000f21ec:	04 00 c4 00       	            mov r36=b0
a0000001000f21f0:	19 30 00 50 07 39 	[MMB]       cmp.eq p6,p7=0,r40
a0000001000f21f6:	30 00 c0 a3 4e 03 	            mov r3=-219008
a0000001000f21fc:	60 00 00 41       	      (p06) br.cond.spnt.few a0000001000f2250 <dequeue_task_fair+0x90>;;
a0000001000f2200:	0b 50 00 20 10 10 	[MMI]       ld4 r10=[r16];;
a0000001000f2206:	90 50 3c 24 40 00 	            shladd r9=r10,3,r15
a0000001000f220c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2210:	0b 40 00 12 18 10 	[MMI]       ld8 r8=[r9];;
a0000001000f2216:	60 1a 20 00 40 00 	            add r38=r3,r8
a0000001000f221c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2220:	11 18 71 4c 01 21 	[MIB]       adds r35=156,r38
a0000001000f2226:	70 02 98 02 42 00 	            adds r39=128,r38
a0000001000f222c:	68 f3 ff 58       	            br.call.sptk.many b0=a0000001000f1580 <dequeue_entity>;;
a0000001000f2230:	0a 10 00 46 10 10 	[MMI]       ld4 r2=[r35];;
a0000001000f2236:	e0 f8 0b 7e 46 00 	            adds r14=-1,r2
a0000001000f223c:	00 00 04 00       	            nop.i 0x0
a0000001000f2240:	0a 00 00 00 01 00 	[MMI]       nop.m 0x0;;
a0000001000f2246:	00 70 8c 20 23 00 	            st4 [r35]=r14
a0000001000f224c:	00 00 04 00       	            nop.i 0x0
a0000001000f2250:	09 98 c0 02 d7 26 	[MMI]       addl r19=-2069584,r1
a0000001000f2256:	50 21 80 00 42 00 	            adds r21=4,r32
a0000001000f225c:	83 01 05 84       	            adds r24=152,r32;;
a0000001000f2260:	09 90 00 26 18 10 	[MMI]       ld8 r18=[r19]
a0000001000f2266:	60 01 54 20 20 00 	            ld4 r22=[r21]
a0000001000f226c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2270:	09 a0 fc 2d 3f 23 	[MMI]       adds r20=-1,r22
a0000001000f2276:	10 01 48 a0 20 00 	            ld4.a r17=[r18]
a0000001000f227c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2280:	08 00 00 00 01 00 	[MMI]       nop.m 0x0
a0000001000f2286:	00 a0 54 20 23 00 	            st4 [r21]=r20
a0000001000f228c:	a1 8a 24 50       	            tbit.z p8,p9=r17,21
a0000001000f2290:	18 00 00 00 01 00 	[MMB]       nop.m 0x0
a0000001000f2296:	10 d9 00 80 02 00 	            chk.a.clr r17,a0000001000f2440 <dequeue_task_fair+0x280>
a0000001000f229c:	00 00 00 20       	            nop.b 0x0
a0000001000f22a0:	10 00 00 00 01 00 	[MIB]       nop.m 0x0
a0000001000f22a6:	00 00 00 02 00 04 	            nop.i 0x0
a0000001000f22ac:	e0 00 00 43       	      (p08) br.cond.dpnt.few a0000001000f2380 <dequeue_task_fair+0x1c0>
a0000001000f22b0:	0b b8 00 30 10 10 	[MMI]       ld4 r23=[r24];;
a0000001000f22b6:	b0 00 5c 14 73 00 	            cmp4.eq p11,p10=0,r23
a0000001000f22bc:	00 00 04 00       	            nop.i 0x0;;
a0000001000f22c0:	71 01 81 40 02 e1 	[MIB] (p11) adds r32=288,r32
a0000001000f22c6:	12 01 00 00 42 05 	      (p11) mov r17=r0
a0000001000f22cc:	e0 00 00 42       	      (p10) br.cond.dptk.few a0000001000f23a0 <dequeue_task_fair+0x1e0>;;
a0000001000f22d0:	09 f8 40 18 00 21 	[MMI]       adds r31=16,r12
a0000001000f22d6:	00 88 80 60 23 c0 	            st4.rel [r32]=r17
a0000001000f22dc:	00 10 1d 50       	            tbit.z p6,p7=r34,0;;
a0000001000f22e0:	10 00 44 3e 90 11 	[MIB]       st4 [r31]=r17
a0000001000f22e6:	00 00 00 02 00 03 	            nop.i 0x0
a0000001000f22ec:	a0 00 00 43       	      (p06) br.cond.dpnt.few a0000001000f2380 <dequeue_task_fair+0x1c0>
a0000001000f22f0:	09 88 e0 42 02 21 	[MMI]       adds r17=312,r33
a0000001000f22f6:	e0 80 85 04 42 40 	            adds r14=304,r33
a0000001000f22fc:	c4 0b 09 84       	            adds r34=316,r33;;
a0000001000f2300:	09 00 01 22 10 10 	[MMI]       ld4 r32=[r17]
a0000001000f2306:	00 01 88 20 20 00 	            ld4 r16=[r34]
a0000001000f230c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2310:	10 00 00 00 01 00 	[MIB]       nop.m 0x0
a0000001000f2316:	90 00 80 10 28 04 	            tbit.z p9,p8=r32,0
a0000001000f231c:	70 00 00 43       	      (p08) br.cond.dpnt.few a0000001000f2380 <dequeue_task_fair+0x1c0>
a0000001000f2320:	0b 48 01 1c b8 10 	[MMI]       ld8.acq r41=[r14];;
a0000001000f2326:	10 0a a4 5c 40 00 	            or r33=1,r41
a0000001000f232c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2330:	0b 40 85 20 05 20 	[MMI]       sub r40=r33,r16;;
a0000001000f2336:	70 4a a0 00 42 00 	            adds r39=9,r40
a0000001000f233c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2340:	0a 58 48 4e 8a f5 	[MMI]       cmp4.ltu p11,p10=18,r39;;
a0000001000f2346:	82 82 a0 22 c0 e5 	      (p11) shladd r40=r16,2,r40
a0000001000f234c:	04 62 00 84       	      (p11) adds r39=32,r12
a0000001000f2350:	62 81 70 18 00 21 	[MII] (p11) adds r16=28,r12
a0000001000f2356:	00 00 00 02 80 05 	            nop.i 0x0;;
a0000001000f235c:	45 40 75 52       	      (p11) extr.u r40=r40,2,30
a0000001000f2360:	6a 01 84 20 90 d1 	[MMI] (p11) st4 [r16]=r33;;
a0000001000f2366:	02 40 9d 20 23 00 	      (p11) st4 [r39]=r40
a0000001000f236c:	00 00 04 00       	            nop.i 0x0
*a0000001000f2370:	6a 39 01 20 18 d0 	[MMI] (p11) ld8 r39=[r16];;
a0000001000f2376:	02 38 45 70 23 00 	      (p11) st8.rel [r17]=r39
a0000001000f237c:	00 00 04 00       	            nop.i 0x0
a0000001000f2380:	00 00 00 00 01 00 	[MII]       nop.m 0x0
a0000001000f2386:	00 28 01 55 00 00 	            mov.i ar.pfs=r37
a0000001000f238c:	40 0a 00 07       	            mov b0=r36
a0000001000f2390:	19 00 00 00 01 00 	[MMB]       nop.m 0x0
a0000001000f2396:	c0 00 31 00 42 80 	            adds r12=32,r12
a0000001000f239c:	08 00 84 00       	            br.ret.sptk.many b0;;
a0000001000f23a0:	18 f0 e0 42 02 21 	[MMB]       adds r30=312,r33
a0000001000f23a6:	00 02 81 04 42 00 	            adds r32=288,r32
a0000001000f23ac:	00 00 00 20       	            nop.b 0x0
a0000001000f23b0:	09 e0 50 18 00 21 	[MMI]       adds r28=20,r12
a0000001000f23b6:	f0 81 30 00 42 c0 	            adds r31=16,r12
a0000001000f23bc:	00 10 1d 50       	            tbit.z p6,p7=r34,0;;
a0000001000f23c0:	09 e8 00 3c b8 10 	[MMI]       ld8.acq r29=[r30]
a0000001000f23c6:	10 01 80 20 20 00 	            ld4 r17=[r32]
a0000001000f23cc:	00 00 04 00       	            nop.i 0x0;;
a0000001000f23d0:	00 00 00 00 01 00 	[MII]       nop.m 0x0
a0000001000f23d6:	b0 01 76 3e 29 40 	            shr.u r27=r29,32
a0000001000f23dc:	03 e8 00 84       	            mov r26=r29
*a0000001000f23e0:	0b 00 74 38 98 11 	[MMI]       st8 [r28]=r29;;
a0000001000f23e6:	f0 e8 6c 1c 69 00 	            cmp4.ltu p15,p14=r29,r27
a0000001000f23ec:	00 00 04 00       	            nop.i 0x0;;
a0000001000f23f0:	eb d1 00 36 00 21 	[MMI] (p15) mov r26=r27;;
a0000001000f23f6:	90 09 68 5c 40 00 	            or r25=1,r26
a0000001000f23fc:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2400:	02 68 44 32 8c 34 	[MII]       cmp4.ltu p13,p12=r17,r25
a0000001000f2406:	00 00 00 02 00 26 	            nop.i 0x0;;
a0000001000f240c:	12 c9 14 80       	      (p12) sub r17=r17,r25
a0000001000f2410:	a2 89 44 22 05 20 	[MII] (p13) sub r17=r17,r17
a0000001000f2416:	00 00 00 02 00 00 	            nop.i 0x0;;
a0000001000f241c:	00 00 04 00       	            nop.i 0x0
a0000001000f2420:	18 00 44 40 b0 11 	[MMB]       st4.rel [r32]=r17
a0000001000f2426:	00 88 7c 20 a3 03 	            st4 [r31]=r17
a0000001000f242c:	d0 fe ff 4a       	      (p07) br.cond.dptk.few a0000001000f22f0 <dequeue_task_fair+0x130>
a0000001000f2430:	10 00 00 00 01 00 	[MIB]       nop.m 0x0
a0000001000f2436:	00 00 00 02 00 00 	            nop.i 0x0
a0000001000f243c:	50 ff ff 48       	            br.few a0000001000f2380 <dequeue_task_fair+0x1c0>
a0000001000f2440:	09 00 00 00 01 00 	[MMI]       nop.m 0x0
a0000001000f2446:	10 01 48 20 20 00 	            ld4 r17=[r18]
a0000001000f244c:	00 00 04 00       	            nop.i 0x0;;
a0000001000f2450:	11 00 00 00 01 00 	[MIB]       nop.m 0x0
a0000001000f2456:	80 50 45 12 28 00 	            tbit.z p8,p9=r17,21
a0000001000f245c:	50 fe ff 48       	            br.few a0000001000f22a0 <dequeue_task_fair+0xe0>;;


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-02 23:24 v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64 Luck, Tony
  2018-04-02 23:39 ` Luck, Tony
@ 2018-04-03  7:37 ` Peter Zijlstra
  2018-04-03 18:58   ` Luck, Tony
  1 sibling, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2018-04-03  7:37 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Mel Gorman, Vincent Guittot, Patrick Bellasi, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Mon, Apr 02, 2018 at 04:24:49PM -0700, Luck, Tony wrote:
> v4.16 boots cleanly. But with the first bunch of merges
> (Linus HEAD = 46e0d28bdb8e6d00e27a0fe9e1d15df6098f0ffb)
> I see a bunch of:
> 
> ia64_handle_unaligned: 4863 callbacks suppressed
> kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
> kernel unaligned access to 0xe00000033bdffbcc, ip=0xa0000001000f2370
> kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
> kernel unaligned access to 0xe00000033bdffbcc, ip=0xa0000001000f2370
> kernel unaligned access to 0xe00000031660fd74, ip=0xa0000001000f23e0
> 
> The addresses are all 4-byte aligned, but not 8-byte aligned.
> 
> Any guesses before I start to bisect?

That doesn't sound good. The only guess I have at this moment is that
you accidentally enabled RANDSTRUCT_PLUGIN and that messes things up.

struct task_struct should be at least L1_CACHE_BYTES aligned, and C
otherwise makes it fairly hard to cause unaligned accesses. Packed
structures and/or casting are required, and I don't think we added
anything dodgy like that here.
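
A quick way to rule that out, assuming the .config for that build is at
hand, is to grep for the plugin option; with the plugin disabled it
should show up as unset (or not at all):

  $ grep RANDSTRUCT .config
  # CONFIG_GCC_PLUGIN_RANDSTRUCT is not set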


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-03  7:37 ` Peter Zijlstra
@ 2018-04-03 18:58   ` Luck, Tony
  2018-04-04  0:04     ` Luck, Tony
  0 siblings, 1 reply; 11+ messages in thread
From: Luck, Tony @ 2018-04-03 18:58 UTC (permalink / raw)
  To: Patrick Bellasi
  Cc: Mel Gorman, Vincent Guittot, Peter Zijlstra, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Tue, Apr 03, 2018 at 09:37:06AM +0200, Peter Zijlstra wrote:
> On Mon, Apr 02, 2018 at 04:24:49PM -0700, Luck, Tony wrote:
> > Any guesses before I start to bisect?
> 
> That doesn't sound good. The only guess I have at this moment is that
> you accidentally enabled RANDSTRUCT_PLUGIN and that messes things up.
> 
> struct task_struct should be at least L1_CACHE_BYTES aligned, and C
> otherwise makes it fairly hard to cause unaligned accesses. Packed
> structures and/or casting are required, and I don't think we added
> anything dodgy like that here.

bisect says:

d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")

Reverting just this commit makes the problem go away.

-Tony


* RE: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-03 18:58   ` Luck, Tony
@ 2018-04-04  0:04     ` Luck, Tony
  2018-04-04  7:25       ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Luck, Tony @ 2018-04-04  0:04 UTC (permalink / raw)
  To: Luck, Tony, Patrick Bellasi
  Cc: Mel Gorman, Vincent Guittot, Peter Zijlstra, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

> bisect says:
>
> d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")
> 
> Reverting just this commit makes the problem go away.

The unaligned read and write seem to come from:

struct util_est ue = READ_ONCE(p->se.avg.util_est);
WRITE_ONCE(p->se.avg.util_est, ue);

which is puzzling, as those accesses were already there before this
commit. Also, the "avg" field is tagged with an attribute to make it
cache aligned, and there don't appear to be any holes in the structure
that would leave util_est less than 8-byte aligned ... though it does
consist of two 4-byte fields, so 4-byte alignment would be legal for it.
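
As an aside, READ_ONCE()/WRITE_ONCE() of an 8-byte object boil down to
a single 8-byte load/store, so if util_est somehow ends up on a 4-byte
boundary those accesses trap on IA64. A minimal sketch of that failure
mode (outside the kernel, with a hypothetical containing struct; the
names are stand-ins, not the real scheduler structures):

struct util_est_like {			/* stand-in, not the real struct */
	unsigned int enqueued;
	unsigned int ewma;
};					/* __alignof__ == 4 */

struct container {			/* hypothetical layout */
	unsigned int flag;		/* offset 0 */
	struct util_est_like ue;	/* offset 4: 4-byte but not 8-byte aligned */
};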

-Tony


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-04  0:04     ` Luck, Tony
@ 2018-04-04  7:25       ` Peter Zijlstra
  2018-04-04 16:38         ` Luck, Tony
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2018-04-04  7:25 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Patrick Bellasi, Mel Gorman, Vincent Guittot, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Wed, Apr 04, 2018 at 12:04:00AM +0000, Luck, Tony wrote:
> > bisect says:
> >
> > d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")
> > 
> > Reverting just this commit makes the problem go away.
> 
> The unaligned read and write seem to come from:
> 
> struct util_est ue = READ_ONCE(p->se.avg.util_est);
> WRITE_ONCE(p->se.avg.util_est, ue);
> 
> which is puzzling, as those accesses were already there before this
> commit. Also, the "avg" field is tagged with an attribute to make it
> cache aligned, and there don't appear to be any holes in the structure
> that would leave util_est less than 8-byte aligned ... though it does
> consist of two 4-byte fields, so 4-byte alignment would be legal for it.

Right, I remember being careful with that. Which again brings me to the
RANDSTRUCT thing, which will mess that up.

Does the below cure things? It makes absolutely no difference for my
x86_64-defconfig build, but it puts more explicit alignment constraints
on things.


diff --git a/include/linux/sched.h b/include/linux/sched.h
index f228c6033832..b3d697f3b573 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,7 +300,7 @@ struct util_est {
 	unsigned int			enqueued;
 	unsigned int			ewma;
 #define UTIL_EST_WEIGHT_SHIFT		2
-};
+} __attribute__((__aligned__(sizeof(u64))));
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -364,7 +364,7 @@ struct sched_avg {
 	unsigned long			runnable_load_avg;
 	unsigned long			util_avg;
 	struct util_est			util_est;
-};
+} ____cacheline_aligned;
 
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
@@ -435,7 +435,7 @@ struct sched_entity {
 	 * Put into separate cache line so it does not
 	 * collide with read-mostly values above.
 	 */
-	struct sched_avg		avg ____cacheline_aligned_in_smp;
+	struct sched_avg		avg;
 #endif
 };
 

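If it helps, a build-time assertion along these lines (just a sketch,
not part of the patch; BUILD_BUG_ON() only fires from code that is
actually compiled, so it would have to be called from e.g. an init
path) would catch these constraints regressing again:

#include <linux/build_bug.h>
#include <linux/init.h>
#include <linux/sched.h>

/* Sketch only: verify the alignment the patch above enforces. */
static void __init util_est_alignment_check(void)
{
	BUILD_BUG_ON(__alignof__(struct util_est) < sizeof(u64));
	BUILD_BUG_ON(__alignof__(struct sched_avg) < sizeof(u64));
	BUILD_BUG_ON(offsetof(struct sched_avg, util_est) % sizeof(u64));
}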

* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-04  7:25       ` Peter Zijlstra
@ 2018-04-04 16:38         ` Luck, Tony
  2018-04-04 16:53           ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Luck, Tony @ 2018-04-04 16:38 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Patrick Bellasi, Mel Gorman, Vincent Guittot, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Wed, Apr 04, 2018 at 09:25:13AM +0200, Peter Zijlstra wrote:
> Right, I remember being careful with that. Which again brings me to the
> RANDSTRUCT thing, which will mess that up.

No RANDSTRUCT config options set for my build.

> Does the below cure things? It makes absolutely no difference for my
> x86_64-defconfig build, but it puts more explicit alignment constraints
> on things.

Yes. That fixes it. No unaligned traps with this patch applied.

Tested-by: Tony Luck <tony.luck@intel.com>

Thanks

-Tony


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-04 16:38         ` Luck, Tony
@ 2018-04-04 16:53           ` Peter Zijlstra
  2018-04-05  8:05             ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2018-04-04 16:53 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Patrick Bellasi, Mel Gorman, Vincent Guittot, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Wed, Apr 04, 2018 at 09:38:56AM -0700, Luck, Tony wrote:
> On Wed, Apr 04, 2018 at 09:25:13AM +0200, Peter Zijlstra wrote:
> > Right, I remember being careful with that. Which again brings me to the
> > RANDSTRUCT thing, which will mess that up.
> 
> No RANDSTRUCT config options set for my build.

Weird though, with or without that patch, my ia64-defconfig gives the
below layout.

$ pahole -EC sched_avg ia64-defconfig/kernel/sched/core.o
die__process_function: tag not supported (INVALID)!
struct sched_avg {
        /* typedef u64 */ long long unsigned int     last_update_time;                   /*     0     8 */
        /* typedef u64 */ long long unsigned int     load_sum;                           /*     8     8 */
        /* typedef u64 */ long long unsigned int     runnable_load_sum;                  /*    16     8 */
        /* typedef u32 */ unsigned int               util_sum;                           /*    24     4 */
        /* typedef u32 */ unsigned int               period_contrib;                     /*    28     4 */
        long unsigned int          load_avg;                                             /*    32     8 */
        long unsigned int          runnable_load_avg;                                    /*    40     8 */
        long unsigned int          util_avg;                                             /*    48     8 */
        struct util_est {
                unsigned int       enqueued;                                             /*    56     4 */
                unsigned int       ewma;                                                 /*    60     4 */
        } util_est; /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */

        /* size: 64, cachelines: 1, members: 9 */
};

> > Does the below cure things? It makes absolutely no difference for my
> > x86_64-defconfig build, but it puts more explicit alignment constraints
> > on things.
> 
> Yes. That fixes it. No unaligned traps with this patch applied.
> 
> Tested-by: Tony Luck <tony.luck@intel.com>

Awesome, I'll go get it merged, even though I don't understand where it
went wobbly.


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-04 16:53           ` Peter Zijlstra
@ 2018-04-05  8:05             ` Peter Zijlstra
  2018-04-05  8:56               ` Ingo Molnar
  2018-04-05  9:18               ` [tip:sched/urgent] sched/core: Force proper alignment of 'struct util_est' tip-bot for Peter Zijlstra
  0 siblings, 2 replies; 11+ messages in thread
From: Peter Zijlstra @ 2018-04-05  8:05 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Patrick Bellasi, Mel Gorman, Vincent Guittot, Ingo Molnar,
	Norbert Manthey, Frederic Weisbecker, linux-kernel

On Wed, Apr 04, 2018 at 06:53:10PM +0200, Peter Zijlstra wrote:
> Awesome, I'll go get it merged, even though I don't understand where it
> went wobbly.

Ingo, could you magic this in?

---
Subject: sched: Force alignment of struct util_est
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu Apr  5 09:56:16 CEST 2018

For some as yet not understood reason, Tony gets unaligned access
traps on IA64 because of:

  struct util_est ue = READ_ONCE(p->se.avg.util_est);

and:

  WRITE_ONCE(p->se.avg.util_est, ue);

introduced by commit:

  d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")

Normally those two fields should end up on an 8-byte aligned location,
but UP and RANDSTRUCT can mess that up so enforce the alignment
explicitly.
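
(For reference, a sketch of why UP matters, paraphrasing
include/linux/cache.h; exact definitions vary by architecture. The
*_in_smp variants compile away on !CONFIG_SMP, so the old annotation
on sched_entity::avg guaranteed nothing at all there:)

  #ifdef CONFIG_SMP
  #define ____cacheline_aligned_in_smp	____cacheline_aligned
  #else
  #define ____cacheline_aligned_in_smp
  #endif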

Also make the alignment on sched_avg unconditional, as it is really
about data locality, not false-sharing.

With or without this patch the layout for sched_avg on a
ia64-defconfig build looks like:

$ pahole -EC sched_avg ia64-defconfig/kernel/sched/core.o
die__process_function: tag not supported (INVALID)!
struct sched_avg {
        /* typedef u64 */ long long unsigned int     last_update_time;                   /*     0     8 */
        /* typedef u64 */ long long unsigned int     load_sum;                           /*     8     8 */
        /* typedef u64 */ long long unsigned int     runnable_load_sum;                  /*    16     8 */
        /* typedef u32 */ unsigned int               util_sum;                           /*    24     4 */
        /* typedef u32 */ unsigned int               period_contrib;                     /*    28     4 */
        long unsigned int          load_avg;                                             /*    32     8 */
        long unsigned int          runnable_load_avg;                                    /*    40     8 */
        long unsigned int          util_avg;                                             /*    48     8 */
        struct util_est {
                unsigned int       enqueued;                                             /*    56     4 */
                unsigned int       ewma;                                                 /*    60     4 */
        } util_est; /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */

        /* size: 64, cachelines: 1, members: 9 */
};

Fixes: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")
Reported-and-Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/sched.h |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,7 +300,7 @@ struct util_est {
 	unsigned int			enqueued;
 	unsigned int			ewma;
 #define UTIL_EST_WEIGHT_SHIFT		2
-};
+} __attribute__((__aligned__(sizeof(u64))));
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -364,7 +364,7 @@ struct sched_avg {
 	unsigned long			runnable_load_avg;
 	unsigned long			util_avg;
 	struct util_est			util_est;
-};
+} ____cacheline_aligned;
 
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
@@ -435,7 +435,7 @@ struct sched_entity {
 	 * Put into separate cache line so it does not
 	 * collide with read-mostly values above.
 	 */
-	struct sched_avg		avg ____cacheline_aligned_in_smp;
+	struct sched_avg		avg;
 #endif
 };
 


* Re: v4.16+ seeing many unaligned access in dequeue_task_fair() on IA64
  2018-04-05  8:05             ` Peter Zijlstra
@ 2018-04-05  8:56               ` Ingo Molnar
  2018-04-05  9:18               ` [tip:sched/urgent] sched/core: Force proper alignment of 'struct util_est' tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2018-04-05  8:56 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Luck, Tony, Patrick Bellasi, Mel Gorman, Vincent Guittot,
	Norbert Manthey, Frederic Weisbecker, linux-kernel


* Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Apr 04, 2018 at 06:53:10PM +0200, Peter Zijlstra wrote:
> > Awesome, I'll go get it merged, even though I don't understand where it
> > went wobbly.
> 
> Ingo, could you magic this in?
> 
> ---
> Subject: sched: Force alignment of struct util_est
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Thu Apr  5 09:56:16 CEST 2018

Sure, done!

Thanks,

	Ingo


* [tip:sched/urgent] sched/core: Force proper alignment of 'struct util_est'
  2018-04-05  8:05             ` Peter Zijlstra
  2018-04-05  8:56               ` Ingo Molnar
@ 2018-04-05  9:18               ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 11+ messages in thread
From: tip-bot for Peter Zijlstra @ 2018-04-05  9:18 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mgorman, acme, torvalds, tglx, vincent.guittot, hpa, peterz,
	frederic, patrick.bellasi, mingo, tony.luck, nmanthey,
	linux-kernel

Commit-ID:  317d359df95dd0cb7653d09b7fc513770590cf85
Gitweb:     https://git.kernel.org/tip/317d359df95dd0cb7653d09b7fc513770590cf85
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 5 Apr 2018 10:05:21 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 5 Apr 2018 10:56:16 +0200

sched/core: Force proper alignment of 'struct util_est'

For some as yet not understood reason, Tony gets unaligned access
traps on IA64 because of:

  struct util_est ue = READ_ONCE(p->se.avg.util_est);

and:

  WRITE_ONCE(p->se.avg.util_est, ue);

introduced by commit:

  d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")

Normally those two fields should end up on an 8-byte aligned location,
but UP and RANDSTRUCT can mess that up so enforce the alignment
explicitly.

Also make the alignment on sched_avg unconditional, as it is really
about data locality, not false-sharing.

With or without this patch the layout for sched_avg on a
ia64-defconfig build looks like:

	$ pahole -EC sched_avg ia64-defconfig/kernel/sched/core.o
	die__process_function: tag not supported (INVALID)!
	struct sched_avg {
		/* typedef u64 */ long long unsigned int     last_update_time;                   /*     0     8 */
		/* typedef u64 */ long long unsigned int     load_sum;                           /*     8     8 */
		/* typedef u64 */ long long unsigned int     runnable_load_sum;                  /*    16     8 */
		/* typedef u32 */ unsigned int               util_sum;                           /*    24     4 */
		/* typedef u32 */ unsigned int               period_contrib;                     /*    28     4 */
		long unsigned int          load_avg;                                             /*    32     8 */
		long unsigned int          runnable_load_avg;                                    /*    40     8 */
		long unsigned int          util_avg;                                             /*    48     8 */
		struct util_est {
			unsigned int       enqueued;                                             /*    56     4 */
			unsigned int       ewma;                                                 /*    60     4 */
		} util_est; /*    56     8 */
		/* --- cacheline 1 boundary (64 bytes) --- */

		/* size: 64, cachelines: 1, members: 9 */
	};

Reported-and-Tested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Norbert Manthey <nmanthey@amazon.de>
Cc: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony <tony.luck@intel.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Fixes: d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")
Link: http://lkml.kernel.org/r/20180405080521.GG4129@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f228c6033832..b3d697f3b573 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,7 +300,7 @@ struct util_est {
 	unsigned int			enqueued;
 	unsigned int			ewma;
 #define UTIL_EST_WEIGHT_SHIFT		2
-};
+} __attribute__((__aligned__(sizeof(u64))));
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -364,7 +364,7 @@ struct sched_avg {
 	unsigned long			runnable_load_avg;
 	unsigned long			util_avg;
 	struct util_est			util_est;
-};
+} ____cacheline_aligned;
 
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
@@ -435,7 +435,7 @@ struct sched_entity {
 	 * Put into separate cache line so it does not
 	 * collide with read-mostly values above.
 	 */
-	struct sched_avg		avg ____cacheline_aligned_in_smp;
+	struct sched_avg		avg;
 #endif
 };
 

