All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] clocksource: Store reg field within struct clocksource
@ 2015-11-18 13:43 Marc Gonzalez
  2015-11-18 13:51 ` Måns Rullgård
  2015-11-18 17:21 ` Russell King - ARM Linux
  0 siblings, 2 replies; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-18 13:43 UTC (permalink / raw)
  To: Thomas Gleixner, Daniel Lezcano, Russell King
  Cc: LKML, Mans Rullgard, Viresh Kumar, Nicolas Pitre, Tony Lindgren,
	Sebastian Frias

Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
a lot of padding between reg and clksrc in 'struct clocksource_mmio'
(for example, L1_CACHE_BYTES = 64 on ARMv7).

Storing reg within 'struct clocksource' removes unnecessary padding,
and reg can then be grouped with other hot data. A nice side-effect
of this patch is making container_of() unnecessary, which makes the
code a bit simpler.

On 32-bit platforms, reg fits in the padding between read and mask,
meaning no downside from storing it there.

	0                4                8
	+----------------+----------------+
	|      read      |     pad/reg    |
	+----------------+----------------+
	|              mask               |
	+----------------+----------------+
	|      mult      |     shift      |
	+----------------+----------------+
	|           max_idle_ns           |
	+----------------+----------------+

On 64-bit platforms, placing reg between read and mask changes the
layout, moving max_idle_ns to offset +32 instead of +24.

	0                4                8
	+----------------+----------------+
	|              read               |
	+----------------+----------------+
	|               reg               |
	+----------------+----------------+
	|              mask               |
	+----------------+----------------+
	|      mult      |     shift      |
	+----------------+----------------+
	|           max_idle_ns           |
	+----------------+----------------+

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
---
 drivers/clocksource/mmio.c  | 36 +++++++++++++-----------------------
 include/linux/clocksource.h |  3 +++
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/clocksource/mmio.c b/drivers/clocksource/mmio.c
index 1593ade2a815..c28fc6ef63ef 100644
--- a/drivers/clocksource/mmio.c
+++ b/drivers/clocksource/mmio.c
@@ -10,34 +10,24 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 
-struct clocksource_mmio {
-	void __iomem *reg;
-	struct clocksource clksrc;
-};
-
-static inline struct clocksource_mmio *to_mmio_clksrc(struct clocksource *c)
-{
-	return container_of(c, struct clocksource_mmio, clksrc);
-}
-
 cycle_t clocksource_mmio_readl_up(struct clocksource *c)
 {
-	return (cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg);
+	return (cycle_t)readl_relaxed(c->reg);
 }
 
 cycle_t clocksource_mmio_readl_down(struct clocksource *c)
 {
-	return ~(cycle_t)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
+	return ~(cycle_t)readl_relaxed(c->reg) & c->mask;
 }
 
 cycle_t clocksource_mmio_readw_up(struct clocksource *c)
 {
-	return (cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg);
+	return (cycle_t)readw_relaxed(c->reg);
 }
 
 cycle_t clocksource_mmio_readw_down(struct clocksource *c)
 {
-	return ~(cycle_t)readw_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
+	return ~(cycle_t)readw_relaxed(c->reg) & c->mask;
 }
 
 /**
@@ -53,21 +43,21 @@ int __init clocksource_mmio_init(void __iomem *base, const char *name,
 	unsigned long hz, int rating, unsigned bits,
 	cycle_t (*read)(struct clocksource *))
 {
-	struct clocksource_mmio *cs;
+	struct clocksource *cs;
 
 	if (bits > 32 || bits < 16)
 		return -EINVAL;
 
-	cs = kzalloc(sizeof(struct clocksource_mmio), GFP_KERNEL);
+	cs = kzalloc(sizeof *cs, GFP_KERNEL);
 	if (!cs)
 		return -ENOMEM;
 
-	cs->reg = base;
-	cs->clksrc.name = name;
-	cs->clksrc.rating = rating;
-	cs->clksrc.read = read;
-	cs->clksrc.mask = CLOCKSOURCE_MASK(bits);
-	cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+	cs->read = read;
+	cs->reg  = base;
+	cs->name = name;
+	cs->rating = rating;
+	cs->mask = CLOCKSOURCE_MASK(bits);
+	cs->flags = CLOCK_SOURCE_IS_CONTINUOUS;
 
-	return clocksource_register_hz(&cs->clksrc, hz);
+	return clocksource_register_hz(cs, hz);
 }
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 278dd279a7a8..50725fd23ab0 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -69,6 +69,9 @@ struct clocksource {
 	 * clocksource itself is cacheline aligned.
 	 */
 	cycle_t (*read)(struct clocksource *cs);
+#ifdef CONFIG_CLKSRC_MMIO
+	void __iomem *reg;
+#endif
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
-- 
2.4.5


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-18 13:43 [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
@ 2015-11-18 13:51 ` Måns Rullgård
  2015-11-18 17:21 ` Russell King - ARM Linux
  1 sibling, 0 replies; 19+ messages in thread
From: Måns Rullgård @ 2015-11-18 13:51 UTC (permalink / raw)
  To: Marc Gonzalez
  Cc: Thomas Gleixner, Daniel Lezcano, Russell King, LKML,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

Marc Gonzalez <marc_gonzalez@sigmadesigns.com> writes:

> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> (for example, L1_CACHE_BYTES = 64 on ARMv7).
>
> Storing reg within 'struct clocksource' removes unnecessary padding,
> and reg can then be grouped with other hot data.

Can you demonstrate a difference with this change?  Not saying it's bad,
but it's always good to have numbers.

> A nice side-effect of this patch is making container_of() unnecessary,
> which makes the code a bit simpler.

You really need to get used to that construct.

-- 
Måns Rullgård
mans@mansr.com

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-18 13:43 [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
  2015-11-18 13:51 ` Måns Rullgård
@ 2015-11-18 17:21 ` Russell King - ARM Linux
  2015-11-19  9:27   ` Marc Gonzalez
  2015-11-19 10:33   ` Thomas Gleixner
  1 sibling, 2 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-18 17:21 UTC (permalink / raw)
  To: Marc Gonzalez
  Cc: Thomas Gleixner, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> (for example, L1_CACHE_BYTES = 64 on ARMv7).
> 
> Storing reg within 'struct clocksource' removes unnecessary padding,
> and reg can then be grouped with other hot data. A nice side-effect
> of this patch is making container_of() unnecessary, which makes the
> code a bit simpler.
> 
> On 32-bit platforms, reg fits in the padding between read and mask,
> meaning no downside from storing it there.

Just swap the order of 'reg' and 'clksrc'.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-18 17:21 ` Russell King - ARM Linux
@ 2015-11-19  9:27   ` Marc Gonzalez
  2015-11-19 10:33     ` Russell King - ARM Linux
  2015-11-19 10:33   ` Thomas Gleixner
  1 sibling, 1 reply; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-19  9:27 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Thomas Gleixner, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On 18/11/2015 18:21, Russell King - ARM Linux wrote:

> On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
>
>> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
>> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
>> (for example, L1_CACHE_BYTES = 64 on ARMv7).
>>
>> Storing reg within 'struct clocksource' removes unnecessary padding,
>> and reg can then be grouped with other hot data. A nice side-effect
>> of this patch is making container_of() unnecessary, which makes the
>> code a bit simpler.
>>
>> On 32-bit platforms, reg fits in the padding between read and mask,
>> meaning no downside from storing it there.
> 
> Just swap the order of 'reg' and 'clksrc'.

You already suggested that the last time (April 1st).
What problem is this supposed to solve?
Swapping the fields does not change the amount of padding required,
and does not place reg close to the hot data.

On a 32-bit platform, with L1_CACHE_BYTES = 64

sizeof(struct unaligned_clocksource) = 80
sizeof(struct clocksource) = 128
sizeof(struct clocksource_mmio)  = 192, reg at +0,   clksrc at +64
sizeof(struct clocksource_mmio2) = 192, reg at +128, clksrc at +0

Same amount of padding.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-18 17:21 ` Russell King - ARM Linux
  2015-11-19  9:27   ` Marc Gonzalez
@ 2015-11-19 10:33   ` Thomas Gleixner
  2015-11-19 10:36     ` Russell King - ARM Linux
  1 sibling, 1 reply; 19+ messages in thread
From: Thomas Gleixner @ 2015-11-19 10:33 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

Russell,

On Wed, 18 Nov 2015, Russell King - ARM Linux wrote:

> On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
> > Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> > a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> > (for example, L1_CACHE_BYTES = 64 on ARMv7).
> > 
> > Storing reg within 'struct clocksource' removes unnecessary padding,
> > and reg can then be grouped with other hot data. A nice side-effect
> > of this patch is making container_of() unnecessary, which makes the
> > code a bit simpler.
> > 
> > On 32-bit platforms, reg fits in the padding between read and mask,
> > meaning no downside from storing it there.
> 
> Just swap the order of 'reg' and 'clksrc'.

That might reduce the memory footprint, but it does not bring the
iomem pointer closer to the other hotpath clocksource data. So we
still need to touch at minimum two cache lines for reading the time.

With Marcs change we have all hotpath data in a single cacheline.

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19  9:27   ` Marc Gonzalez
@ 2015-11-19 10:33     ` Russell King - ARM Linux
  2015-11-19 10:36       ` Thomas Gleixner
  0 siblings, 1 reply; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-19 10:33 UTC (permalink / raw)
  To: Marc Gonzalez
  Cc: Thomas Gleixner, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, Nov 19, 2015 at 10:27:33AM +0100, Marc Gonzalez wrote:
> On 18/11/2015 18:21, Russell King - ARM Linux wrote:
> > On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
> >
> >> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> >> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> >> (for example, L1_CACHE_BYTES = 64 on ARMv7).
> >>
> >> Storing reg within 'struct clocksource' removes unnecessary padding,
> >> and reg can then be grouped with other hot data. A nice side-effect
> >> of this patch is making container_of() unnecessary, which makes the
> >> code a bit simpler.
> >>
> >> On 32-bit platforms, reg fits in the padding between read and mask,
> >> meaning no downside from storing it there.
> > 
> > Just swap the order of 'reg' and 'clksrc'.
> 
> You already suggested that the last time (April 1st).
> What problem is this supposed to solve?
> Swapping the fields does not change the amount of padding required,
> and does not place reg close to the hot data.
> 
> On a 32-bit platform, with L1_CACHE_BYTES = 64
> 
> sizeof(struct unaligned_clocksource) = 80
> sizeof(struct clocksource) = 128
> sizeof(struct clocksource_mmio)  = 192, reg at +0,   clksrc at +64
> sizeof(struct clocksource_mmio2) = 192, reg at +128, clksrc at +0
> 
> Same amount of padding.

Maybe the ____cacheline_aligned is inappropriate then, because it means
any wrapping of struct clocksource has exactly the same problem.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:33   ` Thomas Gleixner
@ 2015-11-19 10:36     ` Russell King - ARM Linux
  2015-11-19 10:42       ` Thomas Gleixner
  2015-11-19 10:55       ` [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
  0 siblings, 2 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-19 10:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, Nov 19, 2015 at 11:33:47AM +0100, Thomas Gleixner wrote:
> Russell,
> 
> On Wed, 18 Nov 2015, Russell King - ARM Linux wrote:
> 
> > On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
> > > Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> > > a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> > > (for example, L1_CACHE_BYTES = 64 on ARMv7).
> > > 
> > > Storing reg within 'struct clocksource' removes unnecessary padding,
> > > and reg can then be grouped with other hot data. A nice side-effect
> > > of this patch is making container_of() unnecessary, which makes the
> > > code a bit simpler.
> > > 
> > > On 32-bit platforms, reg fits in the padding between read and mask,
> > > meaning no downside from storing it there.
> > 
> > Just swap the order of 'reg' and 'clksrc'.
> 
> That might reduce the memory footprint, but it does not bring the
> iomem pointer closer to the other hotpath clocksource data. So we
> still need to touch at minimum two cache lines for reading the time.
> 
> With Marcs change we have all hotpath data in a single cacheline.

Right, and what it's doing is polluting struct clocksource with lots
of ifdefs which determine how much data is contained in there.  Seems
to me to be totally insane.

The basic cause of this problem is the ____cacheline_aligned annotation
which effectively prevents wrapping struct clocksource to provide
implementation specific data.

Maybe your idea is that struct clocksource should be bloated with all
implementation specific data in the long term?

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:33     ` Russell King - ARM Linux
@ 2015-11-19 10:36       ` Thomas Gleixner
  2015-11-19 10:40         ` Russell King - ARM Linux
  0 siblings, 1 reply; 19+ messages in thread
From: Thomas Gleixner @ 2015-11-19 10:36 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:
> On Thu, Nov 19, 2015 at 10:27:33AM +0100, Marc Gonzalez wrote:
> > On 18/11/2015 18:21, Russell King - ARM Linux wrote:
> > > On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
> > >
> > >> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
> > >> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
> > >> (for example, L1_CACHE_BYTES = 64 on ARMv7).
> > >>
> > >> Storing reg within 'struct clocksource' removes unnecessary padding,
> > >> and reg can then be grouped with other hot data. A nice side-effect
> > >> of this patch is making container_of() unnecessary, which makes the
> > >> code a bit simpler.
> > >>
> > >> On 32-bit platforms, reg fits in the padding between read and mask,
> > >> meaning no downside from storing it there.
> > > 
> > > Just swap the order of 'reg' and 'clksrc'.
> > 
> > You already suggested that the last time (April 1st).
> > What problem is this supposed to solve?
> > Swapping the fields does not change the amount of padding required,
> > and does not place reg close to the hot data.
> > 
> > On a 32-bit platform, with L1_CACHE_BYTES = 64
> > 
> > sizeof(struct unaligned_clocksource) = 80
> > sizeof(struct clocksource) = 128
> > sizeof(struct clocksource_mmio)  = 192, reg at +0,   clksrc at +64
> > sizeof(struct clocksource_mmio2) = 192, reg at +128, clksrc at +0
> > 
> > Same amount of padding.
> 
> Maybe the ____cacheline_aligned is inappropriate then, because it means
> any wrapping of struct clocksource has exactly the same problem.

We could do that, but that does not necessarily solve the cache
footprint issue. Aside of that we'd need to add ____cacheline_aligned
to quite some of the statically allocated clocksource declarations.

Thanks,

	tglx


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:36       ` Thomas Gleixner
@ 2015-11-19 10:40         ` Russell King - ARM Linux
  0 siblings, 0 replies; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-19 10:40 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, Nov 19, 2015 at 11:36:53AM +0100, Thomas Gleixner wrote:
> On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:
> > Maybe the ____cacheline_aligned is inappropriate then, because it means
> > any wrapping of struct clocksource has exactly the same problem.
> 
> We could do that, but that does not necessarily solve the cache
> footprint issue. Aside of that we'd need to add ____cacheline_aligned
> to quite some of the statically allocated clocksource declarations.

Sorry, but it does solve the cache footprint issue, because it would
have the effect of moving 'reg' right into the same cache line as
the 'read' pointer.

Yes, we'd need ____cacheline_aligned at the static declarations, but
surely that's better than constantly having to stuff implementation
specific data into struct clocksource.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:36     ` Russell King - ARM Linux
@ 2015-11-19 10:42       ` Thomas Gleixner
  2015-11-19 11:08         ` Russell King - ARM Linux
  2015-11-19 10:55       ` [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
  1 sibling, 1 reply; 19+ messages in thread
From: Thomas Gleixner @ 2015-11-19 10:42 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:
> The basic cause of this problem is the ____cacheline_aligned annotation
> which effectively prevents wrapping struct clocksource to provide
> implementation specific data.
>
> Maybe your idea is that struct clocksource should be bloated with all
> implementation specific data in the long term?

Certainly not. That mmio use case is sane enough, but you are right,
that we should try to lift that ____cacheline_aligned restriction.

We have 77 instances of static struct clocksource declaration...

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:36     ` Russell King - ARM Linux
  2015-11-19 10:42       ` Thomas Gleixner
@ 2015-11-19 10:55       ` Marc Gonzalez
  2015-11-19 11:21         ` Russell King - ARM Linux
  1 sibling, 1 reply; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-19 10:55 UTC (permalink / raw)
  To: Russell King - ARM Linux, Thomas Gleixner
  Cc: Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar, Nicolas Pitre,
	Tony Lindgren, Sebastian Frias

On 19/11/2015 11:36, Russell King - ARM Linux wrote:
> On Thu, Nov 19, 2015 at 11:33:47AM +0100, Thomas Gleixner wrote:
>> Russell,
>>
>> On Wed, 18 Nov 2015, Russell King - ARM Linux wrote:
>>
>>> On Wed, Nov 18, 2015 at 02:43:34PM +0100, Marc Gonzalez wrote:
>>>> Since 'struct clocksource' is ____cacheline_aligned, gcc must insert
>>>> a lot of padding between reg and clksrc in 'struct clocksource_mmio'
>>>> (for example, L1_CACHE_BYTES = 64 on ARMv7).
>>>>
>>>> Storing reg within 'struct clocksource' removes unnecessary padding,
>>>> and reg can then be grouped with other hot data. A nice side-effect
>>>> of this patch is making container_of() unnecessary, which makes the
>>>> code a bit simpler.
>>>>
>>>> On 32-bit platforms, reg fits in the padding between read and mask,
>>>> meaning no downside from storing it there.
>>>
>>> Just swap the order of 'reg' and 'clksrc'.
>>
>> That might reduce the memory footprint, but it does not bring the
>> iomem pointer closer to the other hotpath clocksource data. So we
>> still need to touch at minimum two cache lines for reading the time.
>>
>> With Marc's change we have all hotpath data in a single cacheline.
> 
> Right, and what it's doing is polluting struct clocksource with lots
> of ifdefs which determine how much data is contained in there.  Seems
> to me to be totally insane.

What I find puzzling is that you would consider the read field to be
a bona fide member of struct clocksource, but reg (the address passed
to read) does not belong inside the struct?

If you just object to the ifdef, then perhaps 'reg' can be included
unconditionally.

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 278dd279a7a8..50725fd23ab0 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -69,6 +69,7 @@ struct clocksource {
 	 * clocksource itself is cacheline aligned.
 	 */
 	cycle_t (*read)(struct clocksource *cs);
+	void __iomem *reg;
 	cycle_t mask;
 	u32 mult;
 	u32 shift;

> The basic cause of this problem is the ____cacheline_aligned annotation
> which effectively prevents wrapping struct clocksource to provide
> implementation specific data.

True. But note that placing reg inside struct clocksource comes
for free on 32-bit platforms (it just replaces padding).

> Maybe your idea is that struct clocksource should be bloated with all
> implementation specific data in the long term?

reg is not implementation-specific data, right?

Regards.


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:42       ` Thomas Gleixner
@ 2015-11-19 11:08         ` Russell King - ARM Linux
  2015-11-19 11:14           ` Thomas Gleixner
  0 siblings, 1 reply; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-19 11:08 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, Nov 19, 2015 at 11:42:48AM +0100, Thomas Gleixner wrote:
> On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:
> > The basic cause of this problem is the ____cacheline_aligned annotation
> > which effectively prevents wrapping struct clocksource to provide
> > implementation specific data.
> >
> > Maybe your idea is that struct clocksource should be bloated with all
> > implementation specific data in the long term?
> 
> Certainly not. That mmio use case is sane enough, but you are right,
> that we should try to lift that ____cacheline_aligned restriction.

I don't think the cache line alignment of struct clocksource matters
anymore - the core timekeeping code no longer uses struct clocksource
but instead uses struct timekeeper, which caches much of the data from
struct clocksource.  The only member of struct clocksource which it
does access is max_cycles, which is more than 32 bytes into struct
clocksource.

So, I see no reason to waste memory with all these struct clocksources
being bloated out to cacheline alignments.  In addition, once
____cacheline_aligned is removed, I see no reason for Marc's change
either.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 11:08         ` Russell King - ARM Linux
@ 2015-11-19 11:14           ` Thomas Gleixner
  2015-11-19 12:26             ` Marc Gonzalez
  2015-11-25 21:33             ` [tip:timers/core] timekeeping: Lift clocksource cacheline restriction tip-bot for Thomas Gleixner
  0 siblings, 2 replies; 19+ messages in thread
From: Thomas Gleixner @ 2015-11-19 11:14 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Marc Gonzalez, Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar,
	Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:

> On Thu, Nov 19, 2015 at 11:42:48AM +0100, Thomas Gleixner wrote:
> > On Thu, 19 Nov 2015, Russell King - ARM Linux wrote:
> > > The basic cause of this problem is the ____cacheline_aligned annotation
> > > which effectively prevents wrapping struct clocksource to provide
> > > implementation specific data.
> > >
> > > Maybe your idea is that struct clocksource should be bloated with all
> > > implementation specific data in the long term?
> > 
> > Certainly not. That mmio use case is sane enough, but you are right,
> > that we should try to lift that ____cacheline_aligned restriction.
> 
> I don't think the cache line alignment of struct clocksource matters
> anymore - the core timekeeping code no longer uses struct clocksource
> but instead uses struct timekeeper, which caches much of the data from
> struct clocksource.  The only member of struct clocksource which it
> does access is max_cycles, which is more than 32 bytes into struct
> clocksource.
>
> So, I see no reason to waste memory with all these struct clocksources
> being bloated out to cacheline alignments.  In addition, once
> ____cacheline_aligned is removed, I see no reason for Marc's change
> either.

Right. I completely forgot that I rewrote the core part some time
ago. I'm older than 50, so I'm entitled to use the beginning Alzheimer
excuse. :)

So yes, the alignment of the clocksource struct is not longer
relevant. The case where we access clocksource->max_cycles is when
CONFIG_DEBUG_TIMEKEEPING is enabled, which imposes worse performance
problems to timekeeping than the extra cacheline.

So the simple solution for this issue is indeed the one liner below.

Thanks,

	tglx

8<-------------------

--- tip.orig/include/linux/clocksource.h
+++ tip/include/linux/clocksource.h
@@ -95,7 +95,7 @@ struct clocksource {
 	cycle_t wd_last;
 #endif
 	struct module *owner;
-} ____cacheline_aligned;
+}
 
 /*
  * Clock source flags bits::


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 10:55       ` [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
@ 2015-11-19 11:21         ` Russell King - ARM Linux
  2015-11-19 12:41           ` Marc Gonzalez
  0 siblings, 1 reply; 19+ messages in thread
From: Russell King - ARM Linux @ 2015-11-19 11:21 UTC (permalink / raw)
  To: Marc Gonzalez
  Cc: Thomas Gleixner, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, Nov 19, 2015 at 11:55:46AM +0100, Marc Gonzalez wrote:
> If you just object to the ifdef, then perhaps 'reg' can be included
> unconditionally.
> 
> diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
> index 278dd279a7a8..50725fd23ab0 100644
> --- a/include/linux/clocksource.h
> +++ b/include/linux/clocksource.h
> @@ -69,6 +69,7 @@ struct clocksource {
>  	 * clocksource itself is cacheline aligned.
>  	 */
>  	cycle_t (*read)(struct clocksource *cs);
> +	void __iomem *reg;
>  	cycle_t mask;
>  	u32 mult;
>  	u32 shift;
> 
> > The basic cause of this problem is the ____cacheline_aligned annotation
> > which effectively prevents wrapping struct clocksource to provide
> > implementation specific data.
> 
> True. But note that placing reg inside struct clocksource comes
> for free on 32-bit platforms (it just replaces padding).

Yes, I'd object less if it's just replacing padding.

> > Maybe your idea is that struct clocksource should be bloated with all
> > implementation specific data in the long term?
> 
> reg is not implementation-specific data, right?

That depends on how you look at what consitutes "implementation specific".

The vast majority of clocksource drivers access some non-MMIO register,
or some register that they need to store the __iomem pointer externally
for other reasons.  The original clocksource design did not have any
iomem pointer.

When I wrote the MMIO clocksource implementation, there was no
____cacheline_aligned on struct clocksource, and the arrangement I came
to for the structure put the 'reg' and 'read' within the same cache line
(note that the MMIO clocksource pre-dates Thomas' rearrangement of struct
clocksource and the addition of the cache line alignment.)  The original
layout did not have any padding gaps.

So, it was pointless to add a __iomem pointer to the main structure,
which would have bloated the struct for every user, and it made no sense
what so ever to do that.

Things may have changed, and there may be padding, but things have changed
again which actually mean that very little of struct clocksource will be
in a cache line when the ->read function is called, so the whole idea of
fitting things into one cache line here is totally irrelevant anymore.

-- 
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 11:14           ` Thomas Gleixner
@ 2015-11-19 12:26             ` Marc Gonzalez
  2015-11-19 13:57               ` Thomas Gleixner
  2015-11-25 21:33             ` [tip:timers/core] timekeeping: Lift clocksource cacheline restriction tip-bot for Thomas Gleixner
  1 sibling, 1 reply; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-19 12:26 UTC (permalink / raw)
  To: Thomas Gleixner, Russell King - ARM Linux
  Cc: Daniel Lezcano, LKML, Mans Rullgard, Viresh Kumar, Nicolas Pitre,
	Tony Lindgren, Sebastian Frias

On 19/11/2015 12:14, Thomas Gleixner wrote:

> So yes, the alignment of the clocksource struct is not longer
> relevant. The case where we access clocksource->max_cycles is when
> CONFIG_DEBUG_TIMEKEEPING is enabled, which imposes worse performance
> problems to timekeeping than the extra cacheline.
> 
> So the simple solution for this issue is indeed the one liner below.

It would make sense to also remove the comment emphasizing the
alignment requirement.

Regards.

8<-------------------

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 278dd279a7a8..6a0f86a9a92d 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -64,10 +64,6 @@ struct module;
  * @owner:             module reference, must be set by clocksource in modules
  */
 struct clocksource {
-       /*
-        * Hotpath data, fits in a single cache line when the
-        * clocksource itself is cacheline aligned.
-        */
        cycle_t (*read)(struct clocksource *cs);
        cycle_t mask;
        u32 mult;
@@ -95,7 +91,7 @@ struct clocksource {
        cycle_t wd_last;
 #endif
        struct module *owner;
-} ____cacheline_aligned;
+};
 
 /*
  * Clock source flags bits::


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 11:21         ` Russell King - ARM Linux
@ 2015-11-19 12:41           ` Marc Gonzalez
  0 siblings, 0 replies; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-19 12:41 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Thomas Gleixner, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On 19/11/2015 12:21, Russell King wrote:

> When I wrote the MMIO clocksource implementation, there was no
> ____cacheline_aligned on struct clocksource, and the arrangement I came
> to for the structure put the 'reg' and 'read' within the same cache line
> (note that the MMIO clocksource pre-dates Thomas' rearrangement of struct
> clocksource and the addition of the cache line alignment.)  The original
> layout did not have any padding gaps.

For the record, I pointed out the chronology in a previous discussion.
But Thomas didn't comment at the time :-(

http://thread.gmane.org/gmane.linux.ports.arm.kernel/402968/focus=403604

Mason wrote:
> Oh and while I have your attention ;-) I have alignment-related
> questions about clocksource_mmio_init() (commit 442c8176d2) wrt
> Thomas Gleixner's 369db4c952 patch. (I think the two patches
> do not play nice.)

Regards.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 12:26             ` Marc Gonzalez
@ 2015-11-19 13:57               ` Thomas Gleixner
  2015-11-24 12:36                 ` Marc Gonzalez
  0 siblings, 1 reply; 19+ messages in thread
From: Thomas Gleixner @ 2015-11-19 13:57 UTC (permalink / raw)
  To: Marc Gonzalez
  Cc: Russell King - ARM Linux, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On Thu, 19 Nov 2015, Marc Gonzalez wrote:
> On 19/11/2015 12:14, Thomas Gleixner wrote:
> 
> > So yes, the alignment of the clocksource struct is not longer
> > relevant. The case where we access clocksource->max_cycles is when
> > CONFIG_DEBUG_TIMEKEEPING is enabled, which imposes worse performance
> > problems to timekeeping than the extra cacheline.
> > 
> > So the simple solution for this issue is indeed the one liner below.
> 
> It would make sense to also remove the comment emphasizing the
> alignment requirement.

Indeed.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] clocksource: Store reg field within struct clocksource
  2015-11-19 13:57               ` Thomas Gleixner
@ 2015-11-24 12:36                 ` Marc Gonzalez
  0 siblings, 0 replies; 19+ messages in thread
From: Marc Gonzalez @ 2015-11-24 12:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Russell King - ARM Linux, Daniel Lezcano, LKML, Mans Rullgard,
	Viresh Kumar, Nicolas Pitre, Tony Lindgren, Sebastian Frias

On 19/11/2015 14:57, Thomas Gleixner wrote:
> On Thu, 19 Nov 2015, Marc Gonzalez wrote:
>> On 19/11/2015 12:14, Thomas Gleixner wrote:
>>
>>> So yes, the alignment of the clocksource struct is not longer
>>> relevant. The case where we access clocksource->max_cycles is when
>>> CONFIG_DEBUG_TIMEKEEPING is enabled, which imposes worse performance
>>> problems to timekeeping than the extra cacheline.
>>>
>>> So the simple solution for this issue is indeed the one liner below.
>>
>> It would make sense to also remove the comment emphasizing the
>> alignment requirement.
> 
> Indeed.

Thomas,

Will you commit that patch yourself?

Regards.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [tip:timers/core] timekeeping: Lift clocksource cacheline restriction
  2015-11-19 11:14           ` Thomas Gleixner
  2015-11-19 12:26             ` Marc Gonzalez
@ 2015-11-25 21:33             ` tip-bot for Thomas Gleixner
  1 sibling, 0 replies; 19+ messages in thread
From: tip-bot for Thomas Gleixner @ 2015-11-25 21:33 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: viresh.kumar, sebastian_frias, mingo, hpa, nico, linux-kernel,
	mans, tony, tglx, daniel.lezcano, linux, marc_gonzalez,
	john.stultz

Commit-ID:  09a9982016499daeb3fbee5ac8d87797310a565a
Gitweb:     http://git.kernel.org/tip/09a9982016499daeb3fbee5ac8d87797310a565a
Author:     Thomas Gleixner <tglx@linutronix.de>
AuthorDate: Thu, 19 Nov 2015 11:43:09 +0100
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Wed, 25 Nov 2015 22:28:30 +0100

timekeeping: Lift clocksource cacheline restriction

We cache all hotpath members of a clocksource in the time keeper
core. So there is no requirement in general to cache line align struct
clocksource. Remove the enforces alignment.

That allows users which need to wrap struct clocksource into their own
struct to align the struct without getting extra padding.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Mans Rullgard <mans@mansr.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Sebastian Frias <sebastian_frias@sigmadesigns.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1511191209000.3898@nanos
---
 include/linux/clocksource.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7784b59..6013021 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -62,12 +62,18 @@ struct module;
  * @suspend:		suspend function for the clocksource, if necessary
  * @resume:		resume function for the clocksource, if necessary
  * @owner:		module reference, must be set by clocksource in modules
+ *
+ * Note: This struct is not used in hotpathes of the timekeeping code
+ * because the timekeeper caches the hot path fields in its own data
+ * structure, so no line cache alignment is required,
+ *
+ * The pointer to the clocksource itself is handed to the read
+ * callback. If you need extra information there you can wrap struct
+ * clocksource into your own struct. Depending on the amount of
+ * information you need you should consider to cache line align that
+ * structure.
  */
 struct clocksource {
-	/*
-	 * Hotpath data, fits in a single cache line when the
-	 * clocksource itself is cacheline aligned.
-	 */
 	cycle_t (*read)(struct clocksource *cs);
 	cycle_t mask;
 	u32 mult;
@@ -95,7 +101,7 @@ struct clocksource {
 	cycle_t wd_last;
 #endif
 	struct module *owner;
-} ____cacheline_aligned;
+};
 
 /*
  * Clock source flags bits::

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-11-25 21:34 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-11-18 13:43 [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
2015-11-18 13:51 ` Måns Rullgård
2015-11-18 17:21 ` Russell King - ARM Linux
2015-11-19  9:27   ` Marc Gonzalez
2015-11-19 10:33     ` Russell King - ARM Linux
2015-11-19 10:36       ` Thomas Gleixner
2015-11-19 10:40         ` Russell King - ARM Linux
2015-11-19 10:33   ` Thomas Gleixner
2015-11-19 10:36     ` Russell King - ARM Linux
2015-11-19 10:42       ` Thomas Gleixner
2015-11-19 11:08         ` Russell King - ARM Linux
2015-11-19 11:14           ` Thomas Gleixner
2015-11-19 12:26             ` Marc Gonzalez
2015-11-19 13:57               ` Thomas Gleixner
2015-11-24 12:36                 ` Marc Gonzalez
2015-11-25 21:33             ` [tip:timers/core] timekeeping: Lift clocksource cacheline restriction tip-bot for Thomas Gleixner
2015-11-19 10:55       ` [PATCH] clocksource: Store reg field within struct clocksource Marc Gonzalez
2015-11-19 11:21         ` Russell King - ARM Linux
2015-11-19 12:41           ` Marc Gonzalez

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.