* [PATCH] sched/fair: provide u64 read for 32-bits arch helper
@ 2020-07-27 10:59 vincent.donnefort
  2020-07-27 11:24 ` Ingo Molnar
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: vincent.donnefort @ 2020-07-27 10:59 UTC (permalink / raw)
  To: mingo, peterz, vincent.guittot
  Cc: linux-kernel, dietmar.eggemann, lukasz.luba, valentin.schneider,
	Vincent Donnefort

From: Vincent Donnefort <vincent.donnefort@arm.com>

Introduce two macro helpers, u64_32read() and u64_32read_set_copy(), to
factorize the u64 min_vruntime and last_update_time reads on 32-bit
architectures. The new helpers encapsulate the smp_rmb() and smp_wmb()
synchronization and therefore introduce a small penalty in
set_task_rq_fair() and init_cfs_rq().

The choice of a macro over an inline function is driven by the
conditional declaration of the u64 copy variables:

  #ifndef CONFIG_64BIT
     u64 [min_vruntime|last_update_time]_copy;
  #endif

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1a68a05..00a825d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -563,10 +563,8 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 
 	/* ensure we never gain time by being placed backwards. */
 	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
-#ifndef CONFIG_64BIT
-	smp_wmb();
-	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
-#endif
+
+	u64_32read_set_copy(cfs_rq->min_vruntime, cfs_rq->min_vruntime_copy);
 }
 
 /*
@@ -3284,6 +3282,11 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 }
 
 #ifdef CONFIG_SMP
+static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
+{
+	return u64_32read(cfs_rq->avg.last_update_time,
+			  cfs_rq->load_last_update_time_copy);
+}
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /**
  * update_tg_load_avg - update the tg's load avg
@@ -3340,27 +3343,9 @@ void set_task_rq_fair(struct sched_entity *se,
 	if (!(se->avg.last_update_time && prev))
 		return;
 
-#ifndef CONFIG_64BIT
-	{
-		u64 p_last_update_time_copy;
-		u64 n_last_update_time_copy;
-
-		do {
-			p_last_update_time_copy = prev->load_last_update_time_copy;
-			n_last_update_time_copy = next->load_last_update_time_copy;
-
-			smp_rmb();
+	p_last_update_time = cfs_rq_last_update_time(prev);
+	n_last_update_time = cfs_rq_last_update_time(next);
 
-			p_last_update_time = prev->avg.last_update_time;
-			n_last_update_time = next->avg.last_update_time;
-
-		} while (p_last_update_time != p_last_update_time_copy ||
-			 n_last_update_time != n_last_update_time_copy);
-	}
-#else
-	p_last_update_time = prev->avg.last_update_time;
-	n_last_update_time = next->avg.last_update_time;
-#endif
 	__update_load_avg_blocked_se(p_last_update_time, se);
 	se->avg.last_update_time = n_last_update_time;
 }
@@ -3681,10 +3666,8 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
 	decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
 
-#ifndef CONFIG_64BIT
-	smp_wmb();
-	cfs_rq->load_last_update_time_copy = sa->last_update_time;
-#endif
+	u64_32read_set_copy(sa->last_update_time,
+			    cfs_rq->load_last_update_time_copy);
 
 	return decayed;
 }
@@ -3810,27 +3793,6 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	}
 }
 
-#ifndef CONFIG_64BIT
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	u64 last_update_time_copy;
-	u64 last_update_time;
-
-	do {
-		last_update_time_copy = cfs_rq->load_last_update_time_copy;
-		smp_rmb();
-		last_update_time = cfs_rq->avg.last_update_time;
-	} while (last_update_time != last_update_time_copy);
-
-	return last_update_time;
-}
-#else
-static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->avg.last_update_time;
-}
-#endif
-
 /*
  * Synchronize entity load avg of dequeued entity without locking
  * the previous rq.
@@ -6744,21 +6706,9 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	if (p->state == TASK_WAKING) {
 		struct sched_entity *se = &p->se;
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
-		u64 min_vruntime;
-
-#ifndef CONFIG_64BIT
-		u64 min_vruntime_copy;
-
-		do {
-			min_vruntime_copy = cfs_rq->min_vruntime_copy;
-			smp_rmb();
-			min_vruntime = cfs_rq->min_vruntime;
-		} while (min_vruntime != min_vruntime_copy);
-#else
-		min_vruntime = cfs_rq->min_vruntime;
-#endif
 
-		se->vruntime -= min_vruntime;
+		se->vruntime -= u64_32read(cfs_rq->min_vruntime,
+					   cfs_rq->min_vruntime_copy);
 	}
 
 	if (p->on_rq == TASK_ON_RQ_MIGRATING) {
@@ -10891,9 +10841,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
 	cfs_rq->min_vruntime = (u64)(-(1LL << 20));
-#ifndef CONFIG_64BIT
-	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
-#endif
+	u64_32read_set_copy(cfs_rq->min_vruntime, cfs_rq->min_vruntime_copy);
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9f33c77..c8c4646 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -605,6 +605,41 @@ struct cfs_rq {
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 };
 
+/*
+ * u64_32read() / u64_32read_set_copy()
+ *
+ * Use the copied u64 value to protect against data race. This is only
+ * applicable for 32-bits architectures.
+ */
+#if !defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+# define u64_32read(val, copy)						\
+({									\
+	u64 _val;							\
+	u64 _val_copy;							\
+									\
+	do {								\
+		_val_copy = copy;					\
+		/*							\
+		 * paired with u64_32read_set_copy, ordering access	\
+		 * to val and copy.					\
+		 */							\
+		smp_rmb();						\
+		_val = val;						\
+	} while (_val != _val_copy);					\
+									\
+	_val;								\
+})
+# define u64_32read_set_copy(val, copy)					\
+do {									\
+	/* paired with u64_32read, ordering access to val and copy */	\
+	smp_wmb();							\
+	copy = val;							\
+} while (0)
+#else
+# define u64_32read(val, copy) (val)
+# define u64_32read_set_copy(val, copy) do { } while (0)
+#endif
+
 static inline int rt_bandwidth_enabled(void)
 {
 	return sysctl_sched_rt_runtime >= 0;
-- 
2.7.4



* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 10:59 [PATCH] sched/fair: provide u64 read for 32-bits arch helper vincent.donnefort
@ 2020-07-27 11:24 ` Ingo Molnar
  2020-07-27 12:05   ` Vincent Donnefort
  2020-07-27 12:38 ` peterz
  2020-07-28  9:09 ` Lukasz Luba
  2 siblings, 1 reply; 10+ messages in thread
From: Ingo Molnar @ 2020-07-27 11:24 UTC (permalink / raw)
  To: vincent.donnefort
  Cc: mingo, peterz, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider


* vincent.donnefort@arm.com <vincent.donnefort@arm.com> wrote:

> From: Vincent Donnefort <vincent.donnefort@arm.com>
> 
> Introducing two macro helpers u64_32read() and u64_32read_set_copy() to
> factorize the u64 vminruntime and last_update_time read on a 32-bits
> architecture. Those new helpers encapsulate smp_rmb() and smp_wmb()
> synchronization and therefore, have a small penalty in set_task_rq_fair()
> and init_cfs_rq().
> 
> The choice of using a macro over an inline function is driven by the
> conditional u64 variable copy declarations.

Could you please explain how "conditional u64 variable copy 
declarations" prevents us from using an inline function for this:

> +/*
> + * u64_32read() / u64_32read_set_copy()
> + *
> + * Use the copied u64 value to protect against data race. This is only
> + * applicable for 32-bits architectures.
> + */
> +#if !defined(CONFIG_64BIT) && defined(CONFIG_SMP)
> +# define u64_32read(val, copy)						\
> +({									\
> +	u64 _val;							\
> +	u64 _val_copy;							\
> +									\
> +	do {								\
> +		_val_copy = copy;					\
> +		/*							\
> +		 * paired with u64_32read_set_copy, ordering access	\
> +		 * to val and copy.					\
> +		 */							\
> +		smp_rmb();						\
> +		_val = val;						\
> +	} while (_val != _val_copy);					\
> +									\
> +	_val;								\
> +})
> +# define u64_32read_set_copy(val, copy)					\
> +do {									\
> +	/* paired with u64_32read, ordering access to val and copy */	\
> +	smp_wmb();							\
> +	copy = val;							\
> +} while (0)
> +#else
> +# define u64_32read(val, copy) (val)
> +# define u64_32read_set_copy(val, copy) do { } while (0)
> +#endif
> +

Thanks,

	Ingo


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 11:24 ` Ingo Molnar
@ 2020-07-27 12:05   ` Vincent Donnefort
  0 siblings, 0 replies; 10+ messages in thread
From: Vincent Donnefort @ 2020-07-27 12:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: mingo, peterz, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

Hi,

On Mon, Jul 27, 2020 at 01:24:54PM +0200, Ingo Molnar wrote:
> 
> * vincent.donnefort@arm.com <vincent.donnefort@arm.com> wrote:
> 
> > From: Vincent Donnefort <vincent.donnefort@arm.com>
> > 
> > Introducing two macro helpers u64_32read() and u64_32read_set_copy() to
> > factorize the u64 vminruntime and last_update_time read on a 32-bits
> > architecture. Those new helpers encapsulate smp_rmb() and smp_wmb()
> > synchronization and therefore, have a small penalty in set_task_rq_fair()
> > and init_cfs_rq().
> > 
> > The choice of using a macro over an inline function is driven by the
> > conditional u64 variable copy declarations.
> 
> Could you please explain how "conditional u64 variable copy 
> declarations" prevents us to use an inline function for this

I meant that, as the "copy" variable [min_vruntime|last_update_time]_copy
is declared only in the !CONFIG_64BIT case, a function call would fail to
build when CONFIG_64BIT is set.

   u64_32read(cfs_rq->min_vruntime, cfs_rq->min_vruntime_copy); 
						^
					  not declared
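
To put it another way, a minimal sketch (not actual kernel code): a
hypothetical inline variant

   /* hypothetical helper, for illustration only */
   static inline u64 u64_32read_fn(u64 val, u64 copy)
   {
           return val;
   }

would still need the call site above to name min_vruntime_copy, which
doesn't exist when CONFIG_64BIT is set, so the 64-bit build would break.
With the macro, the 64-bit definition expands to (val) and the copy
argument is discarded by the preprocessor before the compiler ever sees
it.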

Shall I rephrase this part to make it more understandable?

> 
> > +/*
> > + * u64_32read() / u64_32read_set_copy()
> > + *
> > + * Use the copied u64 value to protect against data race. This is only
> > + * applicable for 32-bits architectures.
> > + */
> > +#if !defined(CONFIG_64BIT) && defined(CONFIG_SMP)
> > +# define u64_32read(val, copy)						\
> > +({									\
> > +	u64 _val;							\
> > +	u64 _val_copy;							\
> > +									\
> > +	do {								\
> > +		_val_copy = copy;					\
> > +		/*							\
> > +		 * paired with u64_32read_set_copy, ordering access	\
> > +		 * to val and copy.					\
> > +		 */							\
> > +		smp_rmb();						\
> > +		_val = val;						\
> > +	} while (_val != _val_copy);					\
> > +									\
> > +	_val;								\
> > +})
> > +# define u64_32read_set_copy(val, copy)					\
> > +do {									\
> > +	/* paired with u64_32read, ordering access to val and copy */	\
> > +	smp_wmb();							\
> > +	copy = val;							\
> > +} while (0)
> > +#else
> > +# define u64_32read(val, copy) (val)
> > +# define u64_32read_set_copy(val, copy) do { } while (0)
> > +#endif
> > +
> 
> Thanks,
> 
> 	Ingo

-- 
Vincent


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 10:59 [PATCH] sched/fair: provide u64 read for 32-bits arch helper vincent.donnefort
  2020-07-27 11:24 ` Ingo Molnar
@ 2020-07-27 12:38 ` peterz
  2020-07-27 15:23   ` Vincent Donnefort
  2020-07-28  9:09 ` Lukasz Luba
  2 siblings, 1 reply; 10+ messages in thread
From: peterz @ 2020-07-27 12:38 UTC (permalink / raw)
  To: vincent.donnefort
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

On Mon, Jul 27, 2020 at 11:59:24AM +0100, vincent.donnefort@arm.com wrote:
> From: Vincent Donnefort <vincent.donnefort@arm.com>
> 
> Introducing two macro helpers u64_32read() and u64_32read_set_copy() to
> factorize the u64 vminruntime and last_update_time read on a 32-bits
> architecture. Those new helpers encapsulate smp_rmb() and smp_wmb()
> synchronization and therefore, have a small penalty in set_task_rq_fair()
> and init_cfs_rq().
> 
> The choice of using a macro over an inline function is driven by the
> conditional u64 variable copy declarations.
> 
>   #ifndef CONFIG_64BIT
>      u64 [vminruntime|last_update_time]_copy;
>   #endif

This lacks a *why*... why did you get up this morning and write us this
patch?




* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 12:38 ` peterz
@ 2020-07-27 15:23   ` Vincent Donnefort
  2020-07-28 11:13     ` peterz
  0 siblings, 1 reply; 10+ messages in thread
From: Vincent Donnefort @ 2020-07-27 15:23 UTC (permalink / raw)
  To: peterz
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

On Mon, Jul 27, 2020 at 02:38:01PM +0200, peterz@infradead.org wrote:
> On Mon, Jul 27, 2020 at 11:59:24AM +0100, vincent.donnefort@arm.com wrote:
> > From: Vincent Donnefort <vincent.donnefort@arm.com>
> > 
> > Introducing two macro helpers u64_32read() and u64_32read_set_copy() to
> > factorize the u64 vminruntime and last_update_time read on a 32-bits
> > architecture. Those new helpers encapsulate smp_rmb() and smp_wmb()
> > synchronization and therefore, have a small penalty in set_task_rq_fair()
> > and init_cfs_rq().
> > 
> > The choice of using a macro over an inline function is driven by the
> > conditional u64 variable copy declarations.
> > 
> >   #ifndef CONFIG_64BIT
> >      u64 [vminruntime|last_update_time]_copy;
> >   #endif
> 
> This lacks a *why*... why did you get up this morning and wrote us this
> patch.
> 
>

For 32-bit architectures, both min_vruntime and last_update_time use a
similar access pattern. This patch is simply an attempt to unify their
usage by introducing two macros to rely on when accessing them. At the
same time, it adds a comment regarding the barrier usage, as per the
kernel policy. So overall this is just a clean-up without any functional
changes.

-- 
Vincent.


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 10:59 [PATCH] sched/fair: provide u64 read for 32-bits arch helper vincent.donnefort
  2020-07-27 11:24 ` Ingo Molnar
  2020-07-27 12:38 ` peterz
@ 2020-07-28  9:09 ` Lukasz Luba
  2 siblings, 0 replies; 10+ messages in thread
From: Lukasz Luba @ 2020-07-28  9:09 UTC (permalink / raw)
  To: vincent.donnefort
  Cc: mingo, peterz, vincent.guittot, linux-kernel, dietmar.eggemann,
	valentin.schneider

Hi Vincent,

On 7/27/20 11:59 AM, vincent.donnefort@arm.com wrote:
> From: Vincent Donnefort <vincent.donnefort@arm.com>
> 
> Introducing two macro helpers u64_32read() and u64_32read_set_copy() to
> factorize the u64 vminruntime and last_update_time read on a 32-bits
> architecture. Those new helpers encapsulate smp_rmb() and smp_wmb()
> synchronization and therefore, have a small penalty in set_task_rq_fair()
> and init_cfs_rq().
> 
> The choice of using a macro over an inline function is driven by the
> conditional u64 variable copy declarations.
> 
>    #ifndef CONFIG_64BIT
>       u64 [vminruntime|last_update_time]_copy;
>    #endif
> 
> Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
> 

I've run it on a 32-bit ARM big.LITTLE platform (odroid xu3) with
CONFIG_FAIR_GROUP_SCHED and CONFIG_CGROUP_SCHED enabled.

I haven't observed any issues while running hackbench or booting
the kernel, and the performance of the affected function
set_task_rq_fair() is the same.

Function profile results after hackbench test:
--------w/ Vincent's patch + performance cpufreq gov------------
root@odroid:/sys/kernel/debug/tracing# cat trace_stat/function?
   Function              Hit     Time            Avg         s^2
   --------              ---     ----            ---         ---
   set_task_rq_fair      4068    3753.185 us     0.922 us    1.239 us
   set_task_rq_fair      4492    4180.133 us     0.930 us    2.318 us
   set_task_rq_fair      4208    3991.386 us     0.948 us    13.552 us
   set_task_rq_fair      4753    4432.231 us     0.932 us    5.875 us
   set_task_rq_fair      7980    5037.096 us     0.631 us    1.690 us
   set_task_rq_fair      8143    5078.515 us     0.623 us    2.930 us
   set_task_rq_fair      9721    6477.904 us     0.666 us    2.425 us
   set_task_rq_fair      7743    4896.002 us     0.632 us    1.188 us


-----------w/o Vincent patch, performance cpufreq gov------------
root@odroid:/sys/kernel/debug/tracing# cat trace_stat/function?
   Function              Hit     Time            Avg         s^2
   --------              ---     ----            ---         ---
   set_task_rq_fair      6525    5830.450 us     0.893 us    3.213 us
   set_task_rq_fair      6646    6069.444 us     0.913 us    9.651 us
   set_task_rq_fair      5988    5646.133 us     0.942 us    7.685 us
   set_task_rq_fair      5883    5390.103 us     0.916 us    29.283 us
   set_task_rq_fair      3844    2561.186 us     0.666 us    0.933 us
   set_task_rq_fair      5515    3491.011 us     0.633 us    9.845 us
   set_task_rq_fair      6947    4808.522 us     0.692 us    5.822 us
   set_task_rq_fair      4530    2810.000 us     0.620 us    0.554 us


Tested-by: Lukasz Luba <lukasz.luba@arm.com>

Regards,
Lukasz


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-27 15:23   ` Vincent Donnefort
@ 2020-07-28 11:13     ` peterz
  2020-07-28 12:00       ` peterz
  0 siblings, 1 reply; 10+ messages in thread
From: peterz @ 2020-07-28 11:13 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

On Mon, Jul 27, 2020 at 04:23:03PM +0100, Vincent Donnefort wrote:

> For 32-bit architectures, both min_vruntime and last_update_time are using
> similar access. This patch is simply an attempt to unify their usage by
> introducing two macros to rely on when accessing those. At the same time, it
> brings a comment regarding the barriers usage, as per the kernel policy. So
> overall this is just a clean-up without any functional changes.

Ah, I thought there was perhaps the idea to make use of armv7-lpae
instructions.

Aside from that, I think we need to spend a little time bike-shedding the
API/naming here:

> +# define u64_32read(val, copy) (val)
> +# define u64_32read_set_copy(val, copy) do { } while (0)

How about something like:

#ifdef CONFIG_64BIT

#define DEFINE_U64_U32(name)	u64 name
#define u64_u32_load(name)	name
#define u64_u32_store(name, val)name = val

#else

#define DEFINE_U64_U32(name)			\
	struct {				\
		u64 name;			\
		u64 name##_copy;		\
	}

#define u64_u32_load(name)			\
	({					\
		u64 val, copy;			\
		do {				\
			val = name;		\
			smb_rmb();		\
			copy = name##_copy;	\
		} while (val != copy);		\
		val;
	})

#define u64_u32_store(name, val)		\
	do {					\
		typeof(val) __val = (val);	\
		name = __val;			\
		smp_wmb();			\
		name##_copy = __val;		\
	} while (0)

#endif
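
Usage would then be something like this (just a sketch, taking the
min_vruntime case from the patch and assuming the copy sits right next
to the value):

	struct cfs_rq {
		...
		DEFINE_U64_U32(min_vruntime);
		...
	};

	/* read side, e.g. migrate_task_rq_fair() */
	se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);

	/* write side, e.g. update_min_vruntime() */
	u64_u32_store(cfs_rq->min_vruntime,
		      max_vruntime(cfs_rq->min_vruntime, vruntime));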




* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-28 11:13     ` peterz
@ 2020-07-28 12:00       ` peterz
  2020-07-28 19:53         ` Vincent Donnefort
  0 siblings, 1 reply; 10+ messages in thread
From: peterz @ 2020-07-28 12:00 UTC (permalink / raw)
  To: Vincent Donnefort
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

On Tue, Jul 28, 2020 at 01:13:02PM +0200, peterz@infradead.org wrote:
> On Mon, Jul 27, 2020 at 04:23:03PM +0100, Vincent Donnefort wrote:
> 
> > For 32-bit architectures, both min_vruntime and last_update_time are using
> > similar access. This patch is simply an attempt to unify their usage by
> > introducing two macros to rely on when accessing those. At the same time, it
> > brings a comment regarding the barriers usage, as per the kernel policy. So
> > overall this is just a clean-up without any functional changes.
> 
> Ah, I though there was perhaps the idea to make use of armv7-lpae
> instructions.
> 
> Aside of that, I think we need to spend a little time bike-shedding the
> API/naming here:
> 
> > +# define u64_32read(val, copy) (val)
> > +# define u64_32read_set_copy(val, copy) do { } while (0)
> 
> How about something like:
> 
> #ifdef CONFIG_64BIT
> 
> #define DEFINE_U64_U32(name)	u64 name
> #define u64_u32_load(name)	name
> #define u64_u32_store(name, val)name = val
> 
> #else
> 
> #define DEFINE_U64_U32(name)			\
> 	struct {				\
> 		u64 name;			\
> 		u64 name##_copy;		\
> 	}
> 
> #define u64_u32_load(name)			\
> 	({					\
> 		u64 val, copy;			\
> 		do {				\
> 			val = name;		\
> 			smb_rmb();		\
> 			copy = name##_copy;	\
> 		} while (val != copy);		\

wrong order there; we should first read _copy and then the regular one
of course.

> 		val;
> 	})
> 
> #define u64_u32_store(name, val)		\
> 	do {					\
> 		typeof(val) __val = (val);	\
> 		name = __val;			\
> 		smp_wmb();			\
> 		name##_copy = __val;		\
> 	} while (0)
> 
> #endif

The other approach is making it a full type and inline functions I
suppose.
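
Roughly like so, I suppose (just a sketch with made-up names, 32-bit SMP
variant only; the 64-bit version would collapse to a plain u64 and
trivial accessors):

	typedef struct {
		u64 val;
		u64 copy;
	} u64_u32_t;

	static inline u64 u64_u32_read(const u64_u32_t *p)
	{
		u64 val, copy;

		do {
			copy = p->copy;
			/* pairs with the smp_wmb() in u64_u32_write() */
			smp_rmb();
			val = p->val;
		} while (val != copy);

		return val;
	}

	static inline void u64_u32_write(u64_u32_t *p, u64 val)
	{
		p->val = val;
		/* pairs with the smp_rmb() in u64_u32_read() */
		smp_wmb();
		p->copy = val;
	}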


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-28 12:00       ` peterz
@ 2020-07-28 19:53         ` Vincent Donnefort
  2020-08-18 18:11           ` Vincent Donnefort
  0 siblings, 1 reply; 10+ messages in thread
From: Vincent Donnefort @ 2020-07-28 19:53 UTC (permalink / raw)
  To: peterz
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

Hi,

On Tue, Jul 28, 2020 at 02:00:27PM +0200, peterz@infradead.org wrote:
> On Tue, Jul 28, 2020 at 01:13:02PM +0200, peterz@infradead.org wrote:
> > On Mon, Jul 27, 2020 at 04:23:03PM +0100, Vincent Donnefort wrote:
> > 
> > > For 32-bit architectures, both min_vruntime and last_update_time are using
> > > similar access. This patch is simply an attempt to unify their usage by
> > > introducing two macros to rely on when accessing those. At the same time, it
> > > brings a comment regarding the barriers usage, as per the kernel policy. So
> > > overall this is just a clean-up without any functional changes.
> > 
> > Ah, I though there was perhaps the idea to make use of armv7-lpae
> > instructions.
> > 
> > Aside of that, I think we need to spend a little time bike-shedding the
> > API/naming here:
> > 
> > > +# define u64_32read(val, copy) (val)
> > > +# define u64_32read_set_copy(val, copy) do { } while (0)
> > 
> > How about something like:
> > 
> > #ifdef CONFIG_64BIT
> > 
> > #define DEFINE_U64_U32(name)	u64 name
> > #define u64_u32_load(name)	name
> > #define u64_u32_store(name, val)name = val
> > 
> > #else
> > 
> > #define DEFINE_U64_U32(name)			\
> > 	struct {				\
> > 		u64 name;			\
> > 		u64 name##_copy;		\
> > 	}
> > 
> > #define u64_u32_load(name)			\
> > 	({					\
> > 		u64 val, copy;			\
> > 		do {				\
> > 			val = name;		\
> > 			smb_rmb();		\
> > 			copy = name##_copy;	\
> > 		} while (val != copy);		\
> 
> wrong order there; we should first read _copy and then the regular one
> of course.
> 
> > 		val;
> > 	})
> > 
> > #define u64_u32_store(name, val)		\
> > 	do {					\
> > 		typeof(val) __val = (val);	\
> > 		name = __val;			\
> > 		smp_wmb();			\
> > 		name##_copy = __val;		\
> > 	} while (0)
> > 
> > #endif
> 
> The other approach is making it a full type and inline functions I
> suppose.

I didn't pick this approach originally, as I thought it would be way too
invasive. While the API looks much cleaner, it nonetheless doesn't match
the last_update_time usage: the variable is declared in struct sched_avg
while its copy is in struct cfs_rq.
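
For reference, the current layout is roughly this (simplified):

	struct sched_avg {
		u64			last_update_time;
		...
	};

	struct cfs_rq {
		...
		struct sched_avg	avg;
	#ifndef CONFIG_64BIT
		u64			load_last_update_time_copy;
	#endif
		...
	};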

Moving the copy into sched_avg would mean:

 * Setting the copy for all struct sched_avg in ___update_load_sum(), instead
   of just the cfs_rq.avg in update_cfs_rq_load_avg().

 * Having the DEFINE_U64_U32 declaration in include/linux/sched.h to cover
   struct sched_avg.

-- 
Vincent


* Re: [PATCH] sched/fair: provide u64 read for 32-bits arch helper
  2020-07-28 19:53         ` Vincent Donnefort
@ 2020-08-18 18:11           ` Vincent Donnefort
  0 siblings, 0 replies; 10+ messages in thread
From: Vincent Donnefort @ 2020-08-18 18:11 UTC (permalink / raw)
  To: peterz
  Cc: mingo, vincent.guittot, linux-kernel, dietmar.eggemann,
	lukasz.luba, valentin.schneider

Hi Peter,

[...]

> > > 
> > > How about something like:
> > > 
> > > #ifdef CONFIG_64BIT
> > > 
> > > #define DEFINE_U64_U32(name)	u64 name
> > > #define u64_u32_load(name)	name
> > > #define u64_u32_store(name, val)name = val
> > > 
> > > #else
> > > 
> > > #define DEFINE_U64_U32(name)			\
> > > 	struct {				\
> > > 		u64 name;			\
> > > 		u64 name##_copy;		\
> > > 	}
> > > 
> > > #define u64_u32_load(name)			\
> > > 	({					\
> > > 		u64 val, copy;			\
> > > 		do {				\
> > > 			val = name;		\
> > > 			smb_rmb();		\
> > > 			copy = name##_copy;	\
> > > 		} while (val != copy);		\
> > 
> > wrong order there; we should first read _copy and then the regular one
> > of course.
> > 
> > > 		val;
> > > 	})
> > > 
> > > #define u64_u32_store(name, val)		\
> > > 	do {					\
> > > 		typeof(val) __val = (val);	\
> > > 		name = __val;			\
> > > 		smp_wmb();			\
> > > 		name##_copy = __val;		\
> > > 	} while (0)
> > > 
> > > #endif
> > 
> > The other approach is making it a full type and inline functions I
> > suppose.
> 
> I didn't pick this approach originally, as I thought it would be way too
> invasive. If the API looks way cleaner, it nonetheless doesn't match
> last_update_time usage. The variable is declared in sched_avg while its copy
> is in struct cfs_rq.
> 
> Moving the copy in sched_avg would mean:
> 
>  * Setting the copy for all struct sched_avg in ___update_load_sum(), instead
>    of just the cfs_rq.avg in update_cfs_rq_load_avg().
> 
>  * Having the DEFINE_U64_U32 declaration in include/linux/sched.h to cover
>    struct sched_avg.

Gentle ping


