All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
@ 2014-06-30 16:18 Oleg Nesterov
  2014-06-30 16:18 ` [PATCH v4 1/2] " Oleg Nesterov
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Oleg Nesterov @ 2014-06-30 16:18 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

Hello,

May be correct this time ;) Based on paulmck/linux-rcu.git rcu/next.

2/2 is new and hopefully trivial. But! the numbers look suspiciously
good, I do not understand where does the difference come from...

Oleg.

 include/linux/rcupdate.h |   66 +++++++++++++++++++++++----------------------
 include/linux/srcu.h     |    6 ++--
 kernel/rcu/rcu.h         |    6 ++--
 kernel/rcu/update.c      |   53 +++++++++++++++++++++++++++++++++++++
 4 files changed, 93 insertions(+), 38 deletions(-)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v4 1/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-06-30 16:18 [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Oleg Nesterov
@ 2014-06-30 16:18 ` Oleg Nesterov
  2014-07-01 11:41   ` Peter Zijlstra
  2014-06-30 16:18 ` [PATCH v4 2/2] rcu: change rcu_dereference_check(c) to check "c" first Oleg Nesterov
  2014-06-30 20:24 ` [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Paul E. McKenney
  2 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-06-30 16:18 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

Uninline rcu_lock_acquire() and rcu_lock_release() to shrink .text/data.
The difference in "size vmlinux" looks good,

with CONFIG_DEBUG_LOCK_ALLOC

	- 5377829 3018320 14757888 23154037
	+ 5352229 3010160 14757888 23120277

33760 bytes saved.

with CONFIG_DEBUG_LOCK_ALLOC + CONFIG_PROVE_RCU + CONFIG_TREE_RCU_TRACE

	- 5678315 3027032 14757888 23463235
	+ 5578795 3026776 14757888 23363459

saves 99776 bytes.

However, this obviously means that the "warn once" logic is moved from
the current callers of rcu_lockdep_assert(rcu_is_watching()) to update.c.

Also, with this patch we do not bother to report which function abused
rcu_is_watching(), this should be clear from dump_stack().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/rcupdate.h |   60 +++++++++++++++++++++++----------------------
 include/linux/srcu.h     |    4 +-
 kernel/rcu/rcu.h         |    6 ++--
 kernel/rcu/update.c      |   53 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d231aa1..b82d1d6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -355,21 +355,28 @@ static inline bool rcu_lockdep_current_cpu_online(void)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
-static inline void rcu_lock_acquire(struct lockdep_map *map)
+extern struct lockdep_map rcu_lock_map;
+extern struct lockdep_map rcu_bh_lock_map;
+extern struct lockdep_map rcu_sched_lock_map;
+extern struct lockdep_map rcu_callback_map;
+int debug_lockdep_rcu_enabled(void);
+
+static inline void __rcu_lock_acquire(struct lockdep_map *map, unsigned long ip)
 {
-	lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);
+	lock_acquire(map, 0, 0, 2, 0, NULL, ip);
 }
 
-static inline void rcu_lock_release(struct lockdep_map *map)
+static inline void __rcu_lock_release(struct lockdep_map *map, unsigned long ip)
 {
-	lock_release(map, 1, _THIS_IP_);
+	lock_release(map, 1, ip);
 }
 
-extern struct lockdep_map rcu_lock_map;
-extern struct lockdep_map rcu_bh_lock_map;
-extern struct lockdep_map rcu_sched_lock_map;
-extern struct lockdep_map rcu_callback_map;
-int debug_lockdep_rcu_enabled(void);
+extern void rcu_lock_acquire(void);
+extern void rcu_lock_release(void);
+extern void rcu_lock_acquire_bh(void);
+extern void rcu_lock_release_bh(void);
+extern void rcu_lock_acquire_sched(void);
+extern void rcu_lock_release_sched(void);
 
 /**
  * rcu_read_lock_held() - might we be in RCU read-side critical section?
@@ -463,8 +470,15 @@ static inline int rcu_read_lock_sched_held(void)
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-# define rcu_lock_acquire(a)		do { } while (0)
-# define rcu_lock_release(a)		do { } while (0)
+#define __rcu_lock_acquire(map, ip)	do { } while (0)
+#define __rcu_lock_release(map, ip)	do { } while (0)
+
+#define rcu_lock_acquire()		do { } while (0)
+#define rcu_lock_release()		do { } while (0)
+#define rcu_lock_acquire_bh()		do { } while (0)
+#define rcu_lock_release_bh()		do { } while (0)
+#define rcu_lock_acquire_sched()	do { } while (0)
+#define rcu_lock_release_sched()	do { } while (0)
 
 static inline int rcu_read_lock_held(void)
 {
@@ -839,9 +853,7 @@ static inline void rcu_read_lock(void)
 {
 	__rcu_read_lock();
 	__acquire(RCU);
-	rcu_lock_acquire(&rcu_lock_map);
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_lock() used illegally while idle");
+	rcu_lock_acquire();
 }
 
 /*
@@ -889,9 +901,7 @@ static inline void rcu_read_lock(void)
  */
 static inline void rcu_read_unlock(void)
 {
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_unlock() used illegally while idle");
-	rcu_lock_release(&rcu_lock_map);
+	rcu_lock_release();
 	__release(RCU);
 	__rcu_read_unlock();
 }
@@ -917,9 +927,7 @@ static inline void rcu_read_lock_bh(void)
 {
 	local_bh_disable();
 	__acquire(RCU_BH);
-	rcu_lock_acquire(&rcu_bh_lock_map);
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_lock_bh() used illegally while idle");
+	rcu_lock_acquire_bh();
 }
 
 /*
@@ -929,9 +937,7 @@ static inline void rcu_read_lock_bh(void)
  */
 static inline void rcu_read_unlock_bh(void)
 {
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_unlock_bh() used illegally while idle");
-	rcu_lock_release(&rcu_bh_lock_map);
+	rcu_lock_release_bh();
 	__release(RCU_BH);
 	local_bh_enable();
 }
@@ -953,9 +959,7 @@ static inline void rcu_read_lock_sched(void)
 {
 	preempt_disable();
 	__acquire(RCU_SCHED);
-	rcu_lock_acquire(&rcu_sched_lock_map);
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_lock_sched() used illegally while idle");
+	rcu_lock_acquire_sched();
 }
 
 /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -972,9 +976,7 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
  */
 static inline void rcu_read_unlock_sched(void)
 {
-	rcu_lockdep_assert(rcu_is_watching(),
-			   "rcu_read_unlock_sched() used illegally while idle");
-	rcu_lock_release(&rcu_sched_lock_map);
+	rcu_lock_release_sched();
 	__release(RCU_SCHED);
 	preempt_enable();
 }
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index a2783cb..5c06289 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -219,7 +219,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 {
 	int retval = __srcu_read_lock(sp);
 
-	rcu_lock_acquire(&(sp)->dep_map);
+	__rcu_lock_acquire(&(sp)->dep_map, _THIS_IP_);
 	return retval;
 }
 
@@ -233,7 +233,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
 static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
 	__releases(sp)
 {
-	rcu_lock_release(&(sp)->dep_map);
+	__rcu_lock_release(&(sp)->dep_map, _THIS_IP_);
 	__srcu_read_unlock(sp, idx);
 }
 
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index ff1a6de..4782d2f 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -107,16 +107,16 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 {
 	unsigned long offset = (unsigned long)head->func;
 
-	rcu_lock_acquire(&rcu_callback_map);
+	__rcu_lock_acquire(&rcu_callback_map, _THIS_IP_);
 	if (__is_kfree_rcu_offset(offset)) {
 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
 		kfree((void *)head - offset);
-		rcu_lock_release(&rcu_callback_map);
+		__rcu_lock_release(&rcu_callback_map, _THIS_IP_);
 		return true;
 	} else {
 		RCU_TRACE(trace_rcu_invoke_callback(rn, head));
 		head->func(head);
-		rcu_lock_release(&rcu_callback_map);
+		__rcu_lock_release(&rcu_callback_map, _THIS_IP_);
 		return false;
 	}
 }
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d79..c2209eb 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -163,6 +163,59 @@ int rcu_read_lock_bh_held(void)
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
+static void rcu_lockdep_assert_watching(void)
+{
+	rcu_lockdep_assert(rcu_is_watching(), "RCU used illegally while idle");
+}
+
+static void rcu_acquire_map(struct lockdep_map *map, unsigned long ip)
+{
+	__rcu_lock_acquire(map, ip);
+	rcu_lockdep_assert_watching();
+}
+
+static void rcu_release_map(struct lockdep_map *map, unsigned long ip)
+{
+	rcu_lockdep_assert_watching();
+	__rcu_lock_release(map, ip);
+}
+
+void rcu_lock_acquire(void)
+{
+	rcu_acquire_map(&rcu_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_acquire);
+
+void rcu_lock_release(void)
+{
+	rcu_release_map(&rcu_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_release);
+
+void rcu_lock_acquire_bh(void)
+{
+	rcu_acquire_map(&rcu_bh_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_acquire_bh);
+
+void rcu_lock_release_bh(void)
+{
+	rcu_release_map(&rcu_bh_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_release_bh);
+
+void rcu_lock_acquire_sched(void)
+{
+	rcu_acquire_map(&rcu_sched_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_acquire_sched);
+
+void rcu_lock_release_sched(void)
+{
+	rcu_release_map(&rcu_sched_lock_map, _RET_IP_);
+}
+EXPORT_SYMBOL(rcu_lock_release_sched);
+
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 struct rcu_synchronize {
-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v4 2/2] rcu: change rcu_dereference_check(c) to check "c" first
  2014-06-30 16:18 [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Oleg Nesterov
  2014-06-30 16:18 ` [PATCH v4 1/2] " Oleg Nesterov
@ 2014-06-30 16:18 ` Oleg Nesterov
  2014-06-30 20:24 ` [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Paul E. McKenney
  2 siblings, 0 replies; 12+ messages in thread
From: Oleg Nesterov @ 2014-06-30 16:18 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

Change the order of "c" and "_held" checks in rcu_dereference_check(),
this can help if __builtin_constant_p(c).

CONFIG_DEBUG_LOCK_ALLOC + CONFIG_PROVE_RCU + CONFIG_TREE_RCU_TRACE

	- 5578795 3026776 14757888 23363459
	+ 5542443 3014040 14757888 23314371

saves 49088 bytes according to size vmlinux.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/rcupdate.h |    6 +++---
 include/linux/srcu.h     |    2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b82d1d6..e8c55d8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -694,7 +694,7 @@ static inline void rcu_preempt_sleep_check(void)
  * annotated as __rcu.
  */
 #define rcu_dereference_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu)
 
 /**
  * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
@@ -704,7 +704,7 @@ static inline void rcu_preempt_sleep_check(void)
  * This is the RCU-bh counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_bh_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
 
 /**
  * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
@@ -714,7 +714,7 @@ static inline void rcu_preempt_sleep_check(void)
  * This is the RCU-sched counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_sched_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
+	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
 				__rcu)
 
 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 5c06289..eae58d4 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -184,7 +184,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
  * lockdep_is_held() calls.
  */
 #define srcu_dereference_check(p, sp, c) \
-	__rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
+	__rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu)
 
 /**
  * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
-- 
1.5.5.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-06-30 16:18 [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Oleg Nesterov
  2014-06-30 16:18 ` [PATCH v4 1/2] " Oleg Nesterov
  2014-06-30 16:18 ` [PATCH v4 2/2] rcu: change rcu_dereference_check(c) to check "c" first Oleg Nesterov
@ 2014-06-30 20:24 ` Paul E. McKenney
  2014-07-01 14:55   ` Oleg Nesterov
  2 siblings, 1 reply; 12+ messages in thread
From: Paul E. McKenney @ 2014-06-30 20:24 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

On Mon, Jun 30, 2014 at 06:18:37PM +0200, Oleg Nesterov wrote:
> Hello,
> 
> May be correct this time ;) Based on paulmck/linux-rcu.git rcu/next.
> 
> 2/2 is new and hopefully trivial. But! the numbers look suspiciously
> good, I do not understand where does the difference come from...

Probably from rcu_dereference_raw() and rcu_dereference_check(..., 1).  ;-)

Queued and kicked off testing, both mine and (indirectly) Fengguang's.

							Thanx, Paul


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-06-30 16:18 ` [PATCH v4 1/2] " Oleg Nesterov
@ 2014-07-01 11:41   ` Peter Zijlstra
  2014-07-01 16:40     ` Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2014-07-01 11:41 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Paul E. McKenney, Josh Triplett, Lai Jiangshan, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 1013 bytes --]

On Mon, Jun 30, 2014 at 06:18:49PM +0200, Oleg Nesterov wrote:
> +static inline void __rcu_lock_acquire(struct lockdep_map *map, unsigned long ip)
>  {
> +	lock_acquire(map, 0, 0, 2, 0, NULL, ip);
>  }

> +extern void rcu_lock_acquire(void);
> +extern void rcu_lock_release(void);
> +extern void rcu_lock_acquire_bh(void);
> +extern void rcu_lock_release_bh(void);
> +extern void rcu_lock_acquire_sched(void);
> +extern void rcu_lock_release_sched(void);

> diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> index a2783cb..5c06289 100644
> --- a/include/linux/srcu.h
> +++ b/include/linux/srcu.h
> @@ -219,7 +219,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
>  {
>  	int retval = __srcu_read_lock(sp);
>  
> -	rcu_lock_acquire(&(sp)->dep_map);
> +	__rcu_lock_acquire(&(sp)->dep_map, _THIS_IP_);
>  	return retval;
>  }

Would an srcu_lock_acquire() not make sense here?

In any case, not wrong per se, just a consistency thing that stood out.

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-06-30 20:24 ` [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Paul E. McKenney
@ 2014-07-01 14:55   ` Oleg Nesterov
  2014-07-02 15:39     ` Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-07-01 14:55 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

On 06/30, Paul E. McKenney wrote:
>
> On Mon, Jun 30, 2014 at 06:18:37PM +0200, Oleg Nesterov wrote:
> > Hello,
> >
> > May be correct this time ;) Based on paulmck/linux-rcu.git rcu/next.
> >
> > 2/2 is new and hopefully trivial. But! the numbers look suspiciously
> > good, I do not understand where does the difference come from...
>
> Probably from rcu_dereference_raw() and rcu_dereference_check(..., 1).  ;-)

Yes, sure, this was the motivation for the patch. But I didn't expect the
50k difference ;)

OK, I understand now. I forgot that every list_for_each_rcu/list_entry_rcu
has rcu_dereference_raw().

> Queued and kicked off testing, both mine and (indirectly) Fengguang's.

Thanks!

Oleg.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-07-01 11:41   ` Peter Zijlstra
@ 2014-07-01 16:40     ` Oleg Nesterov
  2014-07-08 22:22       ` Paul E. McKenney
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-07-01 16:40 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul E. McKenney, Josh Triplett, Lai Jiangshan, linux-kernel

On 07/01, Peter Zijlstra wrote:
>
> On Mon, Jun 30, 2014 at 06:18:49PM +0200, Oleg Nesterov wrote:
> > +static inline void __rcu_lock_acquire(struct lockdep_map *map, unsigned long ip)
> >  {
> > +	lock_acquire(map, 0, 0, 2, 0, NULL, ip);
> >  }
>
> > +extern void rcu_lock_acquire(void);
> > +extern void rcu_lock_release(void);
> > +extern void rcu_lock_acquire_bh(void);
> > +extern void rcu_lock_release_bh(void);
> > +extern void rcu_lock_acquire_sched(void);
> > +extern void rcu_lock_release_sched(void);
>
> > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > index a2783cb..5c06289 100644
> > --- a/include/linux/srcu.h
> > +++ b/include/linux/srcu.h
> > @@ -219,7 +219,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
> >  {
> >  	int retval = __srcu_read_lock(sp);
> >
> > -	rcu_lock_acquire(&(sp)->dep_map);
> > +	__rcu_lock_acquire(&(sp)->dep_map, _THIS_IP_);
> >  	return retval;
> >  }
>
> Would an srcu_lock_acquire() not make sense here?
>
> In any case, not wrong per se, just a consistency thing that stood out.

Yes, I looked at this too...

But probably it would be better to just add __rcu_lock_acquire() into
__srcu_read_lock(), and kill that inline in srcu.h ?

Oleg.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-07-01 14:55   ` Oleg Nesterov
@ 2014-07-02 15:39     ` Oleg Nesterov
  2014-07-02 18:59       ` [PATCH 0/1] rcu: uninline rcu_read_lock_held() Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-07-02 15:39 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

On 07/01, Oleg Nesterov wrote:
>
> On 06/30, Paul E. McKenney wrote:
> >
> > On Mon, Jun 30, 2014 at 06:18:37PM +0200, Oleg Nesterov wrote:
> > >
> > > 2/2 is new and hopefully trivial. But! the numbers look suspiciously
> > > good, I do not understand where does the difference come from...
> >
> > Probably from rcu_dereference_raw() and rcu_dereference_check(..., 1).  ;-)
>
> Yes, sure, this was the motivation for the patch. But I didn't expect the
> 50k difference ;)
>
> OK, I understand now. I forgot that every list_for_each_rcu/list_entry_rcu
> has rcu_dereference_raw().

And this naturally suggests that rcu_read_lock_held() should be uninlined
too (and rcu_read_lock_sched_held(), but this needs another patch).

But I still can't understand the difference reported by "size vmlinux",

	- 5541731 3014560 14757888 23314179
	+ 5513182 3026848 14757888 23297918

it removes 28K from .text, but somehow it adds 12K to .data. I do not
see any difference in .data when I compare individual .o files before/
after this patch.

Confused.

Oleg.


diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e8c55d8..8980817 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -378,42 +378,8 @@ extern void rcu_lock_release_bh(void);
 extern void rcu_lock_acquire_sched(void);
 extern void rcu_lock_release_sched(void);
 
-/**
- * rcu_read_lock_held() - might we be in RCU read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
- * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
- * this assumes we are in an RCU read-side critical section unless it can
- * prove otherwise.  This is useful for debug checks in functions that
- * require that they be called within an RCU read-side critical section.
- *
- * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
- * and while lockdep is disabled.
- *
- * Note that rcu_read_lock() and the matching rcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock() in process context if the matching rcu_read_lock()
- * was invoked from within an irq handler.
- *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
- * offline from an RCU perspective, so check for those as well.
- */
-static inline int rcu_read_lock_held(void)
-{
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
-	return lock_is_held(&rcu_lock_map);
-}
-
-/*
- * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
- * hell.
- */
-int rcu_read_lock_bh_held(void);
+extern int rcu_read_lock_held(void);
+extern int rcu_read_lock_bh_held(void);
 
 /**
  * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c2209eb..ea4af81 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -137,6 +137,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
 /**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.  This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
+/**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
  * Check for bottom half being disabled, which covers both the


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 0/1] rcu: uninline rcu_read_lock_held()
  2014-07-02 15:39     ` Oleg Nesterov
@ 2014-07-02 18:59       ` Oleg Nesterov
  2014-07-02 18:59         ` [PATCH 1/1] " Oleg Nesterov
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-07-02 18:59 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

On 07/02, Oleg Nesterov wrote:
>
> And this naturally suggests that rcu_read_lock_held() should be uninlined
> too (and rcu_read_lock_sched_held(), but this needs another patch).
>
> But I still can't understand the difference reported by "size vmlinux",
>
> 	- 5541731 3014560 14757888 23314179
> 	+ 5513182 3026848 14757888 23297918
>
> it removes 28K from .text, but somehow it adds 12K to .data. I do not
> see any difference in .data when I compare individual .o files before/
> after this patch.

OK, it doesn't add to .data. This is because of INIT_TASK_DATA(THREAD_SIZE)
in vmlinux.lds.S, see the changelog.

If you think this makes sense I will will spam you again. We can also
uninline rcu_read_lock_sched_held(), mostly for consistency. But this
needs a separate change.

Oleg.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/1] rcu: uninline rcu_read_lock_held()
  2014-07-02 18:59       ` [PATCH 0/1] rcu: uninline rcu_read_lock_held() Oleg Nesterov
@ 2014-07-02 18:59         ` Oleg Nesterov
  2014-07-08 22:20           ` Paul E. McKenney
  0 siblings, 1 reply; 12+ messages in thread
From: Oleg Nesterov @ 2014-07-02 18:59 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

Uninline rcu_read_lock_held(). According to size vmlinux this saves
28549 in .text:

	- 5541731 3014560 14757888 23314179
	+ 5513182 3026848 14757888 23297918

Note: it looks as if the data grows by 12288 bytes but this is not true,
it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and
since .text shrinks the padding grows, and thus .data grows too as it
seen by /bin/size. diff System.map:

	- ffffffff81510000 D _sdata
	- ffffffff81510000 D init_thread_union
	+ ffffffff81509000 D _sdata
	+ ffffffff8150c000 D init_thread_union

Perhaps we can change vmlinux.lds.S to .data itself, so that /bin/size
can't "wrongly" report that .data grows if .text shinks.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/rcupdate.h |   38 ++------------------------------------
 kernel/rcu/update.c      |   32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e8c55d8..8980817 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -378,42 +378,8 @@ extern void rcu_lock_release_bh(void);
 extern void rcu_lock_acquire_sched(void);
 extern void rcu_lock_release_sched(void);
 
-/**
- * rcu_read_lock_held() - might we be in RCU read-side critical section?
- *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
- * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
- * this assumes we are in an RCU read-side critical section unless it can
- * prove otherwise.  This is useful for debug checks in functions that
- * require that they be called within an RCU read-side critical section.
- *
- * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
- * and while lockdep is disabled.
- *
- * Note that rcu_read_lock() and the matching rcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * rcu_read_unlock() in process context if the matching rcu_read_lock()
- * was invoked from within an irq handler.
- *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
- * offline from an RCU perspective, so check for those as well.
- */
-static inline int rcu_read_lock_held(void)
-{
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
-	return lock_is_held(&rcu_lock_map);
-}
-
-/*
- * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
- * hell.
- */
-int rcu_read_lock_bh_held(void);
+extern int rcu_read_lock_held(void);
+extern int rcu_read_lock_bh_held(void);
 
 /**
  * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c2209eb..ea4af81 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -137,6 +137,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
 /**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.  This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context, for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
+/**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
  * Check for bottom half being disabled, which covers both the
-- 
1.5.5.1



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] rcu: uninline rcu_read_lock_held()
  2014-07-02 18:59         ` [PATCH 1/1] " Oleg Nesterov
@ 2014-07-08 22:20           ` Paul E. McKenney
  0 siblings, 0 replies; 12+ messages in thread
From: Paul E. McKenney @ 2014-07-08 22:20 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Josh Triplett, Lai Jiangshan, Peter Zijlstra, linux-kernel

On Wed, Jul 02, 2014 at 08:59:35PM +0200, Oleg Nesterov wrote:
> Uninline rcu_read_lock_held(). According to size vmlinux this saves
> 28549 in .text:
> 
> 	- 5541731 3014560 14757888 23314179
> 	+ 5513182 3026848 14757888 23297918
> 
> Note: it looks as if the data grows by 12288 bytes but this is not true,
> it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and
> since .text shrinks the padding grows, and thus .data grows too as it
> seen by /bin/size. diff System.map:
> 
> 	- ffffffff81510000 D _sdata
> 	- ffffffff81510000 D init_thread_union
> 	+ ffffffff81509000 D _sdata
> 	+ ffffffff8150c000 D init_thread_union
> 
> Perhaps we can change vmlinux.lds.S to .data itself, so that /bin/size
> can't "wrongly" report that .data grows if .text shinks.
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Queued for 3.18, thank you, Oleg!

I removed the "extern" tags, apparently people don't like them on
function declarations anymore.

							Thanx, Paul

> ---
>  include/linux/rcupdate.h |   38 ++------------------------------------
>  kernel/rcu/update.c      |   32 ++++++++++++++++++++++++++++++++
>  2 files changed, 34 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
> index e8c55d8..8980817 100644
> --- a/include/linux/rcupdate.h
> +++ b/include/linux/rcupdate.h
> @@ -378,42 +378,8 @@ extern void rcu_lock_release_bh(void);
>  extern void rcu_lock_acquire_sched(void);
>  extern void rcu_lock_release_sched(void);
> 
> -/**
> - * rcu_read_lock_held() - might we be in RCU read-side critical section?
> - *
> - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
> - * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
> - * this assumes we are in an RCU read-side critical section unless it can
> - * prove otherwise.  This is useful for debug checks in functions that
> - * require that they be called within an RCU read-side critical section.
> - *
> - * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
> - * and while lockdep is disabled.
> - *
> - * Note that rcu_read_lock() and the matching rcu_read_unlock() must
> - * occur in the same context, for example, it is illegal to invoke
> - * rcu_read_unlock() in process context if the matching rcu_read_lock()
> - * was invoked from within an irq handler.
> - *
> - * Note that rcu_read_lock() is disallowed if the CPU is either idle or
> - * offline from an RCU perspective, so check for those as well.
> - */
> -static inline int rcu_read_lock_held(void)
> -{
> -	if (!debug_lockdep_rcu_enabled())
> -		return 1;
> -	if (!rcu_is_watching())
> -		return 0;
> -	if (!rcu_lockdep_current_cpu_online())
> -		return 0;
> -	return lock_is_held(&rcu_lock_map);
> -}
> -
> -/*
> - * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
> - * hell.
> - */
> -int rcu_read_lock_bh_held(void);
> +extern int rcu_read_lock_held(void);
> +extern int rcu_read_lock_bh_held(void);
> 
>  /**
>   * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
> diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
> index c2209eb..ea4af81 100644
> --- a/kernel/rcu/update.c
> +++ b/kernel/rcu/update.c
> @@ -137,6 +137,38 @@ int notrace debug_lockdep_rcu_enabled(void)
>  EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
> 
>  /**
> + * rcu_read_lock_held() - might we be in RCU read-side critical section?
> + *
> + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
> + * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
> + * this assumes we are in an RCU read-side critical section unless it can
> + * prove otherwise.  This is useful for debug checks in functions that
> + * require that they be called within an RCU read-side critical section.
> + *
> + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
> + * and while lockdep is disabled.
> + *
> + * Note that rcu_read_lock() and the matching rcu_read_unlock() must
> + * occur in the same context, for example, it is illegal to invoke
> + * rcu_read_unlock() in process context if the matching rcu_read_lock()
> + * was invoked from within an irq handler.
> + *
> + * Note that rcu_read_lock() is disallowed if the CPU is either idle or
> + * offline from an RCU perspective, so check for those as well.
> + */
> +int rcu_read_lock_held(void)
> +{
> +	if (!debug_lockdep_rcu_enabled())
> +		return 1;
> +	if (!rcu_is_watching())
> +		return 0;
> +	if (!rcu_lockdep_current_cpu_online())
> +		return 0;
> +	return lock_is_held(&rcu_lock_map);
> +}
> +EXPORT_SYMBOL_GPL(rcu_read_lock_held);
> +
> +/**
>   * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
>   *
>   * Check for bottom half being disabled, which covers both the
> -- 
> 1.5.5.1
> 
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v4 1/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release()
  2014-07-01 16:40     ` Oleg Nesterov
@ 2014-07-08 22:22       ` Paul E. McKenney
  0 siblings, 0 replies; 12+ messages in thread
From: Paul E. McKenney @ 2014-07-08 22:22 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Peter Zijlstra, Josh Triplett, Lai Jiangshan, linux-kernel

On Tue, Jul 01, 2014 at 06:40:02PM +0200, Oleg Nesterov wrote:
> On 07/01, Peter Zijlstra wrote:
> >
> > On Mon, Jun 30, 2014 at 06:18:49PM +0200, Oleg Nesterov wrote:
> > > +static inline void __rcu_lock_acquire(struct lockdep_map *map, unsigned long ip)
> > >  {
> > > +	lock_acquire(map, 0, 0, 2, 0, NULL, ip);
> > >  }
> >
> > > +extern void rcu_lock_acquire(void);
> > > +extern void rcu_lock_release(void);
> > > +extern void rcu_lock_acquire_bh(void);
> > > +extern void rcu_lock_release_bh(void);
> > > +extern void rcu_lock_acquire_sched(void);
> > > +extern void rcu_lock_release_sched(void);
> >
> > > diff --git a/include/linux/srcu.h b/include/linux/srcu.h
> > > index a2783cb..5c06289 100644
> > > --- a/include/linux/srcu.h
> > > +++ b/include/linux/srcu.h
> > > @@ -219,7 +219,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
> > >  {
> > >  	int retval = __srcu_read_lock(sp);
> > >
> > > -	rcu_lock_acquire(&(sp)->dep_map);
> > > +	__rcu_lock_acquire(&(sp)->dep_map, _THIS_IP_);
> > >  	return retval;
> > >  }
> >
> > Would an srcu_lock_acquire() not make sense here?
> >
> > In any case, not wrong per se, just a consistency thing that stood out.
> 
> Yes, I looked at this too...
> 
> But probably it would be better to just add __rcu_lock_acquire() into
> __srcu_read_lock(), and kill that inline in srcu.h ?

Makes sense to me!

							Thanx, Paul


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2014-07-08 22:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-30 16:18 [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Oleg Nesterov
2014-06-30 16:18 ` [PATCH v4 1/2] " Oleg Nesterov
2014-07-01 11:41   ` Peter Zijlstra
2014-07-01 16:40     ` Oleg Nesterov
2014-07-08 22:22       ` Paul E. McKenney
2014-06-30 16:18 ` [PATCH v4 2/2] rcu: change rcu_dereference_check(c) to check "c" first Oleg Nesterov
2014-06-30 20:24 ` [PATCH v4 0/2] rcu: uninline rcu_lock_acquire() and rcu_lock_release() Paul E. McKenney
2014-07-01 14:55   ` Oleg Nesterov
2014-07-02 15:39     ` Oleg Nesterov
2014-07-02 18:59       ` [PATCH 0/1] rcu: uninline rcu_read_lock_held() Oleg Nesterov
2014-07-02 18:59         ` [PATCH 1/1] " Oleg Nesterov
2014-07-08 22:20           ` Paul E. McKenney

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.