[v6,14/17] static_call: Handle tail-calls
diff mbox series

Message ID 20200710134336.977578032@infradead.org
State New
Headers show
Series
  • Add static_call
Related show

Commit Message

Peter Zijlstra July 10, 2020, 1:38 p.m. UTC
GCC can turn our static_call(name)(args...) into a tail call, in which
case we get a JMP.d32 into the trampoline (which then does a further
tail-call).

Teach objtool to recognise and mark these in .static_call_sites and
adjust the code patching to deal with this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/static_call.c           |   21 ++++++++++++++++++---
 include/linux/static_call.h             |    4 ++--
 include/linux/static_call_types.h       |    7 +++++++
 kernel/static_call.c                    |   21 +++++++++++++--------
 tools/include/linux/static_call_types.h |    7 +++++++
 tools/objtool/check.c                   |   18 +++++++++++++-----
 6 files changed, 60 insertions(+), 18 deletions(-)

Comments

Steven Rostedt July 11, 2020, 12:23 a.m. UTC | #1
On Fri, 10 Jul 2020 15:38:45 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> GCC can turn our static_call(name)(args...) into a tail call, in which
> case we get a JMP.d32 into the trampoline (which then does a further
> tail-call).
> 
> Teach objtool to recognise and mark these in .static_call_sites and
> adjust the code patching to deal with this.
> 

Hmm, were you able to trigger crashes before this patch?

> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/x86/kernel/static_call.c           |   21 ++++++++++++++++++---
>  include/linux/static_call.h             |    4 ++--
>  include/linux/static_call_types.h       |    7 +++++++
>  kernel/static_call.c                    |   21 +++++++++++++--------
>  tools/include/linux/static_call_types.h |    7 +++++++
>  tools/objtool/check.c                   |   18 +++++++++++++-----
>  6 files changed, 60 insertions(+), 18 deletions(-)
> 
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -41,15 +41,30 @@ static void __static_call_transform(void
>  	text_poke_bp(insn, code, size, NULL);
>  }
>  
> -void arch_static_call_transform(void *site, void *tramp, void *func)
> +static inline enum insn_type __sc_insn(bool null, bool tail)
> +{
> +	/*
> +	 * Encode the following table without branches:
> +	 *
> +	 *	tail	null	insn
> +	 *	-----+-------+------
> +	 *	  0  |   0   |  CALL
> +	 *	  0  |   1   |  NOP
> +	 *	  1  |   0   |  JMP
> +	 *	  1  |   1   |  RET
> +	 */
> +	return 2*tail + null;
> +}
> +
> +void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
>  {
>  	mutex_lock(&text_mutex);
>  
>  	if (tramp)
> -		__static_call_transform(tramp, func ? JMP : RET, func);
> +		__static_call_transform(tramp, __sc_insn(!func, true), func);
>  
>  	if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
> -		__static_call_transform(site, func ? CALL : NOP, func);
> +		__static_call_transform(site, __sc_insn(!func, tail), func);
>  
>  	mutex_unlock(&text_mutex);
>  }
> --- a/include/linux/static_call.h
> +++ b/include/linux/static_call.h
> @@ -103,7 +103,7 @@
>  /*
>   * Either @site or @tramp can be NULL.
>   */
> -extern void arch_static_call_transform(void *site, void *tramp, void *func);
> +extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail);
>  
>  #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)
>  
> @@ -206,7 +206,7 @@ void __static_call_update(struct static_
>  {
>  	cpus_read_lock();
>  	WRITE_ONCE(key->func, func);
> -	arch_static_call_transform(NULL, tramp, func);
> +	arch_static_call_transform(NULL, tramp, func, false);
>  	cpus_read_unlock();
>  }
>  
> --- a/include/linux/static_call_types.h
> +++ b/include/linux/static_call_types.h
> @@ -17,6 +17,13 @@
>  #define STATIC_CALL_TRAMP_STR(name)	__stringify(STATIC_CALL_TRAMP(name))
>  
>  /*
> + * Flags in the low bits of static_call_site::key.
> + */
> +#define STATIC_CALL_SITE_TAIL 1UL	/* tail call */
> +#define STATIC_CALL_SITE_INIT 2UL	/* init section */
> +#define STATIC_CALL_SITE_FLAGS 3UL
> +
> +/*
>   * The static call site table needs to be created by external tooling (objtool
>   * or a compiler plugin).
>   */
> --- a/kernel/static_call.c
> +++ b/kernel/static_call.c
> @@ -15,8 +15,6 @@ extern struct static_call_site __start_s
>  
>  static bool static_call_initialized;
>  
> -#define STATIC_CALL_INIT 1UL
> -
>  /* mutex to protect key modules/sites */
>  static DEFINE_MUTEX(static_call_mutex);
>  
> @@ -39,18 +37,23 @@ static inline void *static_call_addr(str
>  static inline struct static_call_key *static_call_key(const struct static_call_site *site)
>  {
>  	return (struct static_call_key *)
> -		(((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT);
> +		(((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS);
>  }
>  
>  /* These assume the key is word-aligned. */
>  static inline bool static_call_is_init(struct static_call_site *site)
>  {
> -	return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT;
> +	return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT;
> +}
> +
> +static inline bool static_call_is_tail(struct static_call_site *site)
> +{
> +	return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL;
>  }
>  
>  static inline void static_call_set_init(struct static_call_site *site)
>  {
> -	site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) -
> +	site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) -
>  		    (long)&site->key;
>  }
>  
> @@ -104,7 +107,7 @@ void __static_call_update(struct static_
>  
>  	key->func = func;
>  
> -	arch_static_call_transform(NULL, tramp, func);
> +	arch_static_call_transform(NULL, tramp, func, false);
>  
>  	/*
>  	 * If uninitialized, we'll not update the callsites, but they still
> @@ -154,7 +157,8 @@ void __static_call_update(struct static_
>  				continue;
>  			}
>  
> -			arch_static_call_transform(site_addr, NULL, func);
> +			arch_static_call_transform(site_addr, NULL, func,
> +				static_call_is_tail(site));
>  		}
>  	}
>  
> @@ -198,7 +202,8 @@ static int __static_call_init(struct mod
>  			key->mods = site_mod;
>  		}
>  
> -		arch_static_call_transform(site_addr, NULL, key->func);
> +		arch_static_call_transform(site_addr, NULL, key->func,
> +				static_call_is_tail(site));
>  	}
>  
>  	return 0;
> --- a/tools/include/linux/static_call_types.h
> +++ b/tools/include/linux/static_call_types.h
> @@ -17,6 +17,13 @@
>  #define STATIC_CALL_TRAMP_STR(name)	__stringify(STATIC_CALL_TRAMP(name))
>  
>  /*
> + * Flags in the low bits of static_call_site::key.
> + */
> +#define STATIC_CALL_SITE_TAIL 1UL	/* tail call */
> +#define STATIC_CALL_SITE_INIT 2UL	/* init section */
> +#define STATIC_CALL_SITE_FLAGS 3UL
> +
> +/*
>   * The static call site table needs to be created by external tooling (objtool
>   * or a compiler plugin).
>   */
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -511,7 +511,7 @@ static int create_static_call_sections(s
>  		}
>  		memset(reloc, 0, sizeof(*reloc));
>  		reloc->sym = key_sym;
> -		reloc->addend = 0;
> +		reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
>  		reloc->type = R_X86_64_PC32;
>  		reloc->offset = idx * sizeof(struct static_call_site) + 4;
>  		reloc->sec = reloc_sec;
> @@ -720,6 +720,10 @@ static int add_jump_destinations(struct
>  		} else {
>  			/* external sibling call */
>  			insn->call_dest = reloc->sym;
> +			if (insn->call_dest->static_call_tramp) {
> +				list_add_tail(&insn->static_call_node,
> +					      &file->static_call_list);
> +			}
>  			continue;
>  		}
>  
> @@ -771,6 +775,10 @@ static int add_jump_destinations(struct
>  
>  				/* internal sibling call */
>  				insn->call_dest = insn->jump_dest->func;
> +				if (insn->call_dest->static_call_tramp) {
> +					list_add_tail(&insn->static_call_node,
> +						      &file->static_call_list);
> +				}
>  			}
>  		}
>  	}
> @@ -1639,6 +1647,10 @@ static int decode_sections(struct objtoo
>  	if (ret)
>  		return ret;
>  
> +	ret = read_static_call_tramps(file);
> +	if (ret)
> +		return ret;
> +
>  	ret = add_jump_destinations(file);
>  	if (ret)
>  		return ret;
> @@ -1671,10 +1683,6 @@ static int decode_sections(struct objtoo
>  	if (ret)
>  		return ret;
>  
> -	ret = read_static_call_tramps(file);
> -	if (ret)
> -		return ret;

Hmm, what's the reason for moving this above? Should we have a comment
here if there's importance that read_static_call_trampoline() is done
earlier?

-- Steve


> -
>  	return 0;
>  }
>  
>
Peter Zijlstra July 11, 2020, 5:06 a.m. UTC | #2
On Fri, Jul 10, 2020 at 08:23:19PM -0400, Steven Rostedt wrote:
> On Fri, 10 Jul 2020 15:38:45 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> 
> > GCC can turn our static_call(name)(args...) into a tail call, in which
> > case we get a JMP.d32 into the trampoline (which then does a further
> > tail-call).
> > 
> > Teach objtool to recognise and mark these in .static_call_sites and
> > adjust the code patching to deal with this.
> > 
> 
> Hmm, were you able to trigger crashes before this patch?

No, just a bunch of tail-calls that didn't get patched and would still
point to the trampoline.
Peter Zijlstra July 11, 2020, 5:08 a.m. UTC | #3
On Fri, Jul 10, 2020 at 08:23:19PM -0400, Steven Rostedt wrote:
> On Fri, 10 Jul 2020 15:38:45 +0200
> Peter Zijlstra <peterz@infradead.org> wrote:
> > @@ -1639,6 +1647,10 @@ static int decode_sections(struct objtoo
> >  	if (ret)
> >  		return ret;
> >  
> > +	ret = read_static_call_tramps(file);
> > +	if (ret)
> > +		return ret;
> > +
> >  	ret = add_jump_destinations(file);
> >  	if (ret)
> >  		return ret;
> > @@ -1671,10 +1683,6 @@ static int decode_sections(struct objtoo
> >  	if (ret)
> >  		return ret;
> >  
> > -	ret = read_static_call_tramps(file);
> > -	if (ret)
> > -		return ret;
> 
> Hmm, what's the reason for moving this above? Should we have a comment
> here if there's importance that read_static_call_trampoline() is done
> earlier?

I suppose comments is something objtool lacks more of.

The reason is that add_jump_destination() is the thing that does
tail-call detection, and if it wants to add static-call sites, it needs
to know about the trampolines.

Patch
diff mbox series

--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -41,15 +41,30 @@  static void __static_call_transform(void
 	text_poke_bp(insn, code, size, NULL);
 }
 
-void arch_static_call_transform(void *site, void *tramp, void *func)
+static inline enum insn_type __sc_insn(bool null, bool tail)
+{
+	/*
+	 * Encode the following table without branches:
+	 *
+	 *	tail	null	insn
+	 *	-----+-------+------
+	 *	  0  |   0   |  CALL
+	 *	  0  |   1   |  NOP
+	 *	  1  |   0   |  JMP
+	 *	  1  |   1   |  RET
+	 */
+	return 2*tail + null;
+}
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
 {
 	mutex_lock(&text_mutex);
 
 	if (tramp)
-		__static_call_transform(tramp, func ? JMP : RET, func);
+		__static_call_transform(tramp, __sc_insn(!func, true), func);
 
 	if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site)
-		__static_call_transform(site, func ? CALL : NOP, func);
+		__static_call_transform(site, __sc_insn(!func, tail), func);
 
 	mutex_unlock(&text_mutex);
 }
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -103,7 +103,7 @@ 
 /*
  * Either @site or @tramp can be NULL.
  */
-extern void arch_static_call_transform(void *site, void *tramp, void *func);
+extern void arch_static_call_transform(void *site, void *tramp, void *func, bool tail);
 
 #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name)
 
@@ -206,7 +206,7 @@  void __static_call_update(struct static_
 {
 	cpus_read_lock();
 	WRITE_ONCE(key->func, func);
-	arch_static_call_transform(NULL, tramp, func);
+	arch_static_call_transform(NULL, tramp, func, false);
 	cpus_read_unlock();
 }
 
--- a/include/linux/static_call_types.h
+++ b/include/linux/static_call_types.h
@@ -17,6 +17,13 @@ 
 #define STATIC_CALL_TRAMP_STR(name)	__stringify(STATIC_CALL_TRAMP(name))
 
 /*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL	/* tail call */
+#define STATIC_CALL_SITE_INIT 2UL	/* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
  * The static call site table needs to be created by external tooling (objtool
  * or a compiler plugin).
  */
--- a/kernel/static_call.c
+++ b/kernel/static_call.c
@@ -15,8 +15,6 @@  extern struct static_call_site __start_s
 
 static bool static_call_initialized;
 
-#define STATIC_CALL_INIT 1UL
-
 /* mutex to protect key modules/sites */
 static DEFINE_MUTEX(static_call_mutex);
 
@@ -39,18 +37,23 @@  static inline void *static_call_addr(str
 static inline struct static_call_key *static_call_key(const struct static_call_site *site)
 {
 	return (struct static_call_key *)
-		(((long)site->key + (long)&site->key) & ~STATIC_CALL_INIT);
+		(((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS);
 }
 
 /* These assume the key is word-aligned. */
 static inline bool static_call_is_init(struct static_call_site *site)
 {
-	return ((long)site->key + (long)&site->key) & STATIC_CALL_INIT;
+	return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT;
+}
+
+static inline bool static_call_is_tail(struct static_call_site *site)
+{
+	return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL;
 }
 
 static inline void static_call_set_init(struct static_call_site *site)
 {
-	site->key = ((long)static_call_key(site) | STATIC_CALL_INIT) -
+	site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) -
 		    (long)&site->key;
 }
 
@@ -104,7 +107,7 @@  void __static_call_update(struct static_
 
 	key->func = func;
 
-	arch_static_call_transform(NULL, tramp, func);
+	arch_static_call_transform(NULL, tramp, func, false);
 
 	/*
 	 * If uninitialized, we'll not update the callsites, but they still
@@ -154,7 +157,8 @@  void __static_call_update(struct static_
 				continue;
 			}
 
-			arch_static_call_transform(site_addr, NULL, func);
+			arch_static_call_transform(site_addr, NULL, func,
+				static_call_is_tail(site));
 		}
 	}
 
@@ -198,7 +202,8 @@  static int __static_call_init(struct mod
 			key->mods = site_mod;
 		}
 
-		arch_static_call_transform(site_addr, NULL, key->func);
+		arch_static_call_transform(site_addr, NULL, key->func,
+				static_call_is_tail(site));
 	}
 
 	return 0;
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -17,6 +17,13 @@ 
 #define STATIC_CALL_TRAMP_STR(name)	__stringify(STATIC_CALL_TRAMP(name))
 
 /*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL	/* tail call */
+#define STATIC_CALL_SITE_INIT 2UL	/* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
  * The static call site table needs to be created by external tooling (objtool
  * or a compiler plugin).
  */
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -511,7 +511,7 @@  static int create_static_call_sections(s
 		}
 		memset(reloc, 0, sizeof(*reloc));
 		reloc->sym = key_sym;
-		reloc->addend = 0;
+		reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
 		reloc->type = R_X86_64_PC32;
 		reloc->offset = idx * sizeof(struct static_call_site) + 4;
 		reloc->sec = reloc_sec;
@@ -720,6 +720,10 @@  static int add_jump_destinations(struct
 		} else {
 			/* external sibling call */
 			insn->call_dest = reloc->sym;
+			if (insn->call_dest->static_call_tramp) {
+				list_add_tail(&insn->static_call_node,
+					      &file->static_call_list);
+			}
 			continue;
 		}
 
@@ -771,6 +775,10 @@  static int add_jump_destinations(struct
 
 				/* internal sibling call */
 				insn->call_dest = insn->jump_dest->func;
+				if (insn->call_dest->static_call_tramp) {
+					list_add_tail(&insn->static_call_node,
+						      &file->static_call_list);
+				}
 			}
 		}
 	}
@@ -1639,6 +1647,10 @@  static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
+	ret = read_static_call_tramps(file);
+	if (ret)
+		return ret;
+
 	ret = add_jump_destinations(file);
 	if (ret)
 		return ret;
@@ -1671,10 +1683,6 @@  static int decode_sections(struct objtoo
 	if (ret)
 		return ret;
 
-	ret = read_static_call_tramps(file);
-	if (ret)
-		return ret;
-
 	return 0;
 }