linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] add binary printf
@ 2009-02-24  5:17 Frederic Weisbecker
  2009-02-26 13:02 ` Ingo Molnar
  0 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-24  5:17 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: linux-kernel, Steven Rostedt, Lai Jiangshan, Peter Zijlstra

From: Lai Jiangshan <laijs@cn.fujitsu.com>

Impact: Add APIs for binary trace printk infrastructure

vbin_printf(): write args to binary buffer, string is copied
when "%s" is occurred.

bstr_printf(): read from binary buffer for args and format a string

[fweisbec@gmail.com: ported to latest -tip]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/string.h |    7 +
 lib/Kconfig            |    3 +
 lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 452 insertions(+), 0 deletions(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index d18fc19..27ac317 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>	/* for size_t */
 #include <linux/stddef.h>	/* for NULL */
+#include <stdarg.h>
 
 extern char *strndup_user(const char __user *, long);
 
@@ -111,6 +112,12 @@ extern void argv_free(char **argv);
 
 extern bool sysfs_streq(const char *s1, const char *s2);
 
+#ifdef CONFIG_BINARY_PRINTF
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
+#endif
+
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
 			const void *from, size_t available);
 
diff --git a/lib/Kconfig b/lib/Kconfig
index fc8ea1c..cd57690 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -2,6 +2,9 @@
 # Library configuration
 #
 
+config BINARY_PRINTF
+	def_bool n
+
 menu "Library routines"
 
 config BITREVERSE
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0fbd012..3543bbe 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1058,6 +1058,448 @@ int sprintf(char * buf, const char *fmt, ...)
 }
 EXPORT_SYMBOL(sprintf);
 
+#ifdef CONFIG_BINARY_PRINTF
+/*
+ * bprintf service:
+ * vbin_printf() - VA arguments to binary data
+ * bstr_printf() - Binary data to text string
+ */
+
+/**
+ * vbin_printf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer(by words(32bits), not characters)
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The format follows C99 vsnprintf, except %n is ignored, and its argument
+ * is skiped.
+ *
+ * The return value is the number of words(32bits) which would be generated for
+ * the given input.
+ *
+ * NOTE:
+ * If the return value is greater than @size, the resulting bin_buf is NOT
+ * valid for bstr_printf().
+ */
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
+{
+	char *str, *end;
+	int qualifier;
+
+	str = (char *)bin_buf;
+	end = (char *)(bin_buf + size);
+
+#define save_arg(type)							\
+do {									\
+	if (sizeof(type) == 8) {					\
+		unsigned long long value;				\
+		str = PTR_ALIGN(str, sizeof(u32));			\
+		value = va_arg(args, unsigned long long);		\
+		if (str + sizeof(type) <= end) {			\
+			*(u32 *)str = *(u32 *)&value;			\
+			*(u32 *)(str + 4) = *((u32 *)&value + 1);	\
+		}							\
+	} else {							\
+		unsigned long value;					\
+		str = PTR_ALIGN(str, sizeof(type));			\
+		value = va_arg(args, int);				\
+		if (str + sizeof(type) <= end)				\
+			*(typeof(type) *)str = (type)value;		\
+	}								\
+	str += sizeof(type);						\
+} while (0)
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%')
+			continue;
+
+repeat:
+		/* parse flags */
+		++fmt;		/* this also skips first '%' */
+		if (*fmt == '-' || *fmt == '+' || *fmt == ' '
+				|| *fmt == '#' || *fmt == '0')
+			goto repeat;
+
+		/* parse field width */
+		if (isdigit(*fmt))
+			skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			save_arg(int);
+		}
+
+		/* parse the precision */
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				save_arg(int);
+			}
+		}
+
+		/* parse the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+			qualifier = *fmt;
+			++fmt;
+			if (qualifier == 'l' && *fmt == 'l') {
+				qualifier = 'L';
+				++fmt;
+			}
+		}
+
+		/* parse format type */
+		switch (*fmt) {
+		case 'c':
+			save_arg(char);
+			continue;
+		case 's': {
+			/* save the string argument */
+			const char *save_str = va_arg(args, char *);
+			size_t len;
+			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
+					|| (unsigned long)save_str < PAGE_SIZE)
+				save_str = "<NULL>";
+			len = strlen(save_str);
+			if (str + len + 1 < end)
+				memcpy(str, save_str, len + 1);
+			str += len + 1;
+			continue;
+		}
+		case 'p':
+			save_arg(void *);
+			/* skip all alphanumeric pointer suffixes */
+			while (isalnum(fmt[1]))
+				fmt++;
+			continue;
+		case 'n': {
+			/* skip %n 's argument */
+			void *skip_arg;
+			if (qualifier == 'l')
+				skip_arg = va_arg(args, long *);
+			else if (qualifier == 'Z' || qualifier == 'z')
+				skip_arg = va_arg(args, size_t *);
+			else
+				skip_arg = va_arg(args, int *);
+			continue;
+		}
+		case 'o':
+		case 'x':
+		case 'X':
+		case 'd':
+		case 'i':
+		case 'u':
+			/* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */
+			if (qualifier == 'L')
+				save_arg(long long);
+			else if (qualifier == 'l')
+				save_arg(unsigned long);
+			else if (qualifier == 'Z' || qualifier == 'z')
+				save_arg(size_t);
+			else if (qualifier == 't')
+				save_arg(ptrdiff_t);
+			else if (qualifier == 'h')
+				save_arg(short);
+			else
+				save_arg(int);
+			continue;
+		default:
+			if (!*fmt)
+				--fmt;
+			continue;
+		}
+	}
+#undef save_arg
+
+	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+}
+EXPORT_SYMBOL_GPL(vbin_printf);
+
+/**
+ * bstr_printf - Format a string from binary arguments and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @bin_buf: Binary arguments for the format string
+ *
+ * This function like C99 vsnprintf, but the difference is that vsnprintf gets
+ * arguments from stack, and bstr_printf gets arguments from @bin_buf which is
+ * a binary buffer that generated by vbin_printf.
+ *
+ * The format follows C99 vsnprintf, but has some extensions:
+ * %pS output the name of a text symbol
+ * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
+ * %n is ignored
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf(). If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ */
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
+{
+	unsigned long long num;
+	int base;
+	char *str, *end, c;
+	const char *args = (const char *)bin_buf;
+
+	int flags;
+	int field_width;
+	int precision;
+	int qualifier;
+
+	if (unlikely((int) size < 0)) {
+		/* There can be only one.. */
+		static char warn = 1;
+		WARN_ON(warn);
+		warn = 0;
+		return 0;
+	}
+
+	str = buf;
+	end = buf + size;
+
+#define get_arg(type)							\
+({									\
+	typeof(type) value;						\
+	if (sizeof(type) == 8) {					\
+		args = PTR_ALIGN(args, sizeof(u32));			\
+		*(u32 *)&value = *(u32 *)args;				\
+		*((u32 *)&value + 1) = *(u32 *)(args + 4);		\
+	} else {							\
+		args = PTR_ALIGN(args, sizeof(type));			\
+		value = *(typeof(type) *)args;				\
+	}								\
+	args += sizeof(type);						\
+	value;								\
+})
+
+	/* Make sure end is always >= buf */
+	if (end < buf) {
+		end = ((void *)-1);
+		size = end - buf;
+	}
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			if (str < end)
+				*str = *fmt;
+			++str;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+repeat:
+		++fmt;		/* this also skips first '%' */
+		switch (*fmt) {
+		case '-':
+			flags |= LEFT;
+			goto repeat;
+		case '+':
+			flags |= PLUS;
+			goto repeat;
+		case ' ':
+			flags |= SPACE;
+			goto repeat;
+		case '#':
+			flags |= SPECIAL;
+			goto repeat;
+		case '0':
+			flags |= ZEROPAD;
+			goto repeat;
+		}
+
+		/* get field width */
+		field_width = -1;
+		if (isdigit(*fmt))
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = get_arg(int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = get_arg(int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+			qualifier = *fmt;
+			++fmt;
+			if (qualifier == 'l' && *fmt == 'l') {
+				qualifier = 'L';
+				++fmt;
+			}
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+		case 'c':
+			if (!(flags & LEFT)) {
+				while (--field_width > 0) {
+					if (str < end)
+						*str = ' ';
+					++str;
+				}
+			}
+			c = (unsigned char) get_arg(char);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--field_width > 0) {
+				if (str < end)
+					*str = ' ';
+				++str;
+			}
+			continue;
+
+		case 's':{
+			const char *str_arg = args;
+			size_t len = strlen(str_arg);
+			args += len + 1;
+			str = string(str, end, (char *)str_arg, field_width,
+					precision, flags);
+			continue;
+		}
+
+		case 'p':
+			str = pointer(fmt+1, str, end, get_arg(void *),
+					field_width, precision, flags);
+			/* Skip all alphanumeric pointer suffixes */
+			while (isalnum(fmt[1]))
+				fmt++;
+			continue;
+
+		case 'n':
+			/* skip %n */
+			continue;
+
+		case '%':
+			if (str < end)
+				*str = '%';
+			++str;
+			continue;
+
+		/* integer number formats - set up the flags and "break" */
+		case 'o':
+			base = 8;
+			break;
+
+		case 'x':
+			flags |= SMALL;
+		case 'X':
+			base = 16;
+			break;
+
+		case 'd':
+		case 'i':
+			flags |= SIGN;
+		case 'u':
+			break;
+
+		default:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
+				if (str < end)
+					*str = *fmt;
+				++str;
+			} else {
+				--fmt;
+			}
+			continue;
+		}
+		if (qualifier == 'L')
+			num = get_arg(long long);
+		else if (qualifier == 'l') {
+			num = get_arg(unsigned long);
+			if (flags & SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'Z' || qualifier == 'z') {
+			num = get_arg(size_t);
+		} else if (qualifier == 't') {
+			num = get_arg(ptrdiff_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) get_arg(short);
+			if (flags & SIGN)
+				num = (signed short) num;
+		} else {
+			num = get_arg(unsigned int);
+			if (flags & SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, end, num, base,
+				field_width, precision, flags);
+	}
+	if (size > 0) {
+		if (str < end)
+			*str = '\0';
+		else
+			end[-1] = '\0';
+	}
+#undef get_arg
+
+	/* the trailing null byte doesn't count towards the total */
+	return str - buf;
+}
+EXPORT_SYMBOL_GPL(bstr_printf);
+
+/**
+ * bprintf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer(by words(32bits), not characters)
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The function returns the number of words(u32) written
+ * into @bin_buf.
+ */
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = vbin_printf(bin_buf, size, fmt, args);
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bprintf);
+
+#endif /* CONFIG_BINARY_PRINTF */
+
 /**
  * vsscanf - Unformat a buffer into a list of arguments
  * @buf:	input buffer
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-24  5:17 [PATCH 1/3] add binary printf Frederic Weisbecker
@ 2009-02-26 13:02 ` Ingo Molnar
  2009-02-26 17:05   ` Frederic Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Ingo Molnar @ 2009-02-26 13:02 UTC (permalink / raw)
  To: Frederic Weisbecker, Linus Torvalds, Andrew Morton
  Cc: linux-kernel, Steven Rostedt, Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> From: Lai Jiangshan <laijs@cn.fujitsu.com>
> 
> Impact: Add APIs for binary trace printk infrastructure
> 
> vbin_printf(): write args to binary buffer, string is copied
> when "%s" is occurred.
> 
> bstr_printf(): read from binary buffer for args and format a string
> 
> [fweisbec@gmail.com: ported to latest -tip]
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> ---
>  include/linux/string.h |    7 +
>  lib/Kconfig            |    3 +
>  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 452 insertions(+), 0 deletions(-)

OK, it's a nice idea and speedup for printf based tracing - 
which is common and convenient. Would you mind to post the 
performance measurements you've done using the new bstr_printf() 
facility? (the nanoseconds latency figures you did in the timer 
irq in a system under load and on a system that is idle)

The new printf code itself should be done cleaner i think and is 
not acceptable in its current form.

These two new functions:

> +#ifdef CONFIG_BINARY_PRINTF
> +/*
> + * bprintf service:
> + * vbin_printf() - VA arguments to binary data
> + * bstr_printf() - Binary data to text string
> + */

Duplicate hundreds of lines of code into three large functions 
(vsnprintf, vbin_printf, bstr_printf). These functions only have 
a difference in the way the argument list is iterated and the 
way the parsed result is stored:

  vsnprintf:   iterates va_list, stores into string
  bstr_printf: iterates bin_buf, stores into string
  vbin_printf: iterates va_list, stores into bin_buf

We should try _much_ harder at unifying these functions before 
giving up and duplicating them...

An opaque in_buf/out_buf handle plus two helper function 
pointers passed in would be an obvious implementation.

That way we'd have a single generic (inline) function that knows 
about the printf format itself:

 __generic_printf(void *in_buf,
		  void *out_buf,
		  void * (*read_in_buf)(void **),
		  void * (*store_out_buf)(void **));

And we'd have various variants for read_in_buf and 
store_out_buf. The generic function iterates the following way:

	in_val = read_in_buf(&in_buf);
	...
	store_out_buf(&out_buf, in_val);

(where in_val is wide enough to store a single argument.) The 
iterators modify the in_buf / out_buf pointers. Argument 
skipping can be done by reading the in-buf and not using it. I 
think we can do it with just two iterator methods.

Or something like that - you get the idea. It can all be inlined 
so that we'd end up with essentially the same vsnprint() 
instruction sequence we have today.

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 13:02 ` Ingo Molnar
@ 2009-02-26 17:05   ` Frederic Weisbecker
  2009-02-26 17:43     ` Ingo Molnar
  0 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 17:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > 
> > Impact: Add APIs for binary trace printk infrastructure
> > 
> > vbin_printf(): write args to binary buffer, string is copied
> > when "%s" is occurred.
> > 
> > bstr_printf(): read from binary buffer for args and format a string
> > 
> > [fweisbec@gmail.com: ported to latest -tip]
> > 
> > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > ---
> >  include/linux/string.h |    7 +
> >  lib/Kconfig            |    3 +
> >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 452 insertions(+), 0 deletions(-)
> 
> OK, it's a nice idea and speedup for printf based tracing - 
> which is common and convenient. Would you mind to post the 
> performance measurements you've done using the new bstr_printf() 
> facility? (the nanoseconds latency figures you did in the timer 
> irq in a system under load and on a system that is idle)


Ok.

 
> The new printf code itself should be done cleaner i think and is 
> not acceptable in its current form.
> 
> These two new functions:
> 
> > +#ifdef CONFIG_BINARY_PRINTF
> > +/*
> > + * bprintf service:
> > + * vbin_printf() - VA arguments to binary data
> > + * bstr_printf() - Binary data to text string
> > + */
> 
> Duplicate hundreds of lines of code into three large functions 
> (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> a difference in the way the argument list is iterated and the 
> way the parsed result is stored:
> 
>   vsnprintf:   iterates va_list, stores into string
>   bstr_printf: iterates bin_buf, stores into string
>   vbin_printf: iterates va_list, stores into bin_buf
> 
> We should try _much_ harder at unifying these functions before 
> giving up and duplicating them...
> 
> An opaque in_buf/out_buf handle plus two helper function 
> pointers passed in would be an obvious implementation.
> 
> That way we'd have a single generic (inline) function that knows 
> about the printf format itself:
> 
>  __generic_printf(void *in_buf,
> 		  void *out_buf,
> 		  void * (*read_in_buf)(void **),
> 		  void * (*store_out_buf)(void **));
> 
> And we'd have various variants for read_in_buf and 
> store_out_buf. The generic function iterates the following way:
> 
> 	in_val = read_in_buf(&in_buf);
> 	...
> 	store_out_buf(&out_buf, in_val);
> 
> (where in_val is wide enough to store a single argument.) The 
> iterators modify the in_buf / out_buf pointers. Argument 
> skipping can be done by reading the in-buf and not using it. I 
> think we can do it with just two iterator methods.
> 
> Or something like that - you get the idea. It can all be inlined 
> so that we'd end up with essentially the same vsnprint() 
> instruction sequence we have today.
> 
> 	Ingo


Ok, I just looked deeply inside vsnprintf, and I don't think such
a generic interface would allow that. We need to know the size of the argument,
it's precision, width and flags.... And we need to know if we want to skip the non
format char.


What do you think of the following:

__ generic_printf(void *in,
		  void *out,
		  void *(*read_in)(void **buf, int size),
		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
					int field_width, int precision, int flags),
		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
		  bool skip_non_format
		  )


Well, something like that...

read_in can advance the pointer to the buffer itself (buf can be a va_args or u32 *)
and it returns a value, void * is generic for the type.

The storage functions are more specialized because of the interpretation of flags, precision...
So we can easily pass the usual string(), pointer(), .... that are already present in vsnprintf.c
or use custom ones.
They return the advanced dst pointer.

And at last, skip_non_format will decide if we want to consider non-format characters from fmt
to be copied as common %c characters or if we want to ignore them (useful for vbin_printf()).

Hmm?


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 17:05   ` Frederic Weisbecker
@ 2009-02-26 17:43     ` Ingo Molnar
  2009-02-26 17:45       ` Frederic Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Ingo Molnar @ 2009-02-26 17:43 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > 
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > 
> > > Impact: Add APIs for binary trace printk infrastructure
> > > 
> > > vbin_printf(): write args to binary buffer, string is copied
> > > when "%s" is occurred.
> > > 
> > > bstr_printf(): read from binary buffer for args and format a string
> > > 
> > > [fweisbec@gmail.com: ported to latest -tip]
> > > 
> > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > ---
> > >  include/linux/string.h |    7 +
> > >  lib/Kconfig            |    3 +
> > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > 
> > OK, it's a nice idea and speedup for printf based tracing - 
> > which is common and convenient. Would you mind to post the 
> > performance measurements you've done using the new bstr_printf() 
> > facility? (the nanoseconds latency figures you did in the timer 
> > irq in a system under load and on a system that is idle)
> 
> 
> Ok.
> 
>  
> > The new printf code itself should be done cleaner i think and is 
> > not acceptable in its current form.
> > 
> > These two new functions:
> > 
> > > +#ifdef CONFIG_BINARY_PRINTF
> > > +/*
> > > + * bprintf service:
> > > + * vbin_printf() - VA arguments to binary data
> > > + * bstr_printf() - Binary data to text string
> > > + */
> > 
> > Duplicate hundreds of lines of code into three large functions 
> > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > a difference in the way the argument list is iterated and the 
> > way the parsed result is stored:
> > 
> >   vsnprintf:   iterates va_list, stores into string
> >   bstr_printf: iterates bin_buf, stores into string
> >   vbin_printf: iterates va_list, stores into bin_buf
> > 
> > We should try _much_ harder at unifying these functions before 
> > giving up and duplicating them...
> > 
> > An opaque in_buf/out_buf handle plus two helper function 
> > pointers passed in would be an obvious implementation.
> > 
> > That way we'd have a single generic (inline) function that knows 
> > about the printf format itself:
> > 
> >  __generic_printf(void *in_buf,
> > 		  void *out_buf,
> > 		  void * (*read_in_buf)(void **),
> > 		  void * (*store_out_buf)(void **));
> > 
> > And we'd have various variants for read_in_buf and 
> > store_out_buf. The generic function iterates the following way:
> > 
> > 	in_val = read_in_buf(&in_buf);
> > 	...
> > 	store_out_buf(&out_buf, in_val);
> > 
> > (where in_val is wide enough to store a single argument.) The 
> > iterators modify the in_buf / out_buf pointers. Argument 
> > skipping can be done by reading the in-buf and not using it. I 
> > think we can do it with just two iterator methods.
> > 
> > Or something like that - you get the idea. It can all be inlined 
> > so that we'd end up with essentially the same vsnprint() 
> > instruction sequence we have today.
> > 
> > 	Ingo
> 
> 
> Ok, I just looked deeply inside vsnprintf, and I don't think 
> such a generic interface would allow that. We need to know the 
> size of the argument, it's precision, width and flags.... And 
> we need to know if we want to skip the non format char.
> 
> 
> What do you think of the following:
> 
> __ generic_printf(void *in,
> 		  void *out,
> 		  void *(*read_in)(void **buf, int size),
> 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> 					int field_width, int precision, int flags),
> 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> 		  bool skip_non_format
> 		  )
> 
> 
> Well, something like that...
> 
> read_in can advance the pointer to the buffer itself (buf can 
> be a va_args or u32 *) and it returns a value, void * is 
> generic for the type.
> 
> The storage functions are more specialized because of the 
> interpretation of flags, precision... So we can easily pass 
> the usual string(), pointer(), .... that are already present 
> in vsnprintf.c or use custom ones. They return the advanced 
> dst pointer.
> 
> And at last, skip_non_format will decide if we want to 
> consider non-format characters from fmt to be copied as common 
> %c characters or if we want to ignore them (useful for 
> vbin_printf()).

hm, that indeed looks very wide - storing into a binary buffer 
does complicate the iterator interface significantly.

But at least vsnprintf() and bstr_printf() could be unified - 
they both output into a string buffer, just have different 
methods to iterate arguments.

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 17:43     ` Ingo Molnar
@ 2009-02-26 17:45       ` Frederic Weisbecker
  2009-02-26 17:52         ` Ingo Molnar
  0 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 17:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 06:43:03PM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > > 
> > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > 
> > > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > 
> > > > Impact: Add APIs for binary trace printk infrastructure
> > > > 
> > > > vbin_printf(): write args to binary buffer, string is copied
> > > > when "%s" is occurred.
> > > > 
> > > > bstr_printf(): read from binary buffer for args and format a string
> > > > 
> > > > [fweisbec@gmail.com: ported to latest -tip]
> > > > 
> > > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > ---
> > > >  include/linux/string.h |    7 +
> > > >  lib/Kconfig            |    3 +
> > > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > > 
> > > OK, it's a nice idea and speedup for printf based tracing - 
> > > which is common and convenient. Would you mind to post the 
> > > performance measurements you've done using the new bstr_printf() 
> > > facility? (the nanoseconds latency figures you did in the timer 
> > > irq in a system under load and on a system that is idle)
> > 
> > 
> > Ok.
> > 
> >  
> > > The new printf code itself should be done cleaner i think and is 
> > > not acceptable in its current form.
> > > 
> > > These two new functions:
> > > 
> > > > +#ifdef CONFIG_BINARY_PRINTF
> > > > +/*
> > > > + * bprintf service:
> > > > + * vbin_printf() - VA arguments to binary data
> > > > + * bstr_printf() - Binary data to text string
> > > > + */
> > > 
> > > Duplicate hundreds of lines of code into three large functions 
> > > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > > a difference in the way the argument list is iterated and the 
> > > way the parsed result is stored:
> > > 
> > >   vsnprintf:   iterates va_list, stores into string
> > >   bstr_printf: iterates bin_buf, stores into string
> > >   vbin_printf: iterates va_list, stores into bin_buf
> > > 
> > > We should try _much_ harder at unifying these functions before 
> > > giving up and duplicating them...
> > > 
> > > An opaque in_buf/out_buf handle plus two helper function 
> > > pointers passed in would be an obvious implementation.
> > > 
> > > That way we'd have a single generic (inline) function that knows 
> > > about the printf format itself:
> > > 
> > >  __generic_printf(void *in_buf,
> > > 		  void *out_buf,
> > > 		  void * (*read_in_buf)(void **),
> > > 		  void * (*store_out_buf)(void **));
> > > 
> > > And we'd have various variants for read_in_buf and 
> > > store_out_buf. The generic function iterates the following way:
> > > 
> > > 	in_val = read_in_buf(&in_buf);
> > > 	...
> > > 	store_out_buf(&out_buf, in_val);
> > > 
> > > (where in_val is wide enough to store a single argument.) The 
> > > iterators modify the in_buf / out_buf pointers. Argument 
> > > skipping can be done by reading the in-buf and not using it. I 
> > > think we can do it with just two iterator methods.
> > > 
> > > Or something like that - you get the idea. It can all be inlined 
> > > so that we'd end up with essentially the same vsnprint() 
> > > instruction sequence we have today.
> > > 
> > > 	Ingo
> > 
> > 
> > Ok, I just looked deeply inside vsnprintf, and I don't think 
> > such a generic interface would allow that. We need to know the 
> > size of the argument, it's precision, width and flags.... And 
> > we need to know if we want to skip the non format char.
> > 
> > 
> > What do you think of the following:
> > 
> > __ generic_printf(void *in,
> > 		  void *out,
> > 		  void *(*read_in)(void **buf, int size),
> > 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> > 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> > 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> > 					int field_width, int precision, int flags),
> > 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> > 		  bool skip_non_format
> > 		  )
> > 
> > 
> > Well, something like that...
> > 
> > read_in can advance the pointer to the buffer itself (buf can 
> > be a va_args or u32 *) and it returns a value, void * is 
> > generic for the type.
> > 
> > The storage functions are more specialized because of the 
> > interpretation of flags, precision... So we can easily pass 
> > the usual string(), pointer(), .... that are already present 
> > in vsnprintf.c or use custom ones. They return the advanced 
> > dst pointer.
> > 
> > And at last, skip_non_format will decide if we want to 
> > consider non-format characters from fmt to be copied as common 
> > %c characters or if we want to ignore them (useful for 
> > vbin_printf()).
> 
> hm, that indeed looks very wide - storing into a binary buffer 
> does complicate the iterator interface significantly.
> 
> But at least vsnprintf() and bstr_printf() could be unified - 
> they both output into a string buffer, just have different 
> methods to iterate arguments.
> 
> 	Ingo

Right, ok I'm on it.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 17:45       ` Frederic Weisbecker
@ 2009-02-26 17:52         ` Ingo Molnar
  2009-02-26 18:34           ` Frederic Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Ingo Molnar @ 2009-02-26 17:52 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> On Thu, Feb 26, 2009 at 06:43:03PM +0100, Ingo Molnar wrote:
> > 
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > > > 
> > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > 
> > > > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > 
> > > > > Impact: Add APIs for binary trace printk infrastructure
> > > > > 
> > > > > vbin_printf(): write args to binary buffer, string is copied
> > > > > when "%s" is occurred.
> > > > > 
> > > > > bstr_printf(): read from binary buffer for args and format a string
> > > > > 
> > > > > [fweisbec@gmail.com: ported to latest -tip]
> > > > > 
> > > > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > ---
> > > > >  include/linux/string.h |    7 +
> > > > >  lib/Kconfig            |    3 +
> > > > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > > > 
> > > > OK, it's a nice idea and speedup for printf based tracing - 
> > > > which is common and convenient. Would you mind to post the 
> > > > performance measurements you've done using the new bstr_printf() 
> > > > facility? (the nanoseconds latency figures you did in the timer 
> > > > irq in a system under load and on a system that is idle)
> > > 
> > > 
> > > Ok.
> > > 
> > >  
> > > > The new printf code itself should be done cleaner i think and is 
> > > > not acceptable in its current form.
> > > > 
> > > > These two new functions:
> > > > 
> > > > > +#ifdef CONFIG_BINARY_PRINTF
> > > > > +/*
> > > > > + * bprintf service:
> > > > > + * vbin_printf() - VA arguments to binary data
> > > > > + * bstr_printf() - Binary data to text string
> > > > > + */
> > > > 
> > > > Duplicate hundreds of lines of code into three large functions 
> > > > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > > > a difference in the way the argument list is iterated and the 
> > > > way the parsed result is stored:
> > > > 
> > > >   vsnprintf:   iterates va_list, stores into string
> > > >   bstr_printf: iterates bin_buf, stores into string
> > > >   vbin_printf: iterates va_list, stores into bin_buf
> > > > 
> > > > We should try _much_ harder at unifying these functions before 
> > > > giving up and duplicating them...
> > > > 
> > > > An opaque in_buf/out_buf handle plus two helper function 
> > > > pointers passed in would be an obvious implementation.
> > > > 
> > > > That way we'd have a single generic (inline) function that knows 
> > > > about the printf format itself:
> > > > 
> > > >  __generic_printf(void *in_buf,
> > > > 		  void *out_buf,
> > > > 		  void * (*read_in_buf)(void **),
> > > > 		  void * (*store_out_buf)(void **));
> > > > 
> > > > And we'd have various variants for read_in_buf and 
> > > > store_out_buf. The generic function iterates the following way:
> > > > 
> > > > 	in_val = read_in_buf(&in_buf);
> > > > 	...
> > > > 	store_out_buf(&out_buf, in_val);
> > > > 
> > > > (where in_val is wide enough to store a single argument.) The 
> > > > iterators modify the in_buf / out_buf pointers. Argument 
> > > > skipping can be done by reading the in-buf and not using it. I 
> > > > think we can do it with just two iterator methods.
> > > > 
> > > > Or something like that - you get the idea. It can all be inlined 
> > > > so that we'd end up with essentially the same vsnprint() 
> > > > instruction sequence we have today.
> > > > 
> > > > 	Ingo
> > > 
> > > 
> > > Ok, I just looked deeply inside vsnprintf, and I don't think 
> > > such a generic interface would allow that. We need to know the 
> > > size of the argument, it's precision, width and flags.... And 
> > > we need to know if we want to skip the non format char.
> > > 
> > > 
> > > What do you think of the following:
> > > 
> > > __ generic_printf(void *in,
> > > 		  void *out,
> > > 		  void *(*read_in)(void **buf, int size),
> > > 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> > > 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> > > 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> > > 					int field_width, int precision, int flags),
> > > 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> > > 		  bool skip_non_format
> > > 		  )
> > > 
> > > 
> > > Well, something like that...
> > > 
> > > read_in can advance the pointer to the buffer itself (buf can 
> > > be a va_args or u32 *) and it returns a value, void * is 
> > > generic for the type.
> > > 
> > > The storage functions are more specialized because of the 
> > > interpretation of flags, precision... So we can easily pass 
> > > the usual string(), pointer(), .... that are already present 
> > > in vsnprintf.c or use custom ones. They return the advanced 
> > > dst pointer.
> > > 
> > > And at last, skip_non_format will decide if we want to 
> > > consider non-format characters from fmt to be copied as common 
> > > %c characters or if we want to ignore them (useful for 
> > > vbin_printf()).
> > 
> > hm, that indeed looks very wide - storing into a binary buffer 
> > does complicate the iterator interface significantly.
> > 
> > But at least vsnprintf() and bstr_printf() could be unified - 
> > they both output into a string buffer, just have different 
> > methods to iterate arguments.
> > 
> > 	Ingo
> 
> Right, ok I'm on it.

hm, it would still be nice to get vbin_printf() into the same 
scheme too.

And i think we can do it by unconditionally tracking field type 
and width in the generic helper - instead of passing this across 
the abstraction barrier like your proposal did. vsnprintf() and 
bstr_printf() wont make use of it - but vbin_printf() will.

Am i missing anything?

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 17:52         ` Ingo Molnar
@ 2009-02-26 18:34           ` Frederic Weisbecker
  2009-02-26 18:45             ` Ingo Molnar
  2009-02-26 18:52             ` Frederic Weisbecker
  0 siblings, 2 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 18:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 06:52:25PM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > On Thu, Feb 26, 2009 at 06:43:03PM +0100, Ingo Molnar wrote:
> > > 
> > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > 
> > > > On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > > > > 
> > > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > > 
> > > > > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > 
> > > > > > Impact: Add APIs for binary trace printk infrastructure
> > > > > > 
> > > > > > vbin_printf(): write args to binary buffer, string is copied
> > > > > > when "%s" is occurred.
> > > > > > 
> > > > > > bstr_printf(): read from binary buffer for args and format a string
> > > > > > 
> > > > > > [fweisbec@gmail.com: ported to latest -tip]
> > > > > > 
> > > > > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > > ---
> > > > > >  include/linux/string.h |    7 +
> > > > > >  lib/Kconfig            |    3 +
> > > > > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > > > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > > > > 
> > > > > OK, it's a nice idea and speedup for printf based tracing - 
> > > > > which is common and convenient. Would you mind to post the 
> > > > > performance measurements you've done using the new bstr_printf() 
> > > > > facility? (the nanoseconds latency figures you did in the timer 
> > > > > irq in a system under load and on a system that is idle)
> > > > 
> > > > 
> > > > Ok.
> > > > 
> > > >  
> > > > > The new printf code itself should be done cleaner i think and is 
> > > > > not acceptable in its current form.
> > > > > 
> > > > > These two new functions:
> > > > > 
> > > > > > +#ifdef CONFIG_BINARY_PRINTF
> > > > > > +/*
> > > > > > + * bprintf service:
> > > > > > + * vbin_printf() - VA arguments to binary data
> > > > > > + * bstr_printf() - Binary data to text string
> > > > > > + */
> > > > > 
> > > > > Duplicate hundreds of lines of code into three large functions 
> > > > > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > > > > a difference in the way the argument list is iterated and the 
> > > > > way the parsed result is stored:
> > > > > 
> > > > >   vsnprintf:   iterates va_list, stores into string
> > > > >   bstr_printf: iterates bin_buf, stores into string
> > > > >   vbin_printf: iterates va_list, stores into bin_buf
> > > > > 
> > > > > We should try _much_ harder at unifying these functions before 
> > > > > giving up and duplicating them...
> > > > > 
> > > > > An opaque in_buf/out_buf handle plus two helper function 
> > > > > pointers passed in would be an obvious implementation.
> > > > > 
> > > > > That way we'd have a single generic (inline) function that knows 
> > > > > about the printf format itself:
> > > > > 
> > > > >  __generic_printf(void *in_buf,
> > > > > 		  void *out_buf,
> > > > > 		  void * (*read_in_buf)(void **),
> > > > > 		  void * (*store_out_buf)(void **));
> > > > > 
> > > > > And we'd have various variants for read_in_buf and 
> > > > > store_out_buf. The generic function iterates the following way:
> > > > > 
> > > > > 	in_val = read_in_buf(&in_buf);
> > > > > 	...
> > > > > 	store_out_buf(&out_buf, in_val);
> > > > > 
> > > > > (where in_val is wide enough to store a single argument.) The 
> > > > > iterators modify the in_buf / out_buf pointers. Argument 
> > > > > skipping can be done by reading the in-buf and not using it. I 
> > > > > think we can do it with just two iterator methods.
> > > > > 
> > > > > Or something like that - you get the idea. It can all be inlined 
> > > > > so that we'd end up with essentially the same vsnprint() 
> > > > > instruction sequence we have today.
> > > > > 
> > > > > 	Ingo
> > > > 
> > > > 
> > > > Ok, I just looked deeply inside vsnprintf, and I don't think 
> > > > such a generic interface would allow that. We need to know the 
> > > > size of the argument, it's precision, width and flags.... And 
> > > > we need to know if we want to skip the non format char.
> > > > 
> > > > 
> > > > What do you think of the following:
> > > > 
> > > > __ generic_printf(void *in,
> > > > 		  void *out,
> > > > 		  void *(*read_in)(void **buf, int size),
> > > > 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> > > > 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> > > > 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> > > > 					int field_width, int precision, int flags),
> > > > 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> > > > 		  bool skip_non_format
> > > > 		  )
> > > > 
> > > > 
> > > > Well, something like that...
> > > > 
> > > > read_in can advance the pointer to the buffer itself (buf can 
> > > > be a va_args or u32 *) and it returns a value, void * is 
> > > > generic for the type.
> > > > 
> > > > The storage functions are more specialized because of the 
> > > > interpretation of flags, precision... So we can easily pass 
> > > > the usual string(), pointer(), .... that are already present 
> > > > in vsnprintf.c or use custom ones. They return the advanced 
> > > > dst pointer.
> > > > 
> > > > And at last, skip_non_format will decide if we want to 
> > > > consider non-format characters from fmt to be copied as common 
> > > > %c characters or if we want to ignore them (useful for 
> > > > vbin_printf()).
> > > 
> > > hm, that indeed looks very wide - storing into a binary buffer 
> > > does complicate the iterator interface significantly.
> > > 
> > > But at least vsnprintf() and bstr_printf() could be unified - 
> > > they both output into a string buffer, just have different 
> > > methods to iterate arguments.
> > > 
> > > 	Ingo
> > 
> > Right, ok I'm on it.
> 
> hm, it would still be nice to get vbin_printf() into the same 
> scheme too.
> 
> And i think we can do it by unconditionally tracking field type 
> and width in the generic helper - instead of passing this across 
> the abstraction barrier like your proposal did. vsnprintf() and 
> bstr_printf() wont make use of it - but vbin_printf() will.
> 
> Am i missing anything?
> 
> 	Ingo

Hm, I have some trouble to visualize it globally.

vsnprintf: takes char * for dest and va_list as src, write formatted in dest
bstr_printf: takes char * as dest and u32 * as src, read from src is specialized
vbin_printf: takes u32 * buf as dest and va_list as src, write to dest is specialized

The only thing that can be generic between the three is the format decoding.
All in-out operations must be abstracted if we want a common interface from the three.

So I'm not sure I have the choice.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 18:34           ` Frederic Weisbecker
@ 2009-02-26 18:45             ` Ingo Molnar
  2009-02-26 18:52             ` Frederic Weisbecker
  1 sibling, 0 replies; 44+ messages in thread
From: Ingo Molnar @ 2009-02-26 18:45 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> On Thu, Feb 26, 2009 at 06:52:25PM +0100, Ingo Molnar wrote:
> > 
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > On Thu, Feb 26, 2009 at 06:43:03PM +0100, Ingo Molnar wrote:
> > > > 
> > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > 
> > > > > On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > > > > > 
> > > > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > > > 
> > > > > > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > > 
> > > > > > > Impact: Add APIs for binary trace printk infrastructure
> > > > > > > 
> > > > > > > vbin_printf(): write args to binary buffer, string is copied
> > > > > > > when "%s" is occurred.
> > > > > > > 
> > > > > > > bstr_printf(): read from binary buffer for args and format a string
> > > > > > > 
> > > > > > > [fweisbec@gmail.com: ported to latest -tip]
> > > > > > > 
> > > > > > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > > > ---
> > > > > > >  include/linux/string.h |    7 +
> > > > > > >  lib/Kconfig            |    3 +
> > > > > > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > > > > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > > > > > 
> > > > > > OK, it's a nice idea and speedup for printf based tracing - 
> > > > > > which is common and convenient. Would you mind to post the 
> > > > > > performance measurements you've done using the new bstr_printf() 
> > > > > > facility? (the nanoseconds latency figures you did in the timer 
> > > > > > irq in a system under load and on a system that is idle)
> > > > > 
> > > > > 
> > > > > Ok.
> > > > > 
> > > > >  
> > > > > > The new printf code itself should be done cleaner i think and is 
> > > > > > not acceptable in its current form.
> > > > > > 
> > > > > > These two new functions:
> > > > > > 
> > > > > > > +#ifdef CONFIG_BINARY_PRINTF
> > > > > > > +/*
> > > > > > > + * bprintf service:
> > > > > > > + * vbin_printf() - VA arguments to binary data
> > > > > > > + * bstr_printf() - Binary data to text string
> > > > > > > + */
> > > > > > 
> > > > > > Duplicate hundreds of lines of code into three large functions 
> > > > > > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > > > > > a difference in the way the argument list is iterated and the 
> > > > > > way the parsed result is stored:
> > > > > > 
> > > > > >   vsnprintf:   iterates va_list, stores into string
> > > > > >   bstr_printf: iterates bin_buf, stores into string
> > > > > >   vbin_printf: iterates va_list, stores into bin_buf
> > > > > > 
> > > > > > We should try _much_ harder at unifying these functions before 
> > > > > > giving up and duplicating them...
> > > > > > 
> > > > > > An opaque in_buf/out_buf handle plus two helper function 
> > > > > > pointers passed in would be an obvious implementation.
> > > > > > 
> > > > > > That way we'd have a single generic (inline) function that knows 
> > > > > > about the printf format itself:
> > > > > > 
> > > > > >  __generic_printf(void *in_buf,
> > > > > > 		  void *out_buf,
> > > > > > 		  void * (*read_in_buf)(void **),
> > > > > > 		  void * (*store_out_buf)(void **));
> > > > > > 
> > > > > > And we'd have various variants for read_in_buf and 
> > > > > > store_out_buf. The generic function iterates the following way:
> > > > > > 
> > > > > > 	in_val = read_in_buf(&in_buf);
> > > > > > 	...
> > > > > > 	store_out_buf(&out_buf, in_val);
> > > > > > 
> > > > > > (where in_val is wide enough to store a single argument.) The 
> > > > > > iterators modify the in_buf / out_buf pointers. Argument 
> > > > > > skipping can be done by reading the in-buf and not using it. I 
> > > > > > think we can do it with just two iterator methods.
> > > > > > 
> > > > > > Or something like that - you get the idea. It can all be inlined 
> > > > > > so that we'd end up with essentially the same vsnprint() 
> > > > > > instruction sequence we have today.
> > > > > > 
> > > > > > 	Ingo
> > > > > 
> > > > > 
> > > > > Ok, I just looked deeply inside vsnprintf, and I don't think 
> > > > > such a generic interface would allow that. We need to know the 
> > > > > size of the argument, it's precision, width and flags.... And 
> > > > > we need to know if we want to skip the non format char.
> > > > > 
> > > > > 
> > > > > What do you think of the following:
> > > > > 
> > > > > __ generic_printf(void *in,
> > > > > 		  void *out,
> > > > > 		  void *(*read_in)(void **buf, int size),
> > > > > 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> > > > > 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> > > > > 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> > > > > 					int field_width, int precision, int flags),
> > > > > 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> > > > > 		  bool skip_non_format
> > > > > 		  )
> > > > > 
> > > > > 
> > > > > Well, something like that...
> > > > > 
> > > > > read_in can advance the pointer to the buffer itself (buf can 
> > > > > be a va_args or u32 *) and it returns a value, void * is 
> > > > > generic for the type.
> > > > > 
> > > > > The storage functions are more specialized because of the 
> > > > > interpretation of flags, precision... So we can easily pass 
> > > > > the usual string(), pointer(), .... that are already present 
> > > > > in vsnprintf.c or use custom ones. They return the advanced 
> > > > > dst pointer.
> > > > > 
> > > > > And at last, skip_non_format will decide if we want to 
> > > > > consider non-format characters from fmt to be copied as common 
> > > > > %c characters or if we want to ignore them (useful for 
> > > > > vbin_printf()).
> > > > 
> > > > hm, that indeed looks very wide - storing into a binary buffer 
> > > > does complicate the iterator interface significantly.
> > > > 
> > > > But at least vsnprintf() and bstr_printf() could be unified - 
> > > > they both output into a string buffer, just have different 
> > > > methods to iterate arguments.
> > > > 
> > > > 	Ingo
> > > 
> > > Right, ok I'm on it.
> > 
> > hm, it would still be nice to get vbin_printf() into the same 
> > scheme too.
> > 
> > And i think we can do it by unconditionally tracking field type 
> > and width in the generic helper - instead of passing this across 
> > the abstraction barrier like your proposal did. vsnprintf() and 
> > bstr_printf() wont make use of it - but vbin_printf() will.
> > 
> > Am i missing anything?
> > 
> > 	Ingo
> 
> Hm, I have some trouble to visualize it globally.
> 
> vsnprintf: takes char * for dest and va_list as src, write formatted in dest
> bstr_printf: takes char * as dest and u32 * as src, read from src is specialized
> vbin_printf: takes u32 * buf as dest and va_list as src, write to dest is specialized
> 
> The only thing that can be generic between the three is the 
> format decoding. All in-out operations must be abstracted if 
> we want a common interface from the three.

the thing is, the format decoding (and subsequent rendering of 
characters) is the only _interesting_ thing that happens here. 
So that is the thing we want to duplicate the least.

But yeah, i can see the complexity explosion with the 
abstraction interface. Too bad. And we use both new APIs.

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 18:34           ` Frederic Weisbecker
  2009-02-26 18:45             ` Ingo Molnar
@ 2009-02-26 18:52             ` Frederic Weisbecker
  2009-02-26 18:56               ` Ingo Molnar
  1 sibling, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 18:52 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 07:34:16PM +0100, Frederic Weisbecker wrote:
> On Thu, Feb 26, 2009 at 06:52:25PM +0100, Ingo Molnar wrote:
> > 
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > On Thu, Feb 26, 2009 at 06:43:03PM +0100, Ingo Molnar wrote:
> > > > 
> > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > 
> > > > > On Thu, Feb 26, 2009 at 02:02:43PM +0100, Ingo Molnar wrote:
> > > > > > 
> > > > > > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > > > > > 
> > > > > > > From: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > > 
> > > > > > > Impact: Add APIs for binary trace printk infrastructure
> > > > > > > 
> > > > > > > vbin_printf(): write args to binary buffer, string is copied
> > > > > > > when "%s" is occurred.
> > > > > > > 
> > > > > > > bstr_printf(): read from binary buffer for args and format a string
> > > > > > > 
> > > > > > > [fweisbec@gmail.com: ported to latest -tip]
> > > > > > > 
> > > > > > > Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> > > > > > > Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> > > > > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > > > > ---
> > > > > > >  include/linux/string.h |    7 +
> > > > > > >  lib/Kconfig            |    3 +
> > > > > > >  lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
> > > > > > >  3 files changed, 452 insertions(+), 0 deletions(-)
> > > > > > 
> > > > > > OK, it's a nice idea and speedup for printf based tracing - 
> > > > > > which is common and convenient. Would you mind to post the 
> > > > > > performance measurements you've done using the new bstr_printf() 
> > > > > > facility? (the nanoseconds latency figures you did in the timer 
> > > > > > irq in a system under load and on a system that is idle)
> > > > > 
> > > > > 
> > > > > Ok.
> > > > > 
> > > > >  
> > > > > > The new printf code itself should be done cleaner i think and is 
> > > > > > not acceptable in its current form.
> > > > > > 
> > > > > > These two new functions:
> > > > > > 
> > > > > > > +#ifdef CONFIG_BINARY_PRINTF
> > > > > > > +/*
> > > > > > > + * bprintf service:
> > > > > > > + * vbin_printf() - VA arguments to binary data
> > > > > > > + * bstr_printf() - Binary data to text string
> > > > > > > + */
> > > > > > 
> > > > > > Duplicate hundreds of lines of code into three large functions 
> > > > > > (vsnprintf, vbin_printf, bstr_printf). These functions only have 
> > > > > > a difference in the way the argument list is iterated and the 
> > > > > > way the parsed result is stored:
> > > > > > 
> > > > > >   vsnprintf:   iterates va_list, stores into string
> > > > > >   bstr_printf: iterates bin_buf, stores into string
> > > > > >   vbin_printf: iterates va_list, stores into bin_buf
> > > > > > 
> > > > > > We should try _much_ harder at unifying these functions before 
> > > > > > giving up and duplicating them...
> > > > > > 
> > > > > > An opaque in_buf/out_buf handle plus two helper function 
> > > > > > pointers passed in would be an obvious implementation.
> > > > > > 
> > > > > > That way we'd have a single generic (inline) function that knows 
> > > > > > about the printf format itself:
> > > > > > 
> > > > > >  __generic_printf(void *in_buf,
> > > > > > 		  void *out_buf,
> > > > > > 		  void * (*read_in_buf)(void **),
> > > > > > 		  void * (*store_out_buf)(void **));
> > > > > > 
> > > > > > And we'd have various variants for read_in_buf and 
> > > > > > store_out_buf. The generic function iterates the following way:
> > > > > > 
> > > > > > 	in_val = read_in_buf(&in_buf);
> > > > > > 	...
> > > > > > 	store_out_buf(&out_buf, in_val);
> > > > > > 
> > > > > > (where in_val is wide enough to store a single argument.) The 
> > > > > > iterators modify the in_buf / out_buf pointers. Argument 
> > > > > > skipping can be done by reading the in-buf and not using it. I 
> > > > > > think we can do it with just two iterator methods.
> > > > > > 
> > > > > > Or something like that - you get the idea. It can all be inlined 
> > > > > > so that we'd end up with essentially the same vsnprint() 
> > > > > > instruction sequence we have today.
> > > > > > 
> > > > > > 	Ingo
> > > > > 
> > > > > 
> > > > > Ok, I just looked deeply inside vsnprintf, and I don't think 
> > > > > such a generic interface would allow that. We need to know the 
> > > > > size of the argument, it's precision, width and flags.... And 
> > > > > we need to know if we want to skip the non format char.
> > > > > 
> > > > > 
> > > > > What do you think of the following:
> > > > > 
> > > > > __ generic_printf(void *in,
> > > > > 		  void *out,
> > > > > 		  void *(*read_in)(void **buf, int size),
> > > > > 		  void *(store_char)(char *dst, char *end, char val, int field_width, int flags),
> > > > > 		  void *(*store_string)(char *dst, char *end, char *val, int field_width, int precision, int flags),
> > > > > 		  void *(*store_pointer)(char type, char *dst, char *end, void *val,
> > > > > 					int field_width, int precision, int flags),
> > > > > 		  void *(*store_number)(char *dst, char *size, int base,int field_width, int precision, int flags),
> > > > > 		  bool skip_non_format
> > > > > 		  )
> > > > > 
> > > > > 
> > > > > Well, something like that...
> > > > > 
> > > > > read_in can advance the pointer to the buffer itself (buf can 
> > > > > be a va_args or u32 *) and it returns a value, void * is 
> > > > > generic for the type.
> > > > > 
> > > > > The storage functions are more specialized because of the 
> > > > > interpretation of flags, precision... So we can easily pass 
> > > > > the usual string(), pointer(), .... that are already present 
> > > > > in vsnprintf.c or use custom ones. They return the advanced 
> > > > > dst pointer.
> > > > > 
> > > > > And at last, skip_non_format will decide if we want to 
> > > > > consider non-format characters from fmt to be copied as common 
> > > > > %c characters or if we want to ignore them (useful for 
> > > > > vbin_printf()).
> > > > 
> > > > hm, that indeed looks very wide - storing into a binary buffer 
> > > > does complicate the iterator interface significantly.
> > > > 
> > > > But at least vsnprintf() and bstr_printf() could be unified - 
> > > > they both output into a string buffer, just have different 
> > > > methods to iterate arguments.
> > > > 
> > > > 	Ingo
> > > 
> > > Right, ok I'm on it.
> > 
> > hm, it would still be nice to get vbin_printf() into the same 
> > scheme too.
> > 
> > And i think we can do it by unconditionally tracking field type 
> > and width in the generic helper - instead of passing this across 
> > the abstraction barrier like your proposal did. vsnprintf() and 
> > bstr_printf() wont make use of it - but vbin_printf() will.
> > 
> > Am i missing anything?
> > 
> > 	Ingo
> 
> Hm, I have some trouble to visualize it globally.
> 
> vsnprintf: takes char * for dest and va_list as src, write formatted in dest
> bstr_printf: takes char * as dest and u32 * as src, read from src is specialized
> vbin_printf: takes u32 * buf as dest and va_list as src, write to dest is specialized
> 
> The only thing that can be generic between the three is the format decoding.
> All in-out operations must be abstracted if we want a common interface from the three.
> 
> So I'm not sure I have the choice.
> 


Instead of calling the in/out helper from the decoder,
why not calling the decoder from these three functions and let them
take the appropriate actions for each decoded format token?

Naive example:

bstr_printf() {
	while (decode_format(...))
		if (type == number)
			read_number_from_buf()
			str = number(....)
		....
}

vsnprintf {
	while (decode_format(...))
		if (type == number)
			var_arg(...)
			str = number(....)
		....
}

vbin_printf {
	while (decode_format(...))
		if (type == number)
			var_arg(...)
			write_number_to_buffer()
		...
}

And the standard in/out pieces can be invoked through helpers.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 18:52             ` Frederic Weisbecker
@ 2009-02-26 18:56               ` Ingo Molnar
  2009-02-26 19:05                 ` Frederic Weisbecker
  2009-02-27  6:19                 ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
  0 siblings, 2 replies; 44+ messages in thread
From: Ingo Molnar @ 2009-02-26 18:56 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> Instead of calling the in/out helper from the decoder, why not 
> calling the decoder from these three functions and let them 
> take the appropriate actions for each decoded format token?
> 
> Naive example:
> 
> bstr_printf() {
> 	while (decode_format(...))
> 		if (type == number)
> 			read_number_from_buf()
> 			str = number(....)
> 		....
> }
> 
> vsnprintf {
> 	while (decode_format(...))
> 		if (type == number)
> 			var_arg(...)
> 			str = number(....)
> 		....
> }
> 
> vbin_printf {
> 	while (decode_format(...))
> 		if (type == number)
> 			var_arg(...)
> 			write_number_to_buffer()
> 		...
> }
> 
> And the standard in/out pieces can be invoked through helpers.

ok, that looks rather clean too. And maybe vsnprintf() and 
bstr_printf() could be further unified?

I guess it depends on how it all looks like in the end. Only one 
way to find out ...

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 1/3] add binary printf
  2009-02-26 18:56               ` Ingo Molnar
@ 2009-02-26 19:05                 ` Frederic Weisbecker
  2009-02-27  6:19                 ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
  1 sibling, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-26 19:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 07:56:35PM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > Instead of calling the in/out helper from the decoder, why not 
> > calling the decoder from these three functions and let them 
> > take the appropriate actions for each decoded format token?
> > 
> > Naive example:
> > 
> > bstr_printf() {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			read_number_from_buf()
> > 			str = number(....)
> > 		....
> > }
> > 
> > vsnprintf {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			var_arg(...)
> > 			str = number(....)
> > 		....
> > }
> > 
> > vbin_printf {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			var_arg(...)
> > 			write_number_to_buffer()
> > 		...
> > }
> > 
> > And the standard in/out pieces can be invoked through helpers.
> 
> ok, that looks rather clean too. And maybe vsnprintf() and 
> bstr_printf() could be further unified?


Yes, read process can be unified between vbin_printf and vsnprintf
and write process can be unified between bstr_printf and vsnprintf,
both further than the above example.


> I guess it depends on how it all looks like in the end. Only one 
> way to find out ...


Let's try, the practice will tell us more, I'm on it.

Thanks.

> 	Ingo


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-26 18:56               ` Ingo Molnar
  2009-02-26 19:05                 ` Frederic Weisbecker
@ 2009-02-27  6:19                 ` Frederic Weisbecker
  2009-02-27  6:46                   ` Andrew Morton
  2009-02-28  0:39                   ` Linus Torvalds
  1 sibling, 2 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-27  6:19 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 07:56:35PM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > Instead of calling the in/out helper from the decoder, why not 
> > calling the decoder from these three functions and let them 
> > take the appropriate actions for each decoded format token?
> > 
> > Naive example:
> > 
> > bstr_printf() {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			read_number_from_buf()
> > 			str = number(....)
> > 		....
> > }
> > 
> > vsnprintf {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			var_arg(...)
> > 			str = number(....)
> > 		....
> > }
> > 
> > vbin_printf {
> > 	while (decode_format(...))
> > 		if (type == number)
> > 			var_arg(...)
> > 			write_number_to_buffer()
> > 		...
> > }
> > 
> > And the standard in/out pieces can be invoked through helpers.
> 
> ok, that looks rather clean too. And maybe vsnprintf() and 
> bstr_printf() could be further unified?
> 
> I guess it depends on how it all looks like in the end. Only one 
> way to find out ...
> 
> 	Ingo


Ok, here is a try, I've mostly unified the format decoding bits.
Most interesting other helpers were already implemented (pointer(),
string() and number()

Note: it is based on top of Lai's work, which means you will need the ftrace_bprintk()
patches if you want to give it a try.

I've tested it successfully (through the usual printk messages and the events tracing).

---
Subject: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users


An new optimization is making its way to ftrace. Its purpose is to
make ftrace_printk() consuming less memory and be faster.

Written by Lai Jiangshan, the approach is to delay the formatting
job from tracing time to output time.
Currently, a call to ftrace_printk will format the whole string and
insert it into the ring buffer. Then you can read it on /debug/tracing/trace
file.

The new implementation stores the address of the format string and
the binary parameters into the ring buffer, making the packet more compact
and faster to insert.
Later, when the user exports the traces, the format string is retrieved
with the binary parameters and the formatting job is eventually done.

Here is the result of a small comparative benchmark while putting the following
ftrace_printk on the timer interrupt. ftrace_printk is the old implementation,
ftrace_bprintk is a the new one:

ftrace_printk("This is the timer interrupt: %llu", jiffies_64);

After some time running on low load (no X, no really active processes):

ftrace_printk:  duration average: 2044 ns, avg of bytes stored per entry: 39
ftrace_bprintk: duration average: 1426 ns, avg of bytes stored per entry: 16

Higher load (started X and launched a cat running on an X console looping on
traces printing):

ftrace_printk:  duration average: 8812 ns
ftrace_bprintk: duration average: 2611 ns

Which means the new implementation can be 70 % faster on higher load.
And it consumes lesser memory on the ring buffer.

The curent implementation rewrites a lot of format decoding bits from
vsnprintf() function, making now 3 differents functions to maintain
in their duplicated parts of printf format decoding bits.

Suggested by Ingo Molnar, this patch tries to factorize the most
possible common bits from these functions.
The real common part between them is the format decoding. Although
they do somewhat similar jobs, their way to export or import the parameters
is very different. Thus, only the decoding layer is extracted, unless you see
other parts that could be worth factorized.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 lib/vsprintf.c |  799 +++++++++++++++++++++++++++++++-------------------------
 1 files changed, 448 insertions(+), 351 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3543bbe..1c32ca2 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -700,6 +700,229 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
 }
 
+
+enum format_type {
+	FORMAT_TYPE_NONE, /* Just a string part */
+	FORMAT_TYPE_WITDH,
+	FORMAT_TYPE_PRECISION,
+	FORMAT_TYPE_CHAR,
+	FORMAT_TYPE_STR,
+	FORMAT_TYPE_PTR,
+	FORMAT_TYPE_PERCENT_CHAR,
+	FORMAT_TYPE_INVALID,
+	FORMAT_TYPE_LONG_LONG,
+	FORMAT_TYPE_ULONG,
+	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_USHORT,
+	FORMAT_TYPE_SHORT,
+	FORMAT_TYPE_UINT,
+	FORMAT_TYPE_INT,
+	FORMAT_TYPE_NRCHARS,
+	FORMAT_TYPE_SIZE_T,
+	FORMAT_TYPE_PTRDIFF
+};
+
+/*
+ * Helper function to decode printf style format.
+ * Each call decode a token from the format and return the
+ * number of characters read (or likely the delta where it wants
+ * to go on the next call).
+ * The decoded token is returned through the parameters
+ *
+ * @fmt: the format string
+ * @type of the token returned
+ * @flags: various flags such as +, -, # tokens..
+ * @field_width: overwritten width
+ * @base: base of the number (octal, hex, ...)
+ * @precision: precision of a number
+ * @qualifier: qualifier of a number (long, size_t, ...)
+ */
+static int format_decode(const char *fmt, enum format_type *type, int *flags,
+			  int *field_width, int *base, int *precision,
+			  int *qualifier)
+{
+	const char *start = fmt;
+	bool sign = false;
+
+	/* we finished early by reading the field width */
+	if (*type == FORMAT_TYPE_WITDH) {
+		if (*field_width < 0) {
+			*field_width = -*field_width;
+			*flags |= LEFT;
+		}
+		*type = FORMAT_TYPE_NONE;
+		goto precision;
+	}
+
+	/* we finished early by reading the precision */
+	if (*type == FORMAT_TYPE_PRECISION) {
+		if (*precision < 0)
+			*precision = 0;
+
+		*type = FORMAT_TYPE_NONE;
+		goto qualifier;
+	}
+
+	/* By default */
+	*type = FORMAT_TYPE_NONE;
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt == '%')
+			break;
+	}
+
+	/* Return the current non-format string */
+	if (fmt != start || !*fmt)
+		return fmt - start;
+
+	/* Process flags */
+	*flags = 0;
+
+	while (1) { /* this also skips first '%' */
+		bool found = true;
+
+		++fmt;
+
+		switch (*fmt) {
+		case '-':
+			*flags |= LEFT;
+			break;
+		case '+':
+			*flags |= PLUS;
+			break;
+		case ' ':
+			*flags |= SPACE;
+			break;
+		case '#':
+			*flags |= SPECIAL;
+			break;
+		case '0':
+			*flags |= ZEROPAD;
+			break;
+		default:
+			found = false;
+		}
+
+		if (!found)
+			break;
+	}
+
+	/* get field width */
+	*field_width = -1;
+
+	if (isdigit(*fmt))
+		*field_width = skip_atoi(&fmt);
+	else if (*fmt == '*') {
+		/* it's the next argument */
+		*type = FORMAT_TYPE_WITDH;
+		return ++fmt - start;
+	}
+
+precision:
+	/* get the precision */
+	*precision = -1;
+	if (*fmt == '.') {
+		++fmt;
+		if (isdigit(*fmt)) {
+			*precision = skip_atoi(&fmt);
+			if (*precision < 0)
+				*precision = 0;
+		} else if (*fmt == '*') {
+			/* it's the next argument */
+			*type = FORMAT_TYPE_WITDH;
+			return ++fmt - start;
+		}
+	}
+
+qualifier:
+	/* get the conversion qualifier */
+	*qualifier = -1;
+	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+		*qualifier = *fmt;
+		++fmt;
+		if (*qualifier == 'l' && *fmt == 'l') {
+			*qualifier = 'L';
+			++fmt;
+		}
+	}
+
+	/* default base */
+	*base = 10;
+	switch (*fmt) {
+	case 'c':
+		*type = FORMAT_TYPE_CHAR;
+		return ++fmt - start;
+
+	case 's':
+		*type = FORMAT_TYPE_STR;
+		return ++fmt - start;
+
+	case 'p':
+		*type = FORMAT_TYPE_PTR;
+		return fmt - start;
+		/* skip alnum */
+
+	case 'n':
+		/* FIXME:
+		* What does C99 say about the overflow case here? */
+		*type = FORMAT_TYPE_NRCHARS;
+		return ++fmt - start;
+
+	case '%':
+		*type = FORMAT_TYPE_PERCENT_CHAR;
+		return ++fmt - start;
+
+	/* integer number formats - set up the flags and "break" */
+	case 'o':
+		*base = 8;
+		break;
+
+	case 'x':
+		*flags |= SMALL;
+
+	case 'X':
+		*base = 16;
+		break;
+
+	case 'd':
+	case 'i':
+		sign = true;
+	case 'u':
+		break;
+
+	default:
+		*type = FORMAT_TYPE_INVALID;
+		return fmt - start;
+	}
+
+	if (*qualifier == 'L')
+		*type = FORMAT_TYPE_LONG_LONG;
+	else if (*qualifier == 'l') {
+		if (sign)
+			*type = FORMAT_TYPE_LONG;
+		else
+			*type = FORMAT_TYPE_ULONG;
+	} else if (*qualifier == 'Z' || *qualifier == 'z') {
+		*type = FORMAT_TYPE_SIZE_T;
+	} else if (*qualifier == 't') {
+		*type = FORMAT_TYPE_PTRDIFF;
+	} else if (*qualifier == 'h') {
+		if (sign)
+			*type = FORMAT_TYPE_SHORT;
+		else
+			*type = FORMAT_TYPE_USHORT;
+	} else {
+		if (sign)
+			*type = FORMAT_TYPE_INT;
+		else
+			*type = FORMAT_TYPE_UINT;
+	}
+
+	return ++fmt - start;
+}
+
+
 /**
  * vsnprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
@@ -726,15 +949,17 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	unsigned long long num;
-	int base;
+	int base = 0;
 	char *str, *end, c;
+	int read;
+	enum format_type type = FORMAT_TYPE_NONE;
 
-	int flags;		/* flags to number() */
+	int flags = 0;		/* flags to number() */
 
-	int field_width;	/* width of output field */
-	int precision;		/* min. # of digits for integers; max
+	int field_width = 0;	/* width of output field */
+	int precision = 0;	/* min. # of digits for integers; max
 				   number of chars for from string */
-	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
+	int qualifier = 0;	/* 'h', 'l', or 'L' for integer fields */
 				/* 'z' support added 23/7/1999 S.H.    */
 				/* 'z' changed to 'Z' --davidm 1/25/99 */
 				/* 't' added for ptrdiff_t */
@@ -758,184 +983,133 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-		repeat:
-			++fmt;		/* this also skips first '%' */
-			switch (*fmt) {
-				case '-': flags |= LEFT; goto repeat;
-				case '+': flags |= PLUS; goto repeat;
-				case ' ': flags |= SPACE; goto repeat;
-				case '#': flags |= SPECIAL; goto repeat;
-				case '0': flags |= ZEROPAD; goto repeat;
-			}
+		read = format_decode(fmt, &type, &flags, &field_width, &base,
+					&precision, &qualifier);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = va_arg(args, int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;	
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = va_arg(args, int);
+		switch (type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			field_width = va_arg(args, int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			precision = va_arg(args, int);
+			break;
 
-		switch (*fmt) {
-			case 'c':
-				if (!(flags & LEFT)) {
-					while (--field_width > 0) {
-						if (str < end)
-							*str = ' ';
-						++str;
-					}
-				}
-				c = (unsigned char) va_arg(args, int);
-				if (str < end)
-					*str = c;
-				++str;
+		case FORMAT_TYPE_CHAR:
+			if (!(flags & LEFT)) {
 				while (--field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
-				}
-				continue;
-
-			case 's':
-				str = string(str, end, va_arg(args, char *), field_width, precision, flags);
-				continue;
-
-			case 'p':
-				str = pointer(fmt+1, str, end,
-						va_arg(args, void *),
-						field_width, precision, flags);
-				/* Skip all alphanumeric pointer suffixes */
-				while (isalnum(fmt[1]))
-					fmt++;
-				continue;
-
-			case 'n':
-				/* FIXME:
-				* What does C99 say about the overflow case here? */
-				if (qualifier == 'l') {
-					long * ip = va_arg(args, long *);
-					*ip = (str - buf);
-				} else if (qualifier == 'Z' || qualifier == 'z') {
-					size_t * ip = va_arg(args, size_t *);
-					*ip = (str - buf);
-				} else {
-					int * ip = va_arg(args, int *);
-					*ip = (str - buf);
-				}
-				continue;
 
-			case '%':
+				}
+			}
+			c = (unsigned char) va_arg(args, int);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--field_width > 0) {
 				if (str < end)
-					*str = '%';
+					*str = ' ';
 				++str;
-				continue;
+			}
+			break;
 
-				/* integer number formats - set up the flags and "break" */
-			case 'o':
-				base = 8;
-				break;
+		case FORMAT_TYPE_STR:
+			str = string(str, end, va_arg(args, char *),
+					field_width, precision, flags);
+			break;
 
-			case 'x':
-				flags |= SMALL;
-			case 'X':
-				base = 16;
-				break;
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, va_arg(args, void *),
+				      field_width, precision, flags);
+			while (isalnum(fmt[1]))
+				fmt++;
+			break;
 
-			case 'd':
-			case 'i':
-				flags |= SIGN;
-			case 'u':
-				break;
+		case FORMAT_TYPE_PERCENT_CHAR:
+			if (str < end)
+				*str = '%';
+			++str;
+			break;
 
-			default:
+		case FORMAT_TYPE_INVALID:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
 				if (str < end)
-					*str = '%';
+					*str = *fmt;
 				++str;
-				if (*fmt) {
-					if (str < end)
-						*str = *fmt;
-					++str;
-				} else {
-					--fmt;
-				}
-				continue;
-		}
-		if (qualifier == 'L')
-			num = va_arg(args, long long);
-		else if (qualifier == 'l') {
-			num = va_arg(args, unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = va_arg(args, size_t);
-		} else if (qualifier == 't') {
-			num = va_arg(args, ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) va_arg(args, int);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = va_arg(args, unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
-		}
-		str = number(str, end, num, base,
+			} else {
+				--fmt;
+			}
+			break;
+
+		case FORMAT_TYPE_NRCHARS:
+			if (qualifier == 'l') {
+				long *ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z' || qualifier == 'z') {
+				size_t *ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int *ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			break;
+
+		default:
+			if (type == FORMAT_TYPE_LONG_LONG)
+				num = va_arg(args, long long);
+			else if (type == FORMAT_TYPE_ULONG)
+				num = va_arg(args, unsigned long);
+			else if (type == FORMAT_TYPE_LONG)
+				num = va_arg(args, unsigned long);
+			else if (type == FORMAT_TYPE_SIZE_T)
+				num = va_arg(args, size_t);
+			else if (type == FORMAT_TYPE_PTRDIFF)
+				num = va_arg(args, ptrdiff_t);
+			else if (type == FORMAT_TYPE_USHORT)
+				num = (unsigned short) va_arg(args, int);
+			else if (type == FORMAT_TYPE_SHORT)
+				num = (short) va_arg(args, int);
+			else if (type == FORMAT_TYPE_UINT)
+				num = va_arg(args, unsigned int);
+			else
+				num = va_arg(args, unsigned int);
+
+			str = number(str, end, num, base,
 				field_width, precision, flags);
+		}
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 	/* the trailing null byte doesn't count towards the total */
 	return str-buf;
+
 }
 EXPORT_SYMBOL(vsnprintf);
 
@@ -1085,7 +1259,13 @@ EXPORT_SYMBOL(sprintf);
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 {
 	char *str, *end;
-	int qualifier;
+	int qualifier = 0;
+	int base = 0;
+	enum format_type type = 0;
+	int flags = 0;
+	int field_width = 0;
+	int precision = 0;
+	int read;
 
 	str = (char *)bin_buf;
 	end = (char *)(bin_buf + size);
@@ -1110,57 +1290,31 @@ do {									\
 	str += sizeof(type);						\
 } while (0)
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%')
-			continue;
 
-repeat:
-		/* parse flags */
-		++fmt;		/* this also skips first '%' */
-		if (*fmt == '-' || *fmt == '+' || *fmt == ' '
-				|| *fmt == '#' || *fmt == '0')
-			goto repeat;
+	while (*fmt) {
+		read = format_decode(fmt, &type, &flags, &field_width, &base,
+					&precision, &qualifier);
 
-		/* parse field width */
-		if (isdigit(*fmt))
-			skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			save_arg(int);
-		}
+		fmt += read;
 
-		/* parse the precision */
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				save_arg(int);
-			}
+		switch (type) {
+		case FORMAT_TYPE_NONE: {
+			break;
 		}
 
-		/* parse the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			save_arg(int);
+			break;
 
-		/* parse format type */
-		switch (*fmt) {
-		case 'c':
+		case FORMAT_TYPE_PRECISION:
+			save_arg(int);
+			break;
+
+		case FORMAT_TYPE_CHAR:
 			save_arg(char);
-			continue;
-		case 's': {
-			/* save the string argument */
+			break;
+
+		case FORMAT_TYPE_STR: {
 			const char *save_str = va_arg(args, char *);
 			size_t len;
 			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
@@ -1170,15 +1324,25 @@ repeat:
 			if (str + len + 1 < end)
 				memcpy(str, save_str, len + 1);
 			str += len + 1;
-			continue;
+			break;
 		}
-		case 'p':
+
+		case FORMAT_TYPE_PTR:
 			save_arg(void *);
 			/* skip all alphanumeric pointer suffixes */
 			while (isalnum(fmt[1]))
 				fmt++;
-			continue;
-		case 'n': {
+			break;
+
+		case FORMAT_TYPE_PERCENT_CHAR:
+			break;
+
+		case FORMAT_TYPE_INVALID:
+			if (!*fmt)
+				--fmt;
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
 			/* skip %n 's argument */
 			void *skip_arg;
 			if (qualifier == 'l')
@@ -1187,37 +1351,29 @@ repeat:
 				skip_arg = va_arg(args, size_t *);
 			else
 				skip_arg = va_arg(args, int *);
-			continue;
+			break;
 		}
-		case 'o':
-		case 'x':
-		case 'X':
-		case 'd':
-		case 'i':
-		case 'u':
-			/* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */
-			if (qualifier == 'L')
+
+		default:
+			if (type == FORMAT_TYPE_LONG_LONG)
 				save_arg(long long);
-			else if (qualifier == 'l')
+			else if (type == FORMAT_TYPE_ULONG ||
+				 type == FORMAT_TYPE_LONG)
 				save_arg(unsigned long);
-			else if (qualifier == 'Z' || qualifier == 'z')
+			else if (type == FORMAT_TYPE_SIZE_T)
 				save_arg(size_t);
-			else if (qualifier == 't')
+			else if (type == FORMAT_TYPE_PTRDIFF)
 				save_arg(ptrdiff_t);
-			else if (qualifier == 'h')
+			else if (type == FORMAT_TYPE_USHORT ||
+				 type == FORMAT_TYPE_SHORT)
 				save_arg(short);
 			else
 				save_arg(int);
-			continue;
-		default:
-			if (!*fmt)
-				--fmt;
-			continue;
 		}
 	}
-#undef save_arg
-
 	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+
+#undef save_arg
 }
 EXPORT_SYMBOL_GPL(vbin_printf);
 
@@ -1253,10 +1409,11 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 	char *str, *end, c;
 	const char *args = (const char *)bin_buf;
 
-	int flags;
-	int field_width;
-	int precision;
-	int qualifier;
+	int flags = 0;
+	int field_width = 0;
+	int precision = 0;
+	int qualifier = 0;
+	enum format_type type = 0;
 
 	if (unlikely((int) size < 0)) {
 		/* There can be only one.. */
@@ -1290,82 +1447,36 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		int read;
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-repeat:
-		++fmt;		/* this also skips first '%' */
-		switch (*fmt) {
-		case '-':
-			flags |= LEFT;
-			goto repeat;
-		case '+':
-			flags |= PLUS;
-			goto repeat;
-		case ' ':
-			flags |= SPACE;
-			goto repeat;
-		case '#':
-			flags |= SPECIAL;
-			goto repeat;
-		case '0':
-			flags |= ZEROPAD;
-			goto repeat;
-		}
+		read = format_decode(fmt, &type, &flags, &field_width, &base,
+					&precision, &qualifier);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = get_arg(int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = get_arg(int);
+		switch (type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			field_width = get_arg(int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			precision = get_arg(int);
+			break;
 
-		switch (*fmt) {
-		case 'c':
+		case FORMAT_TYPE_CHAR:
 			if (!(flags & LEFT)) {
 				while (--field_width > 0) {
 					if (str < end)
@@ -1382,53 +1493,31 @@ repeat:
 					*str = ' ';
 				++str;
 			}
-			continue;
+			break;
 
-		case 's':{
+		case FORMAT_TYPE_STR: {
 			const char *str_arg = args;
 			size_t len = strlen(str_arg);
 			args += len + 1;
 			str = string(str, end, (char *)str_arg, field_width,
 					precision, flags);
-			continue;
+			break;
 		}
 
-		case 'p':
+		case FORMAT_TYPE_PTR:
 			str = pointer(fmt+1, str, end, get_arg(void *),
-					field_width, precision, flags);
-			/* Skip all alphanumeric pointer suffixes */
+				      field_width, precision, flags);
 			while (isalnum(fmt[1]))
 				fmt++;
-			continue;
-
-		case 'n':
-			/* skip %n */
-			continue;
+			break;
 
-		case '%':
+		case FORMAT_TYPE_PERCENT_CHAR:
 			if (str < end)
 				*str = '%';
 			++str;
-			continue;
-
-		/* integer number formats - set up the flags and "break" */
-		case 'o':
-			base = 8;
-			break;
-
-		case 'x':
-			flags |= SMALL;
-		case 'X':
-			base = 16;
-			break;
-
-		case 'd':
-		case 'i':
-			flags |= SIGN;
-		case 'u':
 			break;
 
-		default:
+		case FORMAT_TYPE_INVALID:
 			if (str < end)
 				*str = '%';
 			++str;
@@ -1439,36 +1528,44 @@ repeat:
 			} else {
 				--fmt;
 			}
-			continue;
-		}
-		if (qualifier == 'L')
-			num = get_arg(long long);
-		else if (qualifier == 'l') {
-			num = get_arg(unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = get_arg(size_t);
-		} else if (qualifier == 't') {
-			num = get_arg(ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) get_arg(short);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = get_arg(unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
-		}
-		str = number(str, end, num, base,
+			break;
+
+		case FORMAT_TYPE_NRCHARS:
+			/* skip */
+			break;
+
+		default:
+			if (type == FORMAT_TYPE_LONG_LONG)
+				num = get_arg(long long);
+			else if (type == FORMAT_TYPE_ULONG)
+				num = get_arg(unsigned long);
+			else if (type == FORMAT_TYPE_LONG)
+				num = get_arg(unsigned long);
+			else if (type == FORMAT_TYPE_SIZE_T)
+				num = get_arg(size_t);
+			else if (type == FORMAT_TYPE_PTRDIFF)
+				num = get_arg(ptrdiff_t);
+			else if (type == FORMAT_TYPE_USHORT)
+				num = get_arg(unsigned short);
+			else if (type == FORMAT_TYPE_SHORT)
+				num = get_arg(short);
+			else if (type == FORMAT_TYPE_UINT)
+				num = get_arg(unsigned int);
+			else
+				num = get_arg(int);
+
+			str = number(str, end, num, base,
 				field_width, precision, flags);
+		}
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 #undef get_arg
 
 	/* the trailing null byte doesn't count towards the total */



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  6:19                 ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
@ 2009-02-27  6:46                   ` Andrew Morton
  2009-02-27  7:12                     ` Frederic Weisbecker
                                       ` (2 more replies)
  2009-02-28  0:39                   ` Linus Torvalds
  1 sibling, 3 replies; 44+ messages in thread
From: Andrew Morton @ 2009-02-27  6:46 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Fri, 27 Feb 2009 07:19:37 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:

> 
> An new optimization is making its way to ftrace. Its purpose is to
> make ftrace_printk() consuming less memory and be faster.
> 
> Written by Lai Jiangshan, the approach is to delay the formatting
> job from tracing time to output time.
> Currently, a call to ftrace_printk will format the whole string and
> insert it into the ring buffer.

It does that?   eek.

> Then you can read it on /debug/tracing/trace
> file.
> 
> The new implementation stores the address of the format string and
> the binary parameters into the ring buffer, making the packet more compact
> and faster to insert.
> Later, when the user exports the traces, the format string is retrieved
> with the binary parameters and the formatting job is eventually done.
> 
> Here is the result of a small comparative benchmark while putting the following
> ftrace_printk on the timer interrupt. ftrace_printk is the old implementation,
> ftrace_bprintk is a the new one:
> 
> ftrace_printk("This is the timer interrupt: %llu", jiffies_64);
> 
> After some time running on low load (no X, no really active processes):
> 
> ftrace_printk:  duration average: 2044 ns, avg of bytes stored per entry: 39
> ftrace_bprintk: duration average: 1426 ns, avg of bytes stored per entry: 16
> 
> Higher load (started X and launched a cat running on an X console looping on
> traces printing):
> 
> ftrace_printk:  duration average: 8812 ns
> ftrace_bprintk: duration average: 2611 ns
> 
> Which means the new implementation can be 70 % faster on higher load.
> And it consumes lesser memory on the ring buffer.
> 
> The curent implementation rewrites a lot of format decoding bits from
> vsnprintf() function, making now 3 differents functions to maintain
> in their duplicated parts of printf format decoding bits.

<looks for ftrace_bprintk() in linux-next, fails>

Why does the current ftrace_bprintk() need to hack around in (or
duplicate) vprintk() internals?  It's a bit grubby, but by placing an
upper bound on the number of args, it could simply call vscnprintf()
directly?


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  6:46                   ` Andrew Morton
@ 2009-02-27  7:12                     ` Frederic Weisbecker
  2009-02-27  7:39                       ` Andrew Morton
  2009-02-27  8:20                     ` Ingo Molnar
  2009-02-27 15:03                     ` Steven Rostedt
  2 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-27  7:12 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Thu, Feb 26, 2009 at 10:46:56PM -0800, Andrew Morton wrote:
> On Fri, 27 Feb 2009 07:19:37 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > 
> > An new optimization is making its way to ftrace. Its purpose is to
> > make ftrace_printk() consuming less memory and be faster.
> > 
> > Written by Lai Jiangshan, the approach is to delay the formatting
> > job from tracing time to output time.
> > Currently, a call to ftrace_printk will format the whole string and
> > insert it into the ring buffer.
> 
> It does that?   eek.
> 
> > Then you can read it on /debug/tracing/trace
> > file.
> > 
> > The new implementation stores the address of the format string and
> > the binary parameters into the ring buffer, making the packet more compact
> > and faster to insert.
> > Later, when the user exports the traces, the format string is retrieved
> > with the binary parameters and the formatting job is eventually done.
> > 
> > Here is the result of a small comparative benchmark while putting the following
> > ftrace_printk on the timer interrupt. ftrace_printk is the old implementation,
> > ftrace_bprintk is a the new one:
> > 
> > ftrace_printk("This is the timer interrupt: %llu", jiffies_64);
> > 
> > After some time running on low load (no X, no really active processes):
> > 
> > ftrace_printk:  duration average: 2044 ns, avg of bytes stored per entry: 39
> > ftrace_bprintk: duration average: 1426 ns, avg of bytes stored per entry: 16
> > 
> > Higher load (started X and launched a cat running on an X console looping on
> > traces printing):
> > 
> > ftrace_printk:  duration average: 8812 ns
> > ftrace_bprintk: duration average: 2611 ns
> > 
> > Which means the new implementation can be 70 % faster on higher load.
> > And it consumes lesser memory on the ring buffer.
> > 
> > The curent implementation rewrites a lot of format decoding bits from
> > vsnprintf() function, making now 3 differents functions to maintain
> > in their duplicated parts of printf format decoding bits.
> 
> <looks for ftrace_bprintk() in linux-next, fails>


It hasn't been applied for now, waiting for this cleanup.
Grr, I can't find an http link for the patchset...
 

> Why does the current ftrace_bprintk() need to hack around in (or
> duplicate) vprintk() internals?  It's a bit grubby, but by placing an
> upper bound on the number of args, it could simply call vscnprintf()
> directly?
>

The problem is more in the way to save the parameters.

You have two functions:

_int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)


This one creates a compact binary packet of all args described in the format.
The result is a binary set of random values on bin_buf.


_int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)


This one lately parses the buffer filled by vbin_printf() and eventually format
this binary contiguous set of binary values according to the format in fmt (which is the
same that was passed to vbin_printf()
The result is formatted in buf.

vbin uses too much specific write-to-bin_buf operations to allow us to wrap
vsnprintf()

It's the same for bstr_printf, but in the read from buffer way.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  7:12                     ` Frederic Weisbecker
@ 2009-02-27  7:39                       ` Andrew Morton
  2009-02-27  8:32                         ` Ingo Molnar
  0 siblings, 1 reply; 44+ messages in thread
From: Andrew Morton @ 2009-02-27  7:39 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Fri, 27 Feb 2009 08:12:13 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:

> > Why does the current ftrace_bprintk() need to hack around in (or
> > duplicate) vprintk() internals?  It's a bit grubby, but by placing an
> > upper bound on the number of args, it could simply call vscnprintf()
> > directly?
> >
> 
> The problem is more in the way to save the parameters.
> 
> You have two functions:
> 
> _int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
> 
> 
> This one creates a compact binary packet of all args described in the format.
> The result is a binary set of random values on bin_buf.
> 
> 
> _int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
> 
> 
> This one lately parses the buffer filled by vbin_printf() and eventually format
> this binary contiguous set of binary values according to the format in fmt (which is the
> same that was passed to vbin_printf()
> The result is formatted in buf.
> 
> vbin uses too much specific write-to-bin_buf operations to allow us to wrap
> vsnprintf()

I don't know what vbin is.

On little-endian architecture you could do something like

	u32 *p = bin_buf;
	char *fmt;
	u32 a0, a1, a2, a3, a4;

#ifdef CONFIG_32BIT
	fmt = (char *)(*bin_buf++);
#else
	<exercise for the reader>
#endif

	a0 = *bin_buf++;
	a1 = *bin_buf++;
	a2 = *bin_buf++;
	a3 = *bin_buf++;
	a4 = *bin_buf++;

	snprintf(somewhere, some_size, fmt, a0, a1, a2, a3, a4, a5);

(as I said, ugly).

but that won't work for 64-bit values on big-endian architectures.  And
it's hacky (and might not even work) for 64-bit on little endian.

Perhaps this is the secret problem which you haven't described yet?

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  6:46                   ` Andrew Morton
  2009-02-27  7:12                     ` Frederic Weisbecker
@ 2009-02-27  8:20                     ` Ingo Molnar
  2009-02-27  8:33                       ` Andrew Morton
  2009-02-27 15:03                     ` Steven Rostedt
  2 siblings, 1 reply; 44+ messages in thread
From: Ingo Molnar @ 2009-02-27  8:20 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, Linus Torvalds, linux-kernel,
	Steven Rostedt, Lai Jiangshan, Peter Zijlstra


* Andrew Morton <akpm@linux-foundation.org> wrote:

> On Fri, 27 Feb 2009 07:19:37 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > An new optimization is making its way to ftrace. Its purpose 
> > is to make ftrace_printk() consuming less memory and be 
> > faster.
> > 
> > Written by Lai Jiangshan, the approach is to delay the 
> > formatting job from tracing time to output time. Currently, 
> > a call to ftrace_printk will format the whole string and 
> > insert it into the ring buffer.
> 
> It does that?  eek.

hm, did you expect something else from a printf based interface?

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  7:39                       ` Andrew Morton
@ 2009-02-27  8:32                         ` Ingo Molnar
  2009-02-27  8:45                           ` Andrew Morton
  0 siblings, 1 reply; 44+ messages in thread
From: Ingo Molnar @ 2009-02-27  8:32 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, Linus Torvalds, linux-kernel,
	Steven Rostedt, Lai Jiangshan, Peter Zijlstra


* Andrew Morton <akpm@linux-foundation.org> wrote:

> On Fri, 27 Feb 2009 08:12:13 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > > Why does the current ftrace_bprintk() need to hack around in (or
> > > duplicate) vprintk() internals?  It's a bit grubby, but by placing an
> > > upper bound on the number of args, it could simply call vscnprintf()
> > > directly?
> > >
> > 
> > The problem is more in the way to save the parameters.
> > 
> > You have two functions:
> > 
> > _int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
> > 
> > 
> > This one creates a compact binary packet of all args described in the format.
> > The result is a binary set of random values on bin_buf.
> > 
> > 
> > _int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
> > 
> > 
> > This one lately parses the buffer filled by vbin_printf() and eventually format
> > this binary contiguous set of binary values according to the format in fmt (which is the
> > same that was passed to vbin_printf()
> > The result is formatted in buf.
> > 
> > vbin uses too much specific write-to-bin_buf operations to 
> > allow us to wrap vsnprintf()
> 
> I don't know what vbin is.

a printf variant that outputs not into a string buffer but into 
a binary-values buffer. See earlier mails in this same thread. 
(I Cc:-ed you mid-thread on the assumption that you might be 
interested - but you need to look into the lkml folder to see 
all the context.)

> On little-endian architecture you could do something like
> 
> 	u32 *p = bin_buf;
> 	char *fmt;
> 	u32 a0, a1, a2, a3, a4;
> 
> #ifdef CONFIG_32BIT
> 	fmt = (char *)(*bin_buf++);
> #else
> 	<exercise for the reader>
> #endif
> 
> 	a0 = *bin_buf++;
> 	a1 = *bin_buf++;
> 	a2 = *bin_buf++;
> 	a3 = *bin_buf++;
> 	a4 = *bin_buf++;
> 
> 	snprintf(somewhere, some_size, fmt, a0, a1, a2, a3, a4, a5);
> 
> (as I said, ugly).

Yes, and slow as well. This isnt new functionality - this is 
about speedups, using the format string as a data types 
descriptor.

The goal is to get the speedup that Frederic cited:

  ftrace_printk:  duration average: 8812 ns
  ftrace_bprintk: duration average: 2611 ns

The _fastest_ way of tracing is obviously to know about the 
precise argument layout and having a specific C based tracepoint 
stub that directly stuffs that data into the ring buffer. Most 
tracepoints are of such nature.

That does not remove the ease of use of ad-hoc printk-alike 
tracepoints though, and speeding them up 3-fold is a worthwile 
goal.

> but that won't work for 64-bit values on big-endian 
> architectures.  And it's hacky (and might not even work) for 
> 64-bit on little endian.
> 
> Perhaps this is the secret problem which you haven't described 
> yet?

The first submission was a completely standalone implementations 
not touching vsnprintf at all. I suggested to Frederic to try to 
generalize it, to reduce code duplication. That's why this patch 
touches vsnprintf.

We can do it all in kernel/trace/trace.c if the lib/vsnprintf.c 
bits turn out to be ugly or not useful enough. We just thought 
we'd try to make this more generally useful before stuffing it 
into a specific subsystem.

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  8:20                     ` Ingo Molnar
@ 2009-02-27  8:33                       ` Andrew Morton
  0 siblings, 0 replies; 44+ messages in thread
From: Andrew Morton @ 2009-02-27  8:33 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frederic Weisbecker, Linus Torvalds, linux-kernel,
	Steven Rostedt, Lai Jiangshan, Peter Zijlstra

On Fri, 27 Feb 2009 09:20:35 +0100 Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> > On Fri, 27 Feb 2009 07:19:37 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:
> > 
> > > An new optimization is making its way to ftrace. Its purpose 
> > > is to make ftrace_printk() consuming less memory and be 
> > > faster.
> > > 
> > > Written by Lai Jiangshan, the approach is to delay the 
> > > formatting job from tracing time to output time. Currently, 
> > > a call to ftrace_printk will format the whole string and 
> > > insert it into the ring buffer.
> > 
> > It does that?  eek.
> 
> hm, did you expect something else from a printf based interface?
> 

Well, it's very obvious that it will be very slow.  We've never had any
motivation for caring about the performance of the printf functions
because they're called so infrequently.

I mean...  the whole thing's designed to fail, really - if we care
about the performance impact of tracing, we can't trace with per-tracepoint
printf()s.  If we don't care about the performance impact of tracing
then fine, leave ftrace_printk() as it is - slow.

Trying to make something which is inherently slow run slightly faster seems... 
odd.


Oh well, that's beside the point.  Right now I'm trying to provoke you
guys into revealing what the implementation problems are, and what
you're proposing to do.  Stuff like: how will it work with 64-bit
quantities on arm/s390/etc.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  8:32                         ` Ingo Molnar
@ 2009-02-27  8:45                           ` Andrew Morton
  2009-02-27  9:35                             ` Ingo Molnar
  0 siblings, 1 reply; 44+ messages in thread
From: Andrew Morton @ 2009-02-27  8:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Frederic Weisbecker, Linus Torvalds, linux-kernel,
	Steven Rostedt, Lai Jiangshan, Peter Zijlstra

On Fri, 27 Feb 2009 09:32:12 +0100 Ingo Molnar <mingo@elte.hu> wrote:

> 
> 
> a printf variant that outputs not into a string buffer but into 
> a binary-values buffer.

Oh, OK.

Of course, parsing a printf string (using our
nobody-bothered-optimising-it parser) isn't particularly efficient
either.

Is there a case here for adding a new way of describing how to
serialise the incoming data?  Perhaps with a new and simpler control
string or perhaps even

	struct ftrace_thing ft;

	init_ftrace_thing(&ft, "%d %l %s");
	add_ftrace_thing_int(&ft, some_int);
	add_ftrace_thing_long(&ft, some_long);
	add_ftrace_thing_string(&ft, some_string);
	emit_ftrace_thing(&ft);

or some similar thing.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  8:45                           ` Andrew Morton
@ 2009-02-27  9:35                             ` Ingo Molnar
  0 siblings, 0 replies; 44+ messages in thread
From: Ingo Molnar @ 2009-02-27  9:35 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, Linus Torvalds, linux-kernel,
	Steven Rostedt, Lai Jiangshan, Peter Zijlstra


* Andrew Morton <akpm@linux-foundation.org> wrote:

> On Fri, 27 Feb 2009 09:32:12 +0100 Ingo Molnar <mingo@elte.hu> wrote:
> 
> > a printf variant that outputs not into a string buffer but 
> > into a binary-values buffer.
> 
> Oh, OK.
> 
> Of course, parsing a printf string (using our 
> nobody-bothered-optimising-it parser) isn't particularly 
> efficient either.
> 
> Is there a case here for adding a new way of describing how to 
> serialise the incoming data?  Perhaps with a new and simpler 
> control string or perhaps even
> 
> 	struct ftrace_thing ft;
> 
> 	init_ftrace_thing(&ft, "%d %l %s");
> 	add_ftrace_thing_int(&ft, some_int);
> 	add_ftrace_thing_long(&ft, some_long);
> 	add_ftrace_thing_string(&ft, some_string);
> 	emit_ftrace_thing(&ft);
> 
> or some similar thing.

Maybe - but i think it misses the main point.

If we care about a tracepoint from a performance POV we wont use 
printf. When it comes to raw performance then we already have 
direct, non-string ways of describing data: the C language and 
function calls straight to a tracepoint function. No "inbetween" 
solution will be able to beat that.

The beauty (and pretty much only point) of trace_printk() is the 
convenience of the well-known printf usage. It might be slow to 
parse but it is what we are using in:

  earth4:~/tip> git grep printk | wc -l
  68603

... tens of thousands of places in the kernel.

We could invent something inbetween but why? We wont be able to 
beat the performance of tracepoints that leave out the format 
string middle-man, and we dont want to.

So what Lai and Frederic is trying is to offer a transparent 
speedup for trace_printk().

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  6:46                   ` Andrew Morton
  2009-02-27  7:12                     ` Frederic Weisbecker
  2009-02-27  8:20                     ` Ingo Molnar
@ 2009-02-27 15:03                     ` Steven Rostedt
  2009-02-28  0:30                       ` Linus Torvalds
  2 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-02-27 15:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Frederic Weisbecker, Ingo Molnar, Linus Torvalds, linux-kernel,
	Lai Jiangshan, Peter Zijlstra


On Thu, 26 Feb 2009, Andrew Morton wrote:

> On Fri, 27 Feb 2009 07:19:37 +0100 Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > 
> > An new optimization is making its way to ftrace. Its purpose is to
> > make ftrace_printk() consuming less memory and be faster.
> > 
> > Written by Lai Jiangshan, the approach is to delay the formatting
> > job from tracing time to output time.
> > Currently, a call to ftrace_printk will format the whole string and
> > insert it into the ring buffer.
> 
> It does that?   eek.

Yes, I remember when Linus suggested having a pointer to the format and
the args saved. There was two issues I found.

1) You still needed to read the format string to find out what arguments 
   to save.

2) The format string could be part of module data, and if the module is
   unloaded between the time of recording and the time of reading the 
   trace, we now have a format pointer pointing to God knows what.

When posting ftrace_printk, I said this was for debugging purposes only,
and I was not about to work on optimizations there of. But Frederic and
Lai are doing a great job in that aspect ;-)

HAHAHAHHAAAA!!!! My evil scheme is working! I post some sub-optimal code, 
and have others do the nasty work for me!!!!

Oh, did I just say that out loud?

-- Steve

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27 15:03                     ` Steven Rostedt
@ 2009-02-28  0:30                       ` Linus Torvalds
  0 siblings, 0 replies; 44+ messages in thread
From: Linus Torvalds @ 2009-02-28  0:30 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Andrew Morton, Frederic Weisbecker, Ingo Molnar, linux-kernel,
	Lai Jiangshan, Peter Zijlstra



On Fri, 27 Feb 2009, Steven Rostedt wrote:
> 
> HAHAHAHHAAAA!!!! My evil scheme is working! I post some sub-optimal code, 
> and have others do the nasty work for me!!!!
> 
> Oh, did I just say that out loud?

Not only that, I will also sue you for my patent on that algorithm.

			Linus

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-27  6:19                 ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
  2009-02-27  6:46                   ` Andrew Morton
@ 2009-02-28  0:39                   ` Linus Torvalds
  2009-02-28  8:11                     ` Frederic Weisbecker
  1 sibling, 1 reply; 44+ messages in thread
From: Linus Torvalds @ 2009-02-28  0:39 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra



On Fri, 27 Feb 2009, Frederic Weisbecker wrote:
> 
> Ok, here is a try, I've mostly unified the format decoding bits.
> Most interesting other helpers were already implemented (pointer(),
> string() and number()

Ok, I don't think this is beautiful, but neither was the original code, 
so..

However, in order to make it at least a _bit_ more palatable, could we 
please agree to at least clean up this:

> +static int format_decode(const char *fmt, enum format_type *type, int *flags,
> +			  int *field_width, int *base, int *precision,
> +			  int *qualifier)

To

	struct printf_spec {
		enum format_type type;
		int flags, field_width, base, precision, qualifier;
	};

and then pass things around like

	static int format_decode(const char *fmt, struct printf_spec *spec)
	{
	..

instead? Wouldn't that be nicer? I suspect it would make the code look 
nicer too (instead of doing "*base = x", you'd see "spec->base = x" and it 
would look less like line noise in the callee, an the caller could just 
do a single "struct format_spec spec = { 0, }" to initialize that thing).

		Linus

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28  0:39                   ` Linus Torvalds
@ 2009-02-28  8:11                     ` Frederic Weisbecker
  2009-02-28  9:13                       ` Ingo Molnar
  2009-02-28 16:54                       ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Linus Torvalds
  0 siblings, 2 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28  8:11 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Fri, Feb 27, 2009 at 04:39:37PM -0800, Linus Torvalds wrote:
> 
> 
> On Fri, 27 Feb 2009, Frederic Weisbecker wrote:
> > 
> > Ok, here is a try, I've mostly unified the format decoding bits.
> > Most interesting other helpers were already implemented (pointer(),
> > string() and number()
> 
> Ok, I don't think this is beautiful, but neither was the original code, 
> so..
> 
> However, in order to make it at least a _bit_ more palatable, could we 
> please agree to at least clean up this:
> 
> > +static int format_decode(const char *fmt, enum format_type *type, int *flags,
> > +			  int *field_width, int *base, int *precision,
> > +			  int *qualifier)
> 
> To
> 
> 	struct printf_spec {
> 		enum format_type type;
> 		int flags, field_width, base, precision, qualifier;
> 	};
> 
> and then pass things around like
> 
> 	static int format_decode(const char *fmt, struct printf_spec *spec)
> 	{
> 	..
> 
> instead? Wouldn't that be nicer? I suspect it would make the code look 
> nicer too (instead of doing "*base = x", you'd see "spec->base = x" and it 
> would look less like line noise in the callee, an the caller could just 
> do a single "struct format_spec spec = { 0, }" to initialize that thing).
> 
> 		Linus

You're right, that's much proper.
See the V2 below:
---

>From 601f73ac457f720f2201ba88a9c741c9a3690119 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sat, 28 Feb 2009 08:56:18 +0100
Subject: [PATCH v2] vsprintf: unify the format decoding layer for its 3 users

An new optimization is making its way to ftrace. Its purpose is to
make ftrace_printk() consuming less memory and be faster.

Written by Lai Jiangshan, the approach is to delay the formatting
job from tracing time to output time.
Currently, a call to ftrace_printk will format the whole string and
insert it into the ring buffer. Then you can read it on /debug/tracing/trace
file.

The new implementation stores the address of the format string and
the binary parameters into the ring buffer, making the packet more compact
and faster to insert.
Later, when the user exports the traces, the format string is retrieved
with the binary parameters and the formatting job is eventually done.

Here is the result of a small comparative benchmark while putting the following
ftrace_printk on the timer interrupt. ftrace_printk is the old implementation,
ftrace_bprintk is a the new one:

ftrace_printk("This is the timer interrupt: %llu", jiffies_64);

After some time running on low load (no X, no really active processes):

ftrace_printk:  duration average: 2044 ns, avg of bytes stored per entry: 39
ftrace_bprintk: duration average: 1426 ns, avg of bytes stored per entry: 16

Higher load (started X and launched a cat running on an X console looping on
traces printing):

ftrace_printk:  duration average: 8812 ns
ftrace_bprintk: duration average: 2611 ns

Which means the new implementation can be 70 % faster on higher load.
And it consumes lesser memory on the ring buffer.

The curent implementation rewrites a lot of format decoding bits from
vsnprintf() function, making now 3 differents functions to maintain
in their duplicated parts of printf format decoding bits.

Suggested by Ingo Molnar, this patch tries to factorize the most
possible common bits from these functions.
The real common part between them is the format decoding. Although
they do somewhat similar jobs, their way to export or import the parameters
is very different. Thus, only the decoding layer is extracted, unless you see
other parts that could be worth factorized.


Changes in V2:

- Address a suggestion from Linus to group the format_decode() parameters inside
  a structure.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 lib/vsprintf.c |  828 +++++++++++++++++++++++++++++++-------------------------
 1 files changed, 466 insertions(+), 362 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3543bbe..d03167e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -700,6 +700,242 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
 }
 
+
+enum format_type {
+	FORMAT_TYPE_NONE, /* Just a string part */
+	FORMAT_TYPE_WITDH,
+	FORMAT_TYPE_PRECISION,
+	FORMAT_TYPE_CHAR,
+	FORMAT_TYPE_STR,
+	FORMAT_TYPE_PTR,
+	FORMAT_TYPE_PERCENT_CHAR,
+	FORMAT_TYPE_INVALID,
+	FORMAT_TYPE_LONG_LONG,
+	FORMAT_TYPE_ULONG,
+	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_USHORT,
+	FORMAT_TYPE_SHORT,
+	FORMAT_TYPE_UINT,
+	FORMAT_TYPE_INT,
+	FORMAT_TYPE_NRCHARS,
+	FORMAT_TYPE_SIZE_T,
+	FORMAT_TYPE_PTRDIFF
+};
+
+struct printf_spec {
+	enum format_type type;
+	int flags;			/* flags to number() */
+	int field_width;		/* width of output field */
+	int base;
+	/* min. # of digits for integers; max number of chars for from string */
+	int precision;
+	int qualifier;
+};
+
+/*
+ * Helper function to decode printf style format.
+ * Each call decode a token from the format and return the
+ * number of characters read (or likely the delta where it wants
+ * to go on the next call).
+ * The decoded token is returned through the parameters
+ *
+ * 'h', 'l', or 'L' for integer fields
+ * 'z' support added 23/7/1999 S.H.
+ * 'z' changed to 'Z' --davidm 1/25/99
+ * 't' added for ptrdiff_t
+ *
+ * @fmt: the format string
+ * @type of the token returned
+ * @flags: various flags such as +, -, # tokens..
+ * @field_width: overwritten width
+ * @base: base of the number (octal, hex, ...)
+ * @precision: precision of a number
+ * @qualifier: qualifier of a number (long, size_t, ...)
+ */
+static int format_decode(const char *fmt, struct printf_spec *spec)
+{
+	const char *start = fmt;
+	bool sign = false;
+
+	/* we finished early by reading the field width */
+	if (spec->type == FORMAT_TYPE_WITDH) {
+		if (spec->field_width < 0) {
+			spec->field_width = -spec->field_width;
+			spec->flags |= LEFT;
+		}
+		spec->type = FORMAT_TYPE_NONE;
+		goto precision;
+	}
+
+	/* we finished early by reading the precision */
+	if (spec->type == FORMAT_TYPE_PRECISION) {
+		if (spec->precision < 0)
+			spec->precision = 0;
+
+		spec->type = FORMAT_TYPE_NONE;
+		goto qualifier;
+	}
+
+	/* By default */
+	spec->type = FORMAT_TYPE_NONE;
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt == '%')
+			break;
+	}
+
+	/* Return the current non-format string */
+	if (fmt != start || !*fmt)
+		return fmt - start;
+
+	/* Process flags */
+	spec->flags = 0;
+
+	while (1) { /* this also skips first '%' */
+		bool found = true;
+
+		++fmt;
+
+		switch (*fmt) {
+		case '-':
+			spec->flags |= LEFT;
+			break;
+		case '+':
+			spec->flags |= PLUS;
+			break;
+		case ' ':
+			spec->flags |= SPACE;
+			break;
+		case '#':
+			spec->flags |= SPECIAL;
+			break;
+		case '0':
+			spec->flags |= ZEROPAD;
+			break;
+		default:
+			found = false;
+		}
+
+		if (!found)
+			break;
+	}
+
+	/* get field width */
+	spec->field_width = -1;
+
+	if (isdigit(*fmt))
+		spec->field_width = skip_atoi(&fmt);
+	else if (*fmt == '*') {
+		/* it's the next argument */
+		spec->type = FORMAT_TYPE_WITDH;
+		return ++fmt - start;
+	}
+
+precision:
+	/* get the precision */
+	spec->precision = -1;
+	if (*fmt == '.') {
+		++fmt;
+		if (isdigit(*fmt)) {
+			spec->precision = skip_atoi(&fmt);
+			if (spec->precision < 0)
+				spec->precision = 0;
+		} else if (*fmt == '*') {
+			/* it's the next argument */
+			spec->type = FORMAT_TYPE_WITDH;
+			return ++fmt - start;
+		}
+	}
+
+qualifier:
+	/* get the conversion qualifier */
+	spec->qualifier = -1;
+	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+		spec->qualifier = *fmt;
+		++fmt;
+		if (spec->qualifier == 'l' && *fmt == 'l') {
+			spec->qualifier = 'L';
+			++fmt;
+		}
+	}
+
+	/* default base */
+	spec->base = 10;
+	switch (*fmt) {
+	case 'c':
+		spec->type = FORMAT_TYPE_CHAR;
+		return ++fmt - start;
+
+	case 's':
+		spec->type = FORMAT_TYPE_STR;
+		return ++fmt - start;
+
+	case 'p':
+		spec->type = FORMAT_TYPE_PTR;
+		return fmt - start;
+		/* skip alnum */
+
+	case 'n':
+		/* FIXME:
+		* What does C99 say about the overflow case here? */
+		spec->type = FORMAT_TYPE_NRCHARS;
+		return ++fmt - start;
+
+	case '%':
+		spec->type = FORMAT_TYPE_PERCENT_CHAR;
+		return ++fmt - start;
+
+	/* integer number formats - set up the flags and "break" */
+	case 'o':
+		spec->base = 8;
+		break;
+
+	case 'x':
+		spec->flags |= SMALL;
+
+	case 'X':
+		spec->base = 16;
+		break;
+
+	case 'd':
+	case 'i':
+		sign = true;
+	case 'u':
+		break;
+
+	default:
+		spec->type = FORMAT_TYPE_INVALID;
+		return fmt - start;
+	}
+
+	if (spec->qualifier == 'L')
+		spec->type = FORMAT_TYPE_LONG_LONG;
+	else if (spec->qualifier == 'l') {
+		if (sign)
+			spec->type = FORMAT_TYPE_LONG;
+		else
+			spec->type = FORMAT_TYPE_ULONG;
+	} else if (spec->qualifier == 'Z' || spec->qualifier == 'z') {
+		spec->type = FORMAT_TYPE_SIZE_T;
+	} else if (spec->qualifier == 't') {
+		spec->type = FORMAT_TYPE_PTRDIFF;
+	} else if (spec->qualifier == 'h') {
+		if (sign)
+			spec->type = FORMAT_TYPE_SHORT;
+		else
+			spec->type = FORMAT_TYPE_USHORT;
+	} else {
+		if (sign)
+			spec->type = FORMAT_TYPE_INT;
+		else
+			spec->type = FORMAT_TYPE_UINT;
+	}
+
+	return ++fmt - start;
+}
+
+
 /**
  * vsnprintf - Format a string and place it in a buffer
  * @buf: The buffer to place the result into
@@ -726,18 +962,9 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	unsigned long long num;
-	int base;
 	char *str, *end, c;
-
-	int flags;		/* flags to number() */
-
-	int field_width;	/* width of output field */
-	int precision;		/* min. # of digits for integers; max
-				   number of chars for from string */
-	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
-				/* 'z' support added 23/7/1999 S.H.    */
-				/* 'z' changed to 'Z' --davidm 1/25/99 */
-				/* 't' added for ptrdiff_t */
+	int read;
+	struct printf_spec spec = {0};
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
@@ -758,184 +985,141 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-		repeat:
-			++fmt;		/* this also skips first '%' */
-			switch (*fmt) {
-				case '-': flags |= LEFT; goto repeat;
-				case '+': flags |= PLUS; goto repeat;
-				case ' ': flags |= SPACE; goto repeat;
-				case '#': flags |= SPECIAL; goto repeat;
-				case '0': flags |= ZEROPAD; goto repeat;
-			}
+		read = format_decode(fmt, &spec);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = va_arg(args, int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;	
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = va_arg(args, int);
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			spec.field_width = va_arg(args, int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = va_arg(args, int);
+			break;
 
-		switch (*fmt) {
-			case 'c':
-				if (!(flags & LEFT)) {
-					while (--field_width > 0) {
-						if (str < end)
-							*str = ' ';
-						++str;
-					}
-				}
-				c = (unsigned char) va_arg(args, int);
-				if (str < end)
-					*str = c;
-				++str;
-				while (--field_width > 0) {
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
-				}
-				continue;
-
-			case 's':
-				str = string(str, end, va_arg(args, char *), field_width, precision, flags);
-				continue;
-
-			case 'p':
-				str = pointer(fmt+1, str, end,
-						va_arg(args, void *),
-						field_width, precision, flags);
-				/* Skip all alphanumeric pointer suffixes */
-				while (isalnum(fmt[1]))
-					fmt++;
-				continue;
-
-			case 'n':
-				/* FIXME:
-				* What does C99 say about the overflow case here? */
-				if (qualifier == 'l') {
-					long * ip = va_arg(args, long *);
-					*ip = (str - buf);
-				} else if (qualifier == 'Z' || qualifier == 'z') {
-					size_t * ip = va_arg(args, size_t *);
-					*ip = (str - buf);
-				} else {
-					int * ip = va_arg(args, int *);
-					*ip = (str - buf);
-				}
-				continue;
 
-			case '%':
+				}
+			}
+			c = (unsigned char) va_arg(args, int);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--spec.field_width > 0) {
 				if (str < end)
-					*str = '%';
+					*str = ' ';
 				++str;
-				continue;
+			}
+			break;
 
-				/* integer number formats - set up the flags and "break" */
-			case 'o':
-				base = 8;
-				break;
+		case FORMAT_TYPE_STR:
+			str = string(str, end, va_arg(args, char *),
+					spec.field_width, spec.precision,
+					spec.flags);
+			break;
 
-			case 'x':
-				flags |= SMALL;
-			case 'X':
-				base = 16;
-				break;
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, va_arg(args, void *),
+				      spec.field_width, spec.precision,
+				      spec.flags);
+			while (isalnum(fmt[1]))
+				fmt++;
+			break;
 
-			case 'd':
-			case 'i':
-				flags |= SIGN;
-			case 'u':
-				break;
+		case FORMAT_TYPE_PERCENT_CHAR:
+			if (str < end)
+				*str = '%';
+			++str;
+			break;
 
-			default:
+		case FORMAT_TYPE_INVALID:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
 				if (str < end)
-					*str = '%';
+					*str = *fmt;
 				++str;
-				if (*fmt) {
-					if (str < end)
-						*str = *fmt;
-					++str;
-				} else {
-					--fmt;
-				}
-				continue;
+			} else {
+				--fmt;
+			}
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
+			int qualifier = spec.qualifier;
+
+			if (qualifier == 'l') {
+				long *ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z' ||
+					qualifier == 'z') {
+				size_t *ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int *ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			break;
+		}
+
+		default: {
+			enum format_type type = spec.type;
+
+			if (type == FORMAT_TYPE_LONG_LONG)
+				num = va_arg(args, long long);
+			else if (type == FORMAT_TYPE_ULONG)
+				num = va_arg(args, unsigned long);
+			else if (type == FORMAT_TYPE_LONG)
+				num = va_arg(args, unsigned long);
+			else if (type == FORMAT_TYPE_SIZE_T)
+				num = va_arg(args, size_t);
+			else if (type == FORMAT_TYPE_PTRDIFF)
+				num = va_arg(args, ptrdiff_t);
+			else if (type == FORMAT_TYPE_USHORT)
+				num = (unsigned short) va_arg(args, int);
+			else if (type == FORMAT_TYPE_SHORT)
+				num = (short) va_arg(args, int);
+			else if (type == FORMAT_TYPE_UINT)
+				num = va_arg(args, unsigned int);
+			else
+				num = va_arg(args, unsigned int);
+
+			str = number(str, end, num, spec.base,
+				spec.field_width, spec.precision, spec.flags);
 		}
-		if (qualifier == 'L')
-			num = va_arg(args, long long);
-		else if (qualifier == 'l') {
-			num = va_arg(args, unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = va_arg(args, size_t);
-		} else if (qualifier == 't') {
-			num = va_arg(args, ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) va_arg(args, int);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = va_arg(args, unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
 		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 	/* the trailing null byte doesn't count towards the total */
 	return str-buf;
+
 }
 EXPORT_SYMBOL(vsnprintf);
 
@@ -1084,8 +1268,9 @@ EXPORT_SYMBOL(sprintf);
  */
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 {
+	struct printf_spec spec = {0};
 	char *str, *end;
-	int qualifier;
+	int read;
 
 	str = (char *)bin_buf;
 	end = (char *)(bin_buf + size);
@@ -1110,57 +1295,30 @@ do {									\
 	str += sizeof(type);						\
 } while (0)
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%')
-			continue;
 
-repeat:
-		/* parse flags */
-		++fmt;		/* this also skips first '%' */
-		if (*fmt == '-' || *fmt == '+' || *fmt == ' '
-				|| *fmt == '#' || *fmt == '0')
-			goto repeat;
+	while (*fmt) {
+		read = format_decode(fmt, &spec);
 
-		/* parse field width */
-		if (isdigit(*fmt))
-			skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			save_arg(int);
-		}
+		fmt += read;
 
-		/* parse the precision */
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				save_arg(int);
-			}
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			break;
 		}
 
-		/* parse the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			save_arg(int);
+			break;
 
-		/* parse format type */
-		switch (*fmt) {
-		case 'c':
+		case FORMAT_TYPE_PRECISION:
+			save_arg(int);
+			break;
+
+		case FORMAT_TYPE_CHAR:
 			save_arg(char);
-			continue;
-		case 's': {
-			/* save the string argument */
+			break;
+
+		case FORMAT_TYPE_STR: {
 			const char *save_str = va_arg(args, char *);
 			size_t len;
 			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
@@ -1170,16 +1328,27 @@ repeat:
 			if (str + len + 1 < end)
 				memcpy(str, save_str, len + 1);
 			str += len + 1;
-			continue;
+			break;
 		}
-		case 'p':
+
+		case FORMAT_TYPE_PTR:
 			save_arg(void *);
 			/* skip all alphanumeric pointer suffixes */
 			while (isalnum(fmt[1]))
 				fmt++;
-			continue;
-		case 'n': {
+			break;
+
+		case FORMAT_TYPE_PERCENT_CHAR:
+			break;
+
+		case FORMAT_TYPE_INVALID:
+			if (!*fmt)
+				--fmt;
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
 			/* skip %n 's argument */
+			int qualifier = spec.qualifier;
 			void *skip_arg;
 			if (qualifier == 'l')
 				skip_arg = va_arg(args, long *);
@@ -1187,37 +1356,32 @@ repeat:
 				skip_arg = va_arg(args, size_t *);
 			else
 				skip_arg = va_arg(args, int *);
-			continue;
+			break;
 		}
-		case 'o':
-		case 'x':
-		case 'X':
-		case 'd':
-		case 'i':
-		case 'u':
-			/* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */
-			if (qualifier == 'L')
+
+		default: {
+			enum format_type type = spec.type;
+
+			if (type == FORMAT_TYPE_LONG_LONG)
 				save_arg(long long);
-			else if (qualifier == 'l')
+			else if (type == FORMAT_TYPE_ULONG ||
+				 type == FORMAT_TYPE_LONG)
 				save_arg(unsigned long);
-			else if (qualifier == 'Z' || qualifier == 'z')
+			else if (type == FORMAT_TYPE_SIZE_T)
 				save_arg(size_t);
-			else if (qualifier == 't')
+			else if (type == FORMAT_TYPE_PTRDIFF)
 				save_arg(ptrdiff_t);
-			else if (qualifier == 'h')
+			else if (type == FORMAT_TYPE_USHORT ||
+				 type == FORMAT_TYPE_SHORT)
 				save_arg(short);
 			else
 				save_arg(int);
-			continue;
-		default:
-			if (!*fmt)
-				--fmt;
-			continue;
+		}
 		}
 	}
-#undef save_arg
-
 	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+
+#undef save_arg
 }
 EXPORT_SYMBOL_GPL(vbin_printf);
 
@@ -1249,14 +1413,10 @@ EXPORT_SYMBOL_GPL(vbin_printf);
 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 {
 	unsigned long long num;
-	int base;
 	char *str, *end, c;
 	const char *args = (const char *)bin_buf;
 
-	int flags;
-	int field_width;
-	int precision;
-	int qualifier;
+	struct printf_spec spec = {0};
 
 	if (unlikely((int) size < 0)) {
 		/* There can be only one.. */
@@ -1290,84 +1450,37 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		int read;
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-repeat:
-		++fmt;		/* this also skips first '%' */
-		switch (*fmt) {
-		case '-':
-			flags |= LEFT;
-			goto repeat;
-		case '+':
-			flags |= PLUS;
-			goto repeat;
-		case ' ':
-			flags |= SPACE;
-			goto repeat;
-		case '#':
-			flags |= SPECIAL;
-			goto repeat;
-		case '0':
-			flags |= ZEROPAD;
-			goto repeat;
-		}
+		read = format_decode(fmt, &spec);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = get_arg(int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = get_arg(int);
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			spec.field_width = get_arg(int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = get_arg(int);
+			break;
 
-		switch (*fmt) {
-		case 'c':
-			if (!(flags & LEFT)) {
-				while (--field_width > 0) {
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
@@ -1377,58 +1490,38 @@ repeat:
 			if (str < end)
 				*str = c;
 			++str;
-			while (--field_width > 0) {
+			while (--spec.field_width > 0) {
 				if (str < end)
 					*str = ' ';
 				++str;
 			}
-			continue;
+			break;
 
-		case 's':{
+		case FORMAT_TYPE_STR: {
 			const char *str_arg = args;
 			size_t len = strlen(str_arg);
 			args += len + 1;
-			str = string(str, end, (char *)str_arg, field_width,
-					precision, flags);
-			continue;
+			str = string(str, end, (char *)str_arg,
+					spec.field_width,
+					spec.precision, spec.flags);
+			break;
 		}
 
-		case 'p':
+		case FORMAT_TYPE_PTR:
 			str = pointer(fmt+1, str, end, get_arg(void *),
-					field_width, precision, flags);
-			/* Skip all alphanumeric pointer suffixes */
+				      spec.field_width, spec.precision,
+				      spec.flags);
 			while (isalnum(fmt[1]))
 				fmt++;
-			continue;
-
-		case 'n':
-			/* skip %n */
-			continue;
+			break;
 
-		case '%':
+		case FORMAT_TYPE_PERCENT_CHAR:
 			if (str < end)
 				*str = '%';
 			++str;
-			continue;
-
-		/* integer number formats - set up the flags and "break" */
-		case 'o':
-			base = 8;
-			break;
-
-		case 'x':
-			flags |= SMALL;
-		case 'X':
-			base = 16;
 			break;
 
-		case 'd':
-		case 'i':
-			flags |= SIGN;
-		case 'u':
-			break;
-
-		default:
+		case FORMAT_TYPE_INVALID:
 			if (str < end)
 				*str = '%';
 			++str;
@@ -1439,36 +1532,47 @@ repeat:
 			} else {
 				--fmt;
 			}
-			continue;
+			break;
+
+		case FORMAT_TYPE_NRCHARS:
+			/* skip */
+			break;
+
+		default: {
+			enum format_type type = spec.type;
+
+			if (type == FORMAT_TYPE_LONG_LONG)
+				num = get_arg(long long);
+			else if (type == FORMAT_TYPE_ULONG)
+				num = get_arg(unsigned long);
+			else if (type == FORMAT_TYPE_LONG)
+				num = get_arg(unsigned long);
+			else if (type == FORMAT_TYPE_SIZE_T)
+				num = get_arg(size_t);
+			else if (type == FORMAT_TYPE_PTRDIFF)
+				num = get_arg(ptrdiff_t);
+			else if (type == FORMAT_TYPE_USHORT)
+				num = get_arg(unsigned short);
+			else if (type == FORMAT_TYPE_SHORT)
+				num = get_arg(short);
+			else if (type == FORMAT_TYPE_UINT)
+				num = get_arg(unsigned int);
+			else
+				num = get_arg(int);
+
+			str = number(str, end, num, spec.base,
+				spec.field_width, spec.precision, spec.flags);
 		}
-		if (qualifier == 'L')
-			num = get_arg(long long);
-		else if (qualifier == 'l') {
-			num = get_arg(unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = get_arg(size_t);
-		} else if (qualifier == 't') {
-			num = get_arg(ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) get_arg(short);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = get_arg(unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
 		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 #undef get_arg
 
 	/* the trailing null byte doesn't count towards the total */
-- 
1.6.1





^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28  8:11                     ` Frederic Weisbecker
@ 2009-02-28  9:13                       ` Ingo Molnar
  2009-02-28 19:45                         ` [PATCH 1/5] add binary printf Frederic Weisbecker
                                           ` (6 more replies)
  2009-02-28 16:54                       ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Linus Torvalds
  1 sibling, 7 replies; 44+ messages in thread
From: Ingo Molnar @ 2009-02-28  9:13 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> > instead? Wouldn't that be nicer? I suspect it would make the 
> > code look nicer too (instead of doing "*base = x", you'd see 
> > "spec->base = x" and it would look less like line noise in 
> > the callee, an the caller could just do a single "struct 
> > format_spec spec = { 0, }" to initialize that thing).
> > 
> > 		Linus
> 
> You're right, that's much proper.
> See the V2 below:

Just a few (very) small code style pet peeves:

> +struct printf_spec {
> +	enum format_type type;
> +	int flags;			/* flags to number() */
> +	int field_width;		/* width of output field */
> +	int base;
> +	/* min. # of digits for integers; max number of chars for from string */
> +	int precision;
> +	int qualifier;
> +};

doesnt it look a bit tidier this way:

 struct printf_spec {
	enum format_type	type;
	int			flags;		/* flags to number()     */
	int			field_width;	/* width of output field */
	int			base;
	int			precision;	/* # of digits/chars     */
	int			qualifier;
 };

?

> +		case '+':
> +			spec->flags |= PLUS;
> +			break;
> +		case ' ':
> +			spec->flags |= SPACE;
> +			break;
> +		case '#':
> +			spec->flags |= SPECIAL;
> +			break;
> +		case '0':
> +			spec->flags |= ZEROPAD;
> +			break;
> +		default:
> +			found = false;

btw., this is one of the cases where i think the original style 
was more useful:

> +		case '+':	spec->flags |= PLUS;  break;
> +		case ' ':	spec->flags |= SPACE; break;
[etc.]

as it's always good to compress repetitive patterns of code.

(If checkpatch complains about this then ignore checkpatch.)

> +	case 'n':
> +		/* FIXME:
> +		* What does C99 say about the overflow case here? */

(this comment looks a bit funny.)

> +		default: {
> +			enum format_type type = spec.type;
> +
> +			if (type == FORMAT_TYPE_LONG_LONG)
> +				num = get_arg(long long);
> +			else if (type == FORMAT_TYPE_ULONG)
> +				num = get_arg(unsigned long);
> +			else if (type == FORMAT_TYPE_LONG)
> +				num = get_arg(unsigned long);
> +			else if (type == FORMAT_TYPE_SIZE_T)
> +				num = get_arg(size_t);
> +			else if (type == FORMAT_TYPE_PTRDIFF)
> +				num = get_arg(ptrdiff_t);
> +			else if (type == FORMAT_TYPE_USHORT)
> +				num = get_arg(unsigned short);
> +			else if (type == FORMAT_TYPE_SHORT)
> +				num = get_arg(short);
> +			else if (type == FORMAT_TYPE_UINT)
> +				num = get_arg(unsigned int);
> +			else
> +				num = get_arg(int);

Wouldnt it be cleaner as a switch() statement and to put into a 
helper function?

Also, could you please resend the current stuff with a 0/ 
description and a diffstat in the 0 mail so that we can all see 
all the patches again and the total impact?

	Ingo

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28  8:11                     ` Frederic Weisbecker
  2009-02-28  9:13                       ` Ingo Molnar
@ 2009-02-28 16:54                       ` Linus Torvalds
  2009-02-28 17:18                         ` Frederic Weisbecker
  1 sibling, 1 reply; 44+ messages in thread
From: Linus Torvalds @ 2009-02-28 16:54 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra



On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
> 
> You're right, that's much proper.
> See the V2 below:

Good. This looks better.

One final (?) nit:

> +		default: {
> +			enum format_type type = spec.type;
> +
> +			if (type == FORMAT_TYPE_LONG_LONG)
> +				num = va_arg(args, long long);
> +			else if (type == FORMAT_TYPE_ULONG)
> +				num = va_arg(args, unsigned long);
> +			else if (type == FORMAT_TYPE_LONG)
> +				num = va_arg(args, unsigned long);
> +			else if (type == FORMAT_TYPE_SIZE_T)
> +				num = va_arg(args, size_t);
> +			else if (type == FORMAT_TYPE_PTRDIFF)
> +				num = va_arg(args, ptrdiff_t);
> +			else if (type == FORMAT_TYPE_USHORT)
> +				num = (unsigned short) va_arg(args, int);
> +			else if (type == FORMAT_TYPE_SHORT)
> +				num = (short) va_arg(args, int);
> +			else if (type == FORMAT_TYPE_UINT)
> +				num = va_arg(args, unsigned int);
> +			else
> +				num = va_arg(args, unsigned int);
> +
> +			str = number(str, end, num, spec.base,
> +				spec.field_width, spec.precision, spec.flags);

You've got three copes of these (in the three different users), and that 
just looks horrid. I see why you did it, but even something like

		case FORMAT_TYPE_LONG_LONG:
			num = va_arg(args, long long);
			goto handle_number;
		case FORMAT_TYPE_ULONG:
		...
		case FORMAT_TYPE_INT:
			num = va_arg(args, int);
	handle_number:
		str = number(str, end, num, &spec);
		break;

would be better.

And yes, please do pass in "spec" to the "number()/pointer()/string()" 
routines instead of exploding it into the individual numbers. When 
cleaning up, let's just do it properly..

		Linus

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28 16:54                       ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Linus Torvalds
@ 2009-02-28 17:18                         ` Frederic Weisbecker
  0 siblings, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 17:18 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ingo Molnar, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Sat, Feb 28, 2009 at 08:54:15AM -0800, Linus Torvalds wrote:
> 
> 
> On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
> > 
> > You're right, that's much proper.
> > See the V2 below:
> 
> Good. This looks better.
> 
> One final (?) nit:
> 
> > +		default: {
> > +			enum format_type type = spec.type;
> > +
> > +			if (type == FORMAT_TYPE_LONG_LONG)
> > +				num = va_arg(args, long long);
> > +			else if (type == FORMAT_TYPE_ULONG)
> > +				num = va_arg(args, unsigned long);
> > +			else if (type == FORMAT_TYPE_LONG)
> > +				num = va_arg(args, unsigned long);
> > +			else if (type == FORMAT_TYPE_SIZE_T)
> > +				num = va_arg(args, size_t);
> > +			else if (type == FORMAT_TYPE_PTRDIFF)
> > +				num = va_arg(args, ptrdiff_t);
> > +			else if (type == FORMAT_TYPE_USHORT)
> > +				num = (unsigned short) va_arg(args, int);
> > +			else if (type == FORMAT_TYPE_SHORT)
> > +				num = (short) va_arg(args, int);
> > +			else if (type == FORMAT_TYPE_UINT)
> > +				num = va_arg(args, unsigned int);
> > +			else
> > +				num = va_arg(args, unsigned int);
> > +
> > +			str = number(str, end, num, spec.base,
> > +				spec.field_width, spec.precision, spec.flags);
> 
> You've got three copes of these (in the three different users), and that 
> just looks horrid. I see why you did it, but even something like
> 
> 		case FORMAT_TYPE_LONG_LONG:
> 			num = va_arg(args, long long);
> 			goto handle_number;
> 		case FORMAT_TYPE_ULONG:
> 		...
> 		case FORMAT_TYPE_INT:
> 			num = va_arg(args, int);
> 	handle_number:
> 		str = number(str, end, num, &spec);
> 		break;
> 
> would be better.
> 
> And yes, please do pass in "spec" to the "number()/pointer()/string()" 
> routines instead of exploding it into the individual numbers. When 
> cleaning up, let's just do it properly..
> 
> 		Linus


Ok, I'm addressing your comments and those from Ingo, then I will rebase
the whole patchset (with the ftrace_bprintk related things) against latest
-tip and resend it.

Thanks!


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH 1/5] add binary printf
  2009-02-28  9:13                       ` Ingo Molnar
@ 2009-02-28 19:45                         ` Frederic Weisbecker
  2009-02-28 20:16                         ` [PATCH 2/5] ftrace: infrastructure for supporting binary record Frederic Weisbecker
                                           ` (5 subsequent siblings)
  6 siblings, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 19:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

From: Lai Jiangshan <laijs@cn.fujitsu.com>

Impact: Add APIs for binary trace printk infrastructure

vbin_printf(): write args to binary buffer, string is copied
when "%s" is occurred.

bstr_printf(): read from binary buffer for args and format a string

[fweisbec@gmail.com: rebase against latest -tip]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/string.h |    7 +
 lib/Kconfig            |    3 +
 lib/vsprintf.c         |  442 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 452 insertions(+), 0 deletions(-)

diff --git a/include/linux/string.h b/include/linux/string.h
index d18fc19..27ac317 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -10,6 +10,7 @@
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>	/* for size_t */
 #include <linux/stddef.h>	/* for NULL */
+#include <stdarg.h>
 
 extern char *strndup_user(const char __user *, long);
 
@@ -111,6 +112,12 @@ extern void argv_free(char **argv);
 
 extern bool sysfs_streq(const char *s1, const char *s2);
 
+#ifdef CONFIG_BINARY_PRINTF
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args);
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
+#endif
+
 extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
 			const void *from, size_t available);
 
diff --git a/lib/Kconfig b/lib/Kconfig
index fc8ea1c..cd57690 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -2,6 +2,9 @@
 # Library configuration
 #
 
+config BINARY_PRINTF
+	def_bool n
+
 menu "Library routines"
 
 config BITREVERSE
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 0fbd012..3543bbe 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1058,6 +1058,448 @@ int sprintf(char * buf, const char *fmt, ...)
 }
 EXPORT_SYMBOL(sprintf);
 
+#ifdef CONFIG_BINARY_PRINTF
+/*
+ * bprintf service:
+ * vbin_printf() - VA arguments to binary data
+ * bstr_printf() - Binary data to text string
+ */
+
+/**
+ * vbin_printf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer(by words(32bits), not characters)
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The format follows C99 vsnprintf, except %n is ignored, and its argument
+ * is skiped.
+ *
+ * The return value is the number of words(32bits) which would be generated for
+ * the given input.
+ *
+ * NOTE:
+ * If the return value is greater than @size, the resulting bin_buf is NOT
+ * valid for bstr_printf().
+ */
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
+{
+	char *str, *end;
+	int qualifier;
+
+	str = (char *)bin_buf;
+	end = (char *)(bin_buf + size);
+
+#define save_arg(type)							\
+do {									\
+	if (sizeof(type) == 8) {					\
+		unsigned long long value;				\
+		str = PTR_ALIGN(str, sizeof(u32));			\
+		value = va_arg(args, unsigned long long);		\
+		if (str + sizeof(type) <= end) {			\
+			*(u32 *)str = *(u32 *)&value;			\
+			*(u32 *)(str + 4) = *((u32 *)&value + 1);	\
+		}							\
+	} else {							\
+		unsigned long value;					\
+		str = PTR_ALIGN(str, sizeof(type));			\
+		value = va_arg(args, int);				\
+		if (str + sizeof(type) <= end)				\
+			*(typeof(type) *)str = (type)value;		\
+	}								\
+	str += sizeof(type);						\
+} while (0)
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%')
+			continue;
+
+repeat:
+		/* parse flags */
+		++fmt;		/* this also skips first '%' */
+		if (*fmt == '-' || *fmt == '+' || *fmt == ' '
+				|| *fmt == '#' || *fmt == '0')
+			goto repeat;
+
+		/* parse field width */
+		if (isdigit(*fmt))
+			skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			save_arg(int);
+		}
+
+		/* parse the precision */
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				save_arg(int);
+			}
+		}
+
+		/* parse the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+			qualifier = *fmt;
+			++fmt;
+			if (qualifier == 'l' && *fmt == 'l') {
+				qualifier = 'L';
+				++fmt;
+			}
+		}
+
+		/* parse format type */
+		switch (*fmt) {
+		case 'c':
+			save_arg(char);
+			continue;
+		case 's': {
+			/* save the string argument */
+			const char *save_str = va_arg(args, char *);
+			size_t len;
+			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
+					|| (unsigned long)save_str < PAGE_SIZE)
+				save_str = "<NULL>";
+			len = strlen(save_str);
+			if (str + len + 1 < end)
+				memcpy(str, save_str, len + 1);
+			str += len + 1;
+			continue;
+		}
+		case 'p':
+			save_arg(void *);
+			/* skip all alphanumeric pointer suffixes */
+			while (isalnum(fmt[1]))
+				fmt++;
+			continue;
+		case 'n': {
+			/* skip %n 's argument */
+			void *skip_arg;
+			if (qualifier == 'l')
+				skip_arg = va_arg(args, long *);
+			else if (qualifier == 'Z' || qualifier == 'z')
+				skip_arg = va_arg(args, size_t *);
+			else
+				skip_arg = va_arg(args, int *);
+			continue;
+		}
+		case 'o':
+		case 'x':
+		case 'X':
+		case 'd':
+		case 'i':
+		case 'u':
+			/* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */
+			if (qualifier == 'L')
+				save_arg(long long);
+			else if (qualifier == 'l')
+				save_arg(unsigned long);
+			else if (qualifier == 'Z' || qualifier == 'z')
+				save_arg(size_t);
+			else if (qualifier == 't')
+				save_arg(ptrdiff_t);
+			else if (qualifier == 'h')
+				save_arg(short);
+			else
+				save_arg(int);
+			continue;
+		default:
+			if (!*fmt)
+				--fmt;
+			continue;
+		}
+	}
+#undef save_arg
+
+	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+}
+EXPORT_SYMBOL_GPL(vbin_printf);
+
+/**
+ * bstr_printf - Format a string from binary arguments and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @bin_buf: Binary arguments for the format string
+ *
+ * This function like C99 vsnprintf, but the difference is that vsnprintf gets
+ * arguments from stack, and bstr_printf gets arguments from @bin_buf which is
+ * a binary buffer that generated by vbin_printf.
+ *
+ * The format follows C99 vsnprintf, but has some extensions:
+ * %pS output the name of a text symbol
+ * %pF output the name of a function pointer
+ * %pR output the address range in a struct resource
+ * %n is ignored
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf(). If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ */
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
+{
+	unsigned long long num;
+	int base;
+	char *str, *end, c;
+	const char *args = (const char *)bin_buf;
+
+	int flags;
+	int field_width;
+	int precision;
+	int qualifier;
+
+	if (unlikely((int) size < 0)) {
+		/* There can be only one.. */
+		static char warn = 1;
+		WARN_ON(warn);
+		warn = 0;
+		return 0;
+	}
+
+	str = buf;
+	end = buf + size;
+
+#define get_arg(type)							\
+({									\
+	typeof(type) value;						\
+	if (sizeof(type) == 8) {					\
+		args = PTR_ALIGN(args, sizeof(u32));			\
+		*(u32 *)&value = *(u32 *)args;				\
+		*((u32 *)&value + 1) = *(u32 *)(args + 4);		\
+	} else {							\
+		args = PTR_ALIGN(args, sizeof(type));			\
+		value = *(typeof(type) *)args;				\
+	}								\
+	args += sizeof(type);						\
+	value;								\
+})
+
+	/* Make sure end is always >= buf */
+	if (end < buf) {
+		end = ((void *)-1);
+		size = end - buf;
+	}
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			if (str < end)
+				*str = *fmt;
+			++str;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+repeat:
+		++fmt;		/* this also skips first '%' */
+		switch (*fmt) {
+		case '-':
+			flags |= LEFT;
+			goto repeat;
+		case '+':
+			flags |= PLUS;
+			goto repeat;
+		case ' ':
+			flags |= SPACE;
+			goto repeat;
+		case '#':
+			flags |= SPECIAL;
+			goto repeat;
+		case '0':
+			flags |= ZEROPAD;
+			goto repeat;
+		}
+
+		/* get field width */
+		field_width = -1;
+		if (isdigit(*fmt))
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = get_arg(int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if (isdigit(*fmt))
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = get_arg(int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+			qualifier = *fmt;
+			++fmt;
+			if (qualifier == 'l' && *fmt == 'l') {
+				qualifier = 'L';
+				++fmt;
+			}
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+		case 'c':
+			if (!(flags & LEFT)) {
+				while (--field_width > 0) {
+					if (str < end)
+						*str = ' ';
+					++str;
+				}
+			}
+			c = (unsigned char) get_arg(char);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--field_width > 0) {
+				if (str < end)
+					*str = ' ';
+				++str;
+			}
+			continue;
+
+		case 's':{
+			const char *str_arg = args;
+			size_t len = strlen(str_arg);
+			args += len + 1;
+			str = string(str, end, (char *)str_arg, field_width,
+					precision, flags);
+			continue;
+		}
+
+		case 'p':
+			str = pointer(fmt+1, str, end, get_arg(void *),
+					field_width, precision, flags);
+			/* Skip all alphanumeric pointer suffixes */
+			while (isalnum(fmt[1]))
+				fmt++;
+			continue;
+
+		case 'n':
+			/* skip %n */
+			continue;
+
+		case '%':
+			if (str < end)
+				*str = '%';
+			++str;
+			continue;
+
+		/* integer number formats - set up the flags and "break" */
+		case 'o':
+			base = 8;
+			break;
+
+		case 'x':
+			flags |= SMALL;
+		case 'X':
+			base = 16;
+			break;
+
+		case 'd':
+		case 'i':
+			flags |= SIGN;
+		case 'u':
+			break;
+
+		default:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
+				if (str < end)
+					*str = *fmt;
+				++str;
+			} else {
+				--fmt;
+			}
+			continue;
+		}
+		if (qualifier == 'L')
+			num = get_arg(long long);
+		else if (qualifier == 'l') {
+			num = get_arg(unsigned long);
+			if (flags & SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'Z' || qualifier == 'z') {
+			num = get_arg(size_t);
+		} else if (qualifier == 't') {
+			num = get_arg(ptrdiff_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) get_arg(short);
+			if (flags & SIGN)
+				num = (signed short) num;
+		} else {
+			num = get_arg(unsigned int);
+			if (flags & SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, end, num, base,
+				field_width, precision, flags);
+	}
+	if (size > 0) {
+		if (str < end)
+			*str = '\0';
+		else
+			end[-1] = '\0';
+	}
+#undef get_arg
+
+	/* the trailing null byte doesn't count towards the total */
+	return str - buf;
+}
+EXPORT_SYMBOL_GPL(bstr_printf);
+
+/**
+ * bprintf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer(by words(32bits), not characters)
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The function returns the number of words(u32) written
+ * into @bin_buf.
+ */
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = vbin_printf(bin_buf, size, fmt, args);
+	va_end(args);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bprintf);
+
+#endif /* CONFIG_BINARY_PRINTF */
+
 /**
  * vsscanf - Unformat a buffer into a list of arguments
  * @buf:	input buffer
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 2/5] ftrace: infrastructure for supporting binary record
  2009-02-28  9:13                       ` Ingo Molnar
  2009-02-28 19:45                         ` [PATCH 1/5] add binary printf Frederic Weisbecker
@ 2009-02-28 20:16                         ` Frederic Weisbecker
  2009-03-02 16:27                           ` Steven Rostedt
  2009-02-28 21:30                         ` [PATCH 3/5] ftrace: add ftrace_bprintk() Frederic Weisbecker
                                           ` (4 subsequent siblings)
  6 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 20:16 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

From: Lai Jiangshan <laijs@cn.fujitsu.com>

Impact: save on memory for tracing

Current tracers are typically using a struct(like struct ftrace_entry,
struct ctx_switch_entry, struct special_entr etc...)to record a binary
event. These structs can only record a their own kind of events.
A new kind of tracer need a new struct and a lot of code too handle it.

So we need a generic binary record for events. This infrastructure
is for this purpose.

[fweisbec@gmail.com: rebase against latest -tip]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace.h       |    3 +
 kernel/trace/Kconfig         |    6 +++
 kernel/trace/Makefile        |    1 +
 kernel/trace/trace.c         |   55 ++++++++++++++++++++++++++
 kernel/trace/trace.h         |   12 ++++++
 kernel/trace/trace_bprintk.c |   87 ++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.c  |   76 ++++++++++++++++++++++++++++++++++++
 7 files changed, 240 insertions(+), 0 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 847bb3c..8ac2fac 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+#endif
 
 /* May be defined in arch */
 extern int ftrace_arch_read_dyn_info(char *buf, int size);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 02bc5d6..2e84fc7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
 	  This is done by setting the current return address on the current
 	  task structure into a stack of calls.
 
+config TRACE_BPRINTK
+	bool "Binary printk for tracing"
+	default y
+	depends on TRACING
+	select BINARY_PRINTF
+
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c931fe0..dba6157 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
+obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c5e39cd..3a3ae19 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3572,6 +3572,61 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
 }
 EXPORT_SYMBOL_GPL(__ftrace_vprintk);
 
+/**
+ * trace_vbprintk - write binary msg to tracing buffer
+ *
+ * Caller must insure @fmt are valid when msg is in tracing buffer.
+ */
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
+{
+	static DEFINE_SPINLOCK(trace_buf_lock);
+	static u32 trace_buf[TRACE_BUF_SIZE];
+
+	struct ring_buffer_event *event;
+	struct trace_array *tr = &global_trace;
+	struct trace_array_cpu *data;
+	struct bprintk_entry *entry;
+	unsigned long flags;
+	int cpu, len = 0, size, pc;
+
+	if (tracing_disabled || !trace_bprintk_enable)
+		return 0;
+
+	pc = preempt_count();
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+
+	if (unlikely(atomic_read(&data->disabled)))
+		goto out;
+
+	spin_lock_irqsave(&trace_buf_lock, flags);
+	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+	if (len > TRACE_BUF_SIZE || len < 0)
+		goto out_unlock;
+
+	size = sizeof(*entry) + sizeof(u32) * len;
+	event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
+	if (!event)
+		goto out_unlock;
+	entry = ring_buffer_event_data(event);
+	entry->ip			= ip;
+	entry->fmt			= fmt;
+
+	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+	ring_buffer_unlock_commit(tr->buffer, event);
+
+out_unlock:
+	spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+out:
+	preempt_enable_notrace();
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_vbprintk);
+
 static int trace_panic_handler(struct notifier_block *this,
 			       unsigned long event, void *unused)
 {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f6fa0b9..92703b1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,6 +20,7 @@ enum trace_type {
 	TRACE_WAKE,
 	TRACE_STACK,
 	TRACE_PRINT,
+	TRACE_BPRINTK,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -124,6 +125,16 @@ struct print_entry {
 	char			buf[];
 };
 
+struct bprintk_entry {
+	struct trace_entry ent;
+	unsigned long ip;
+	const char *fmt;
+	u32 buf[];
+};
+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_bprintk_enable;
+#endif
+
 #define TRACE_OLD_SIZE		88
 
 struct trace_field_cont {
@@ -284,6 +295,7 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
+		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
new file mode 100644
index 0000000..1f8e532
--- /dev/null
+++ b/kernel/trace/trace_bprintk.c
@@ -0,0 +1,87 @@
+/*
+ * trace binary printk
+ *
+ * Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/marker.h>
+#include <linux/uaccess.h>
+
+#include "trace.h"
+
+/* binary printk basic */
+static DEFINE_MUTEX(btrace_mutex);
+static int btrace_metadata_count;
+
+static inline void lock_btrace(void)
+{
+	mutex_lock(&btrace_mutex);
+}
+
+static inline void unlock_btrace(void)
+{
+	mutex_unlock(&btrace_mutex);
+}
+
+static void get_btrace_metadata(void)
+{
+	lock_btrace();
+	btrace_metadata_count++;
+	unlock_btrace();
+}
+
+static void put_btrace_metadata(void)
+{
+	lock_btrace();
+	btrace_metadata_count--;
+	unlock_btrace();
+}
+
+/* events tracer */
+int trace_bprintk_enable;
+
+static void start_bprintk_trace(struct trace_array *tr)
+{
+	get_btrace_metadata();
+	tracing_reset_online_cpus(tr);
+	trace_bprintk_enable = 1;
+}
+
+static void stop_bprintk_trace(struct trace_array *tr)
+{
+	trace_bprintk_enable = 0;
+	tracing_reset_online_cpus(tr);
+	put_btrace_metadata();
+}
+
+static int init_bprintk_trace(struct trace_array *tr)
+{
+	start_bprintk_trace(tr);
+	return 0;
+}
+
+static struct tracer bprintk_trace __read_mostly =
+{
+	.name	     = "events",
+	.init	     = init_bprintk_trace,
+	.reset	     = stop_bprintk_trace,
+	.start	     = start_bprintk_trace,
+	.stop	     = stop_bprintk_trace,
+};
+
+static __init int init_bprintk(void)
+{
+	return register_tracer(&bprintk_trace);
+}
+
+device_initcall(init_bprintk);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 9fc8150..7897b5d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	return len;
 }
 
+static int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (!len)
+		return 0;
+
+	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
+
+	/* If we can't write it all, don't bother writing anything */
+	if (ret >= len)
+		return 0;
+
+	s->len += ret;
+
+	return len;
+}
+
 /**
  * trace_seq_puts - trace sequence printing of simple string
  * @s: trace sequence descriptor
@@ -887,6 +907,61 @@ static struct trace_event trace_print_event = {
 	.raw		= trace_print_raw,
 };
 
+/* TRACE_BPRINTK */
+static enum print_line_t
+trace_bprintk_print(struct trace_iterator *iter, int flags)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct bprintk_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (!seq_print_ip_sym(s, field->ip, flags))
+		goto partial;
+
+	if (!trace_seq_puts(s, ": "))
+		goto partial;
+
+	if (!trace_seq_bprintf(s, field->fmt, field->buf))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t
+trace_bprintk_raw(struct trace_iterator *iter, int flags)
+{
+	struct trace_entry *entry = iter->ent;
+	struct trace_seq *s = &iter->seq;
+	struct bprintk_entry *field;
+
+	trace_assign_type(field, entry);
+
+	if (!trace_seq_printf(s, ": %lx : ", field->ip))
+		goto partial;
+
+	if (!trace_seq_bprintf(s, field->fmt, field->buf))
+		goto partial;
+
+	return TRACE_TYPE_HANDLED;
+
+ partial:
+	return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_bprintk_event = {
+	.type	 	= TRACE_BPRINTK,
+	.trace		= trace_bprintk_print,
+	.latency_trace	= trace_bprintk_print,
+	.raw		= trace_bprintk_raw,
+	.hex		= trace_nop_print,
+	.binary		= trace_nop_print,
+};
+
 static struct trace_event *events[] __initdata = {
 	&trace_fn_event,
 	&trace_ctx_event,
@@ -895,6 +970,7 @@ static struct trace_event *events[] __initdata = {
 	&trace_stack_event,
 	&trace_user_stack_event,
 	&trace_print_event,
+	&trace_bprintk_event,
 	NULL
 };
 
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-02-28  9:13                       ` Ingo Molnar
  2009-02-28 19:45                         ` [PATCH 1/5] add binary printf Frederic Weisbecker
  2009-02-28 20:16                         ` [PATCH 2/5] ftrace: infrastructure for supporting binary record Frederic Weisbecker
@ 2009-02-28 21:30                         ` Frederic Weisbecker
  2009-03-02 16:34                           ` Steven Rostedt
  2009-02-28 22:16                         ` [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk Frederic Weisbecker
                                           ` (3 subsequent siblings)
  6 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 21:30 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

From: Lai Jiangshan <laijs@cn.fujitsu.com>

Impact: Add a generic printk() for ftrace, like ftrace_printk()

ftrace_bprintk() use the infrastructure to record events on ring_buffer.

[fweisbec@gmail.com: ported to latest -tip and made it work if !CONFIG_MODULES]

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/asm-generic/vmlinux.lds.h |    3 +
 include/linux/ftrace.h            |   21 ++++++
 include/linux/module.h            |    5 ++
 kernel/module.c                   |    6 ++
 kernel/trace/trace.c              |   15 ++++
 kernel/trace/trace_bprintk.c      |  132 ++++++++++++++++++++++++++++++++++++-
 6 files changed, 181 insertions(+), 1 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9d97491..13f20f4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -100,6 +100,9 @@
 		*(__vermagic)		/* Kernel version magic */	\
 		*(__markers_strings)	/* Markers: strings */		\
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
+		VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;	\
+		*(__trace_printk_fmt)	/* Trace_printk fmt' pointer */	\
+		VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;		\
 	}								\
 									\
 	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8ac2fac..f82b986 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -224,6 +224,27 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
 #ifdef CONFIG_TRACE_BPRINTK
 extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
+		__attribute__ ((format (printf, 2, 3)));
+
+static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
+		__attribute__ ((format (printf, 1, 2)));
+static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
+#define __ftrace_bprintk_check_format(fmt, args...)			\
+do {									\
+	if (0)								\
+		____ftrace_bprintk_check_format(fmt, ##args);		\
+} while (0)
+
+#define ftrace_bprintk(fmt, args...)					\
+do {									\
+	static char *__attribute__((section("__trace_bprintk_fmt")))	\
+			ftrace_bprintk_fmt = fmt;			\
+	__ftrace_bprintk_check_format(fmt, ##args);			\
+	__ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);	\
+} while (0)
+#else
+#define ftrace_bprintk ftrace_printk
 #endif
 
 /* May be defined in arch */
diff --git a/include/linux/module.h b/include/linux/module.h
index 07b1cb4..c4f2845 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -347,6 +347,11 @@ struct module
 	unsigned int num_tracepoints;
 #endif
 
+#ifdef CONFIG_TRACE_BPRINTK
+	const char **trace_bprintk_fmt_start;
+	unsigned int num_trace_bprintk_fmt;
+#endif
+
 #ifdef CONFIG_MODULE_UNLOAD
 	/* What modules depend on me? */
 	struct list_head modules_which_use_me;
diff --git a/kernel/module.c b/kernel/module.c
index 90a6d63..fa96909 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2192,6 +2192,12 @@ static noinline struct module *load_module(void __user *umod,
 					&mod->num_tracepoints);
 #endif
 
+#ifdef CONFIG_TRACE_BPRINTK
+	mod->trace_bprintk_fmt_start = section_objs(hdr, sechdrs, secstrings,
+			"__trace_bprintk_fmt", sizeof(char *),
+			&mod->num_trace_bprintk_fmt);
+#endif
+
 #ifdef CONFIG_MODVERSIONS
 	if ((mod->num_syms && !mod->crcs)
 	    || (mod->num_gpl_syms && !mod->gpl_crcs)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3a3ae19..0e30dce 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3627,6 +3627,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(trace_vbprintk);
 
+int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
+{
+	int ret;
+	va_list ap;
+
+	if (!fmt)
+		return 0;
+
+	va_start(ap, fmt);
+	ret = trace_vbprintk(ip, fmt, ap);
+	va_end(ap);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__ftrace_bprintk);
+
 static int trace_panic_handler(struct notifier_block *this,
 			       unsigned long event, void *unused)
 {
diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
index 1f8e532..28e6286 100644
--- a/kernel/trace/trace_bprintk.c
+++ b/kernel/trace/trace_bprintk.c
@@ -19,9 +19,23 @@
 
 #include "trace.h"
 
+#ifdef CONFIG_MODULES
+
 /* binary printk basic */
 static DEFINE_MUTEX(btrace_mutex);
 static int btrace_metadata_count;
+/*
+ * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
+ * which are queued on trace_bprintk_fmt_list.
+ */
+static LIST_HEAD(trace_bprintk_fmt_list);
+
+struct trace_bprintk_fmt {
+	struct list_head list;
+	int count;
+	char fmt[0];
+};
+
 
 static inline void lock_btrace(void)
 {
@@ -33,6 +47,93 @@ static inline void unlock_btrace(void)
 	mutex_unlock(&btrace_mutex);
 }
 
+/*
+ * If trace_bprintk() is not active, we release the trace_bprintk's
+ * formats that no modules has reference to.
+ */
+static inline void shrink_trace_bprintk_fmt(void)
+{
+	struct trace_bprintk_fmt *pos, *next;
+	list_for_each_entry_safe(pos, next, &trace_bprintk_fmt_list, list) {
+		if (!pos->count) {
+			list_del(&pos->list);
+			kfree(pos);
+		}
+	}
+}
+
+static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
+{
+	struct trace_bprintk_fmt *pos;
+	list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
+		if (!strcmp(pos->fmt, fmt))
+			return pos;
+	}
+	return NULL;
+}
+
+static
+void hold_module_trace_bprintk_format(const char **start, const char **end)
+{
+	const char **iter;
+	lock_btrace();
+	for (iter = start; iter < end; iter++) {
+		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
+		if (tb_fmt) {
+			tb_fmt->count++;
+			*iter = tb_fmt->fmt;
+			continue;
+		}
+
+		tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
+				+ strlen(*iter) + 1, GFP_KERNEL);
+		if (tb_fmt) {
+			list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
+			tb_fmt->count = 1;
+			strcpy(tb_fmt->fmt, *iter);
+			*iter = tb_fmt->fmt;
+		} else
+			*iter = NULL;
+	}
+	unlock_btrace();
+}
+
+static
+void release_module_trace_bprintk_format(const char **start, const char **end)
+{
+	const char **iter;
+	lock_btrace();
+	for (iter = start; iter < end; iter++) {
+		struct trace_bprintk_fmt *tb_fmt;
+		if (!*iter)
+			continue;
+
+		tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
+		tb_fmt->count--;
+		if (!tb_fmt->count && !btrace_metadata_count) {
+			list_del(&tb_fmt->list);
+			kfree(tb_fmt);
+		}
+	}
+	unlock_btrace();
+}
+
+static int module_trace_bprintk_format_notify(struct notifier_block *self,
+		unsigned long val, void *data)
+{
+	struct module *mod = data;
+	if (mod->num_trace_bprintk_fmt) {
+		const char **start = mod->trace_bprintk_fmt_start;
+		const char **end = start + mod->num_trace_bprintk_fmt;
+
+		if (val == MODULE_STATE_COMING)
+			hold_module_trace_bprintk_format(start, end);
+		else if (val == MODULE_STATE_GOING)
+			release_module_trace_bprintk_format(start, end);
+	}
+	return 0;
+}
+
 static void get_btrace_metadata(void)
 {
 	lock_btrace();
@@ -44,9 +145,31 @@ static void put_btrace_metadata(void)
 {
 	lock_btrace();
 	btrace_metadata_count--;
+
+	if (!btrace_metadata_count) {
+		shrink_trace_bprintk_fmt();
+	}
 	unlock_btrace();
 }
 
+#else /* !CONFIG_MODULES */
+__init static int
+module_trace_bprintk_format_notify(struct notifier_block *self,
+		unsigned long val, void *data)
+{
+	return 0;
+}
+
+static inline void get_btrace_metadata(void) { }
+static inline void put_btrace_metadata(void) { }
+#endif /* CONFIG_MODULES */
+
+
+__initdata_or_module static
+struct notifier_block module_trace_bprintk_format_nb = {
+	.notifier_call = module_trace_bprintk_format_notify,
+};
+
 /* events tracer */
 int trace_bprintk_enable;
 
@@ -81,7 +204,14 @@ static struct tracer bprintk_trace __read_mostly =
 
 static __init int init_bprintk(void)
 {
-	return register_tracer(&bprintk_trace);
+	int ret = register_module_notifier(&module_trace_bprintk_format_nb);
+	if (ret)
+		return ret;
+
+	ret = register_tracer(&bprintk_trace);
+	if (ret)
+		unregister_module_notifier(&module_trace_bprintk_format_nb);
+	return ret;
 }
 
 device_initcall(init_bprintk);
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk
  2009-02-28  9:13                       ` Ingo Molnar
                                           ` (2 preceding siblings ...)
  2009-02-28 21:30                         ` [PATCH 3/5] ftrace: add ftrace_bprintk() Frederic Weisbecker
@ 2009-02-28 22:16                         ` Frederic Weisbecker
  2009-03-02 16:37                           ` Steven Rostedt
  2009-02-28 23:11                         ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
                                           ` (2 subsequent siblings)
  6 siblings, 1 reply; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 22:16 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

Impact: faster and lighter tracing

Now that we have ftrace_bprintk() which is faster and consume lesser
memory than ftrace_printk() and has the same purpose, we can now drop
the old implementation in favour of the binary one from ftrace_bprintk(),
which means we move all the implementation of ftrace_bprintk() to
ftrace_printk(), so the Api doesn't change except that we must now use
trace_seq_bprintk() to print the TRACE_PRINT entries.

Some changes result of this:

- Previously, ftrace_bprintk depended of a single tracer and couldn't
  work without. This tracer has been dropped and the whole implementation
  of ftrace_printk() (like the module formats management) is now integrated
  in the tracing core (comes with CONFIG_TRACING), though we keep the file
  trace_bprintk (previously trace_bprintk.c) where we can find the module
  management. Thus we don't overflow trace.c

- changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries.

- change a bit ftrace_printk/ftrace_vprintk macros to support non-builtin formats
  constants, and fix 'const' qualifiers warnings. But this is all transparent for
  developers.

- etc...

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 include/linux/ftrace.h                           |   68 ++++----
 include/linux/module.h                           |    2 +-
 kernel/trace/Kconfig                             |    7 +-
 kernel/trace/Makefile                            |    2 +-
 kernel/trace/trace.c                             |  209 +++++++---------------
 kernel/trace/trace.h                             |   14 +--
 kernel/trace/trace_functions_graph.c             |    6 +-
 kernel/trace/trace_mmiotrace.c                   |    9 +-
 kernel/trace/trace_output.c                      |   69 +------
 kernel/trace/trace_output.h                      |    2 +
 kernel/trace/{trace_bprintk.c => trace_printk.c} |  113 ++++--------
 11 files changed, 162 insertions(+), 339 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f82b986..696a09b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -222,31 +222,6 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
-#ifdef CONFIG_TRACE_BPRINTK
-extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
-extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
-		__attribute__ ((format (printf, 2, 3)));
-
-static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
-		__attribute__ ((format (printf, 1, 2)));
-static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
-#define __ftrace_bprintk_check_format(fmt, args...)			\
-do {									\
-	if (0)								\
-		____ftrace_bprintk_check_format(fmt, ##args);		\
-} while (0)
-
-#define ftrace_bprintk(fmt, args...)					\
-do {									\
-	static char *__attribute__((section("__trace_bprintk_fmt")))	\
-			ftrace_bprintk_fmt = fmt;			\
-	__ftrace_bprintk_check_format(fmt, ##args);			\
-	__ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);	\
-} while (0)
-#else
-#define ftrace_bprintk ftrace_printk
-#endif
-
 /* May be defined in arch */
 extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
@@ -367,15 +342,40 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
  * Please refrain from leaving ftrace_printks scattered around in
  * your code.
  */
-# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
-extern int
-__ftrace_printk(unsigned long ip, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
-extern int
-__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
+extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list args);
+extern int __ftrace_printk(unsigned long ip, const char *fmt, ...)
+		__attribute__ ((format (printf, 2, 3)));
+
+static inline void  ____ftrace_printk_check_format(const char *fmt, ...)
+		__attribute__ ((format (printf, 1, 2)));
+static inline void ____ftrace_printk_check_format(const char *fmt, ...) {}
+#define __ftrace_printk_check_format(fmt, args...)			\
+do {									\
+	if (0)								\
+		____ftrace_printk_check_format(fmt, ##args);		\
+} while (0)
+
+#define ftrace_vprintk(fmt, vargs)					\
+do {									\
+	static const char *ftrace_printk_fmt				\
+	__attribute__((section("__trace_bprintk_fmt")));		\
+	ftrace_printk_fmt = fmt;					\
+	__ftrace_vprintk(_THIS_IP_, ftrace_printk_fmt, vargs);		\
+} while (0)
+
+#define ftrace_printk(fmt, args...)					\
+do {									\
+	static const char *ftrace_printk_fmt				\
+	__attribute__((section("__trace_bprintk_fmt")));		\
+	ftrace_printk_fmt = fmt;					\
+	__ftrace_printk_check_format(fmt, ##args);			\
+	__ftrace_printk(_THIS_IP_, ftrace_printk_fmt, ##args);		\
+} while (0)
+
 extern void ftrace_dump(void);
-#else
+
+#else /* !CONFIG_TRACING */
+
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
 static inline int
@@ -395,7 +395,7 @@ ftrace_vprintk(const char *fmt, va_list ap)
 	return 0;
 }
 static inline void ftrace_dump(void) { }
-#endif
+#endif /* CONFIG_TRACING */
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
diff --git a/include/linux/module.h b/include/linux/module.h
index c4f2845..e418da1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -347,7 +347,7 @@ struct module
 	unsigned int num_tracepoints;
 #endif
 
-#ifdef CONFIG_TRACE_BPRINTK
+#ifdef CONFIG_TRACING
 	const char **trace_bprintk_fmt_start;
 	unsigned int num_trace_bprintk_fmt;
 #endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2e84fc7..52439c8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -52,6 +52,7 @@ config TRACING
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select TRACEPOINTS
 	select NOP_TRACER
+	select BINARY_PRINTF
 
 menu "Tracers"
 
@@ -86,12 +87,6 @@ config FUNCTION_GRAPH_TRACER
 	  This is done by setting the current return address on the current
 	  task structure into a stack of calls.
 
-config TRACE_BPRINTK
-	bool "Binary printk for tracing"
-	default y
-	depends on TRACING
-	select BINARY_PRINTF
-
 config IRQSOFF_TRACER
 	bool "Interrupts-off Latency Tracer"
 	default n
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index dba6157..25e243f 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
-obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
+obj-$(CONFIG_TRACING) += trace_printk.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0e30dce..2ad285b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1171,6 +1171,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
+
+/**
+ * trace_vprintk - write binary msg to tracing buffer
+ *
+ * Caller must insure @fmt are valid when msg is in tracing buffer.
+ */
+int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
+{
+	static DEFINE_SPINLOCK(trace_buf_lock);
+	static u32 trace_buf[TRACE_BUF_SIZE];
+
+	struct ring_buffer_event *event;
+	struct trace_array *tr = &global_trace;
+	struct trace_array_cpu *data;
+	struct print_entry *entry;
+	unsigned long flags;
+	int cpu, len = 0, size, pc;
+
+	if (unlikely(tracing_selftest_running || tracing_disabled))
+		return 0;
+
+	/* Don't pollute graph traces with trace_vprintk internals */
+	pause_graph_tracing();
+
+	pc = preempt_count();
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	data = tr->data[cpu];
+
+	if (unlikely(atomic_read(&data->disabled)))
+		goto out;
+
+	spin_lock_irqsave(&trace_buf_lock, flags);
+	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+	if (len > TRACE_BUF_SIZE || len < 0)
+		goto out_unlock;
+
+	size = sizeof(*entry) + sizeof(u32) * len;
+	event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc);
+	if (!event)
+		goto out_unlock;
+	entry = ring_buffer_event_data(event);
+	entry->ip			= ip;
+	entry->depth			= depth;
+	entry->fmt			= fmt;
+
+	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+	ring_buffer_unlock_commit(tr->buffer, event);
+
+out_unlock:
+	spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+out:
+	preempt_enable_notrace();
+	unpause_graph_tracing();
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(trace_vprintk);
+
 enum trace_file_type {
 	TRACE_FILE_LAT_FMT	= 1,
 	TRACE_FILE_ANNOTATE	= 2,
@@ -1588,7 +1649,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
 
 	trace_assign_type(field, entry);
 
-	ret = trace_seq_printf(s, "%s", field->buf);
+	ret = trace_seq_bprintf(s, field->fmt, field->buf);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -3496,152 +3557,6 @@ static __init int tracer_init_debugfs(void)
 	return 0;
 }
 
-int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
-{
-	static DEFINE_SPINLOCK(trace_buf_lock);
-	static char trace_buf[TRACE_BUF_SIZE];
-
-	struct ring_buffer_event *event;
-	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
-	int cpu, len = 0, size, pc;
-	struct print_entry *entry;
-	unsigned long irq_flags;
-
-	if (tracing_disabled || tracing_selftest_running)
-		return 0;
-
-	pc = preempt_count();
-	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-
-	if (unlikely(atomic_read(&data->disabled)))
-		goto out;
-
-	pause_graph_tracing();
-	spin_lock_irqsave(&trace_buf_lock, irq_flags);
-	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
-
-	len = min(len, TRACE_BUF_SIZE-1);
-	trace_buf[len] = 0;
-
-	size = sizeof(*entry) + len + 1;
-	event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
-	if (!event)
-		goto out_unlock;
-	entry = ring_buffer_event_data(event);
-	entry->ip			= ip;
-	entry->depth			= depth;
-
-	memcpy(&entry->buf, trace_buf, len);
-	entry->buf[len] = 0;
-	ring_buffer_unlock_commit(tr->buffer, event);
-
- out_unlock:
-	spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
-	unpause_graph_tracing();
- out:
-	preempt_enable_notrace();
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(trace_vprintk);
-
-int __ftrace_printk(unsigned long ip, const char *fmt, ...)
-{
-	int ret;
-	va_list ap;
-
-	if (!(trace_flags & TRACE_ITER_PRINTK))
-		return 0;
-
-	va_start(ap, fmt);
-	ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
-	va_end(ap);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__ftrace_printk);
-
-int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
-{
-	if (!(trace_flags & TRACE_ITER_PRINTK))
-		return 0;
-
-	return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
-}
-EXPORT_SYMBOL_GPL(__ftrace_vprintk);
-
-/**
- * trace_vbprintk - write binary msg to tracing buffer
- *
- * Caller must insure @fmt are valid when msg is in tracing buffer.
- */
-int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
-{
-	static DEFINE_SPINLOCK(trace_buf_lock);
-	static u32 trace_buf[TRACE_BUF_SIZE];
-
-	struct ring_buffer_event *event;
-	struct trace_array *tr = &global_trace;
-	struct trace_array_cpu *data;
-	struct bprintk_entry *entry;
-	unsigned long flags;
-	int cpu, len = 0, size, pc;
-
-	if (tracing_disabled || !trace_bprintk_enable)
-		return 0;
-
-	pc = preempt_count();
-	preempt_disable_notrace();
-	cpu = raw_smp_processor_id();
-	data = tr->data[cpu];
-
-	if (unlikely(atomic_read(&data->disabled)))
-		goto out;
-
-	spin_lock_irqsave(&trace_buf_lock, flags);
-	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
-
-	if (len > TRACE_BUF_SIZE || len < 0)
-		goto out_unlock;
-
-	size = sizeof(*entry) + sizeof(u32) * len;
-	event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
-	if (!event)
-		goto out_unlock;
-	entry = ring_buffer_event_data(event);
-	entry->ip			= ip;
-	entry->fmt			= fmt;
-
-	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-	ring_buffer_unlock_commit(tr->buffer, event);
-
-out_unlock:
-	spin_unlock_irqrestore(&trace_buf_lock, flags);
-
-out:
-	preempt_enable_notrace();
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(trace_vbprintk);
-
-int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
-{
-	int ret;
-	va_list ap;
-
-	if (!fmt)
-		return 0;
-
-	va_start(ap, fmt);
-	ret = trace_vbprintk(ip, fmt, ap);
-	va_end(ap);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__ftrace_bprintk);
-
 static int trace_panic_handler(struct notifier_block *this,
 			       unsigned long event, void *unused)
 {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 92703b1..0ccc678 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,7 +20,6 @@ enum trace_type {
 	TRACE_WAKE,
 	TRACE_STACK,
 	TRACE_PRINT,
-	TRACE_BPRINTK,
 	TRACE_SPECIAL,
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
@@ -120,16 +119,10 @@ struct userstack_entry {
  */
 struct print_entry {
 	struct trace_entry	ent;
-	unsigned long		ip;
+	unsigned long 		ip;
 	int			depth;
-	char			buf[];
-};
-
-struct bprintk_entry {
-	struct trace_entry ent;
-	unsigned long ip;
-	const char *fmt;
-	u32 buf[];
+	const char		*fmt;
+	u32 			buf[];
 };
 #ifdef CONFIG_TRACE_BPRINTK
 extern int trace_bprintk_enable;
@@ -295,7 +288,6 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
-		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
 		IF_ASSIGN(var, ent, struct special_entry, 0);		\
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
 			  TRACE_MMIO_RW);				\
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2461732..13e362d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -742,7 +742,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
 		}
 
 	/* The comment */
-	ret = trace_seq_printf(s, "/* %s", trace->buf);
+	ret = trace_seq_printf(s, "/* ");
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index c401b90..23e346a 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -254,15 +254,18 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
 {
 	struct trace_entry *entry = iter->ent;
 	struct print_entry *print = (struct print_entry *)entry;
-	const char *msg		= print->buf;
 	struct trace_seq *s	= &iter->seq;
 	unsigned long long t	= ns2usecs(iter->ts);
-	unsigned long usec_rem	= do_div(t, 1000000ULL);
+	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
 	unsigned secs		= (unsigned long)t;
 	int ret;
 
 	/* The trailing newline must be in the message. */
-	ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
+	ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	ret = trace_seq_bprintf(s, print->fmt, print->buf);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7897b5d..a020a9b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -53,8 +53,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	return len;
 }
 
-static int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
 	int len = (PAGE_SIZE - 1) - s->len;
 	int ret;
@@ -876,50 +875,6 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
 	if (!seq_print_ip_sym(s, field->ip, flags))
 		goto partial;
 
-	if (!trace_seq_printf(s, ": %s", field->buf))
-		goto partial;
-
-	return TRACE_TYPE_HANDLED;
-
- partial:
-	return TRACE_TYPE_PARTIAL_LINE;
-}
-
-static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
-{
-	struct print_entry *field;
-
-	trace_assign_type(field, iter->ent);
-
-	if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
-		goto partial;
-
-	return TRACE_TYPE_HANDLED;
-
- partial:
-	return TRACE_TYPE_PARTIAL_LINE;
-}
-
-static struct trace_event trace_print_event = {
-	.type	 	= TRACE_PRINT,
-	.trace		= trace_print_print,
-	.latency_trace	= trace_print_print,
-	.raw		= trace_print_raw,
-};
-
-/* TRACE_BPRINTK */
-static enum print_line_t
-trace_bprintk_print(struct trace_iterator *iter, int flags)
-{
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *s = &iter->seq;
-	struct bprintk_entry *field;
-
-	trace_assign_type(field, entry);
-
-	if (!seq_print_ip_sym(s, field->ip, flags))
-		goto partial;
-
 	if (!trace_seq_puts(s, ": "))
 		goto partial;
 
@@ -932,14 +887,13 @@ trace_bprintk_print(struct trace_iterator *iter, int flags)
 	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static enum print_line_t
-trace_bprintk_raw(struct trace_iterator *iter, int flags)
+
+static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
 {
-	struct trace_entry *entry = iter->ent;
+	struct print_entry *field;
 	struct trace_seq *s = &iter->seq;
-	struct bprintk_entry *field;
 
-	trace_assign_type(field, entry);
+	trace_assign_type(field, iter->ent);
 
 	if (!trace_seq_printf(s, ": %lx : ", field->ip))
 		goto partial;
@@ -953,13 +907,11 @@ trace_bprintk_raw(struct trace_iterator *iter, int flags)
 	return TRACE_TYPE_PARTIAL_LINE;
 }
 
-static struct trace_event trace_bprintk_event = {
-	.type	 	= TRACE_BPRINTK,
-	.trace		= trace_bprintk_print,
-	.latency_trace	= trace_bprintk_print,
-	.raw		= trace_bprintk_raw,
-	.hex		= trace_nop_print,
-	.binary		= trace_nop_print,
+static struct trace_event trace_print_event = {
+	.type	 	= TRACE_PRINT,
+	.trace		= trace_print_print,
+	.latency_trace	= trace_print_print,
+	.raw		= trace_print_raw,
 };
 
 static struct trace_event *events[] __initdata = {
@@ -970,7 +922,6 @@ static struct trace_event *events[] __initdata = {
 	&trace_stack_event,
 	&trace_user_stack_event,
 	&trace_print_event,
-	&trace_bprintk_event,
 	NULL
 };
 
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 551a25a..6d39010 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -19,6 +19,8 @@ struct trace_event {
 extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 	__attribute__ ((format (printf, 2, 3)));
 extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
 		unsigned long sym_flags);
 extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_printk.c
similarity index 65%
rename from kernel/trace/trace_bprintk.c
rename to kernel/trace/trace_printk.c
index 28e6286..09f4ace 100644
--- a/kernel/trace/trace_bprintk.c
+++ b/kernel/trace/trace_printk.c
@@ -21,32 +21,21 @@
 
 #ifdef CONFIG_MODULES
 
-/* binary printk basic */
-static DEFINE_MUTEX(btrace_mutex);
-static int btrace_metadata_count;
 /*
- * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
+ * modules ftrace_printk()'s formats are autosaved in struct trace_bprintk_fmt
  * which are queued on trace_bprintk_fmt_list.
  */
 static LIST_HEAD(trace_bprintk_fmt_list);
 
+/* serialize accesses to trace_bprintk_fmt_list */
+static DEFINE_MUTEX(btrace_mutex);
+
 struct trace_bprintk_fmt {
 	struct list_head list;
 	int count;
 	char fmt[0];
 };
 
-
-static inline void lock_btrace(void)
-{
-	mutex_lock(&btrace_mutex);
-}
-
-static inline void unlock_btrace(void)
-{
-	mutex_unlock(&btrace_mutex);
-}
-
 /*
  * If trace_bprintk() is not active, we release the trace_bprintk's
  * formats that no modules has reference to.
@@ -76,7 +65,8 @@ static
 void hold_module_trace_bprintk_format(const char **start, const char **end)
 {
 	const char **iter;
-	lock_btrace();
+
+	mutex_lock(&btrace_mutex);
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
 		if (tb_fmt) {
@@ -95,14 +85,15 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
 		} else
 			*iter = NULL;
 	}
-	unlock_btrace();
+	mutex_unlock(&btrace_mutex);
 }
 
 static
 void release_module_trace_bprintk_format(const char **start, const char **end)
 {
 	const char **iter;
-	lock_btrace();
+
+	mutex_lock(&btrace_mutex);
 	for (iter = start; iter < end; iter++) {
 		struct trace_bprintk_fmt *tb_fmt;
 		if (!*iter)
@@ -110,12 +101,12 @@ void release_module_trace_bprintk_format(const char **start, const char **end)
 
 		tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
 		tb_fmt->count--;
-		if (!tb_fmt->count && !btrace_metadata_count) {
+		if (!tb_fmt->count) {
 			list_del(&tb_fmt->list);
 			kfree(tb_fmt);
 		}
 	}
-	unlock_btrace();
+	mutex_unlock(&btrace_mutex);
 }
 
 static int module_trace_bprintk_format_notify(struct notifier_block *self,
@@ -134,24 +125,6 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
 	return 0;
 }
 
-static void get_btrace_metadata(void)
-{
-	lock_btrace();
-	btrace_metadata_count++;
-	unlock_btrace();
-}
-
-static void put_btrace_metadata(void)
-{
-	lock_btrace();
-	btrace_metadata_count--;
-
-	if (!btrace_metadata_count) {
-		shrink_trace_bprintk_fmt();
-	}
-	unlock_btrace();
-}
-
 #else /* !CONFIG_MODULES */
 __init static int
 module_trace_bprintk_format_notify(struct notifier_block *self,
@@ -159,9 +132,6 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
 {
 	return 0;
 }
-
-static inline void get_btrace_metadata(void) { }
-static inline void put_btrace_metadata(void) { }
 #endif /* CONFIG_MODULES */
 
 
@@ -170,48 +140,39 @@ struct notifier_block module_trace_bprintk_format_nb = {
 	.notifier_call = module_trace_bprintk_format_notify,
 };
 
-/* events tracer */
-int trace_bprintk_enable;
-
-static void start_bprintk_trace(struct trace_array *tr)
+int __ftrace_printk(unsigned long ip, const char *fmt, ...)
 {
-	get_btrace_metadata();
-	tracing_reset_online_cpus(tr);
-	trace_bprintk_enable = 1;
-}
+	int ret;
+	va_list ap;
 
-static void stop_bprintk_trace(struct trace_array *tr)
-{
-	trace_bprintk_enable = 0;
-	tracing_reset_online_cpus(tr);
-	put_btrace_metadata();
-}
+	if (unlikely(!fmt))
+		return 0;
 
-static int init_bprintk_trace(struct trace_array *tr)
-{
-	start_bprintk_trace(tr);
-	return 0;
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
+
+	va_start(ap, fmt);
+	ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
+	va_end(ap);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(__ftrace_printk);
 
-static struct tracer bprintk_trace __read_mostly =
+int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
 {
-	.name	     = "events",
-	.init	     = init_bprintk_trace,
-	.reset	     = stop_bprintk_trace,
-	.start	     = start_bprintk_trace,
-	.stop	     = stop_bprintk_trace,
-};
+	if (unlikely(!fmt))
+		return 0;
 
-static __init int init_bprintk(void)
-{
-	int ret = register_module_notifier(&module_trace_bprintk_format_nb);
-	if (ret)
-		return ret;
+	if (!(trace_flags & TRACE_ITER_PRINTK))
+		return 0;
 
-	ret = register_tracer(&bprintk_trace);
-	if (ret)
-		unregister_module_notifier(&module_trace_bprintk_format_nb);
-	return ret;
+	return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
 }
+EXPORT_SYMBOL_GPL(__ftrace_vprintk);
+
 
-device_initcall(init_bprintk);
+static __init int init_trace_printk(void)
+{
+	return register_module_notifier(&module_trace_bprintk_format_nb);
+}
+early_initcall(init_trace_printk);
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28  9:13                       ` Ingo Molnar
                                           ` (3 preceding siblings ...)
  2009-02-28 22:16                         ` [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk Frederic Weisbecker
@ 2009-02-28 23:11                         ` Frederic Weisbecker
  2009-03-01  2:34                         ` [PATCH 5/5 v3] " Frederic Weisbecker
  2009-03-01  3:31                         ` [PATCH 0/5] Binary ftrace_printk Frederic Weisbecker
  6 siblings, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-02-28 23:11 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Andrew Morton, linux-kernel, Steven Rostedt,
	Lai Jiangshan, Peter Zijlstra

On Sat, Feb 28, 2009 at 10:13:05AM +0100, Ingo Molnar wrote:
> 
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> 
> > > instead? Wouldn't that be nicer? I suspect it would make the 
> > > code look nicer too (instead of doing "*base = x", you'd see 
> > > "spec->base = x" and it would look less like line noise in 
> > > the callee, an the caller could just do a single "struct 
> > > format_spec spec = { 0, }" to initialize that thing).
> > > 
> > > 		Linus
> > 
> > You're right, that's much proper.
> > See the V2 below:
> 
> Just a few (very) small code style pet peeves:
> 
> > +struct printf_spec {
> > +	enum format_type type;
> > +	int flags;			/* flags to number() */
> > +	int field_width;		/* width of output field */
> > +	int base;
> > +	/* min. # of digits for integers; max number of chars for from string */
> > +	int precision;
> > +	int qualifier;
> > +};
> 
> doesnt it look a bit tidier this way:
> 
>  struct printf_spec {
> 	enum format_type	type;
> 	int			flags;		/* flags to number()     */
> 	int			field_width;	/* width of output field */
> 	int			base;
> 	int			precision;	/* # of digits/chars     */
> 	int			qualifier;
>  };
> 
> ?
> 
> > +		case '+':
> > +			spec->flags |= PLUS;
> > +			break;
> > +		case ' ':
> > +			spec->flags |= SPACE;
> > +			break;
> > +		case '#':
> > +			spec->flags |= SPECIAL;
> > +			break;
> > +		case '0':
> > +			spec->flags |= ZEROPAD;
> > +			break;
> > +		default:
> > +			found = false;
> 
> btw., this is one of the cases where i think the original style 
> was more useful:
> 
> > +		case '+':	spec->flags |= PLUS;  break;
> > +		case ' ':	spec->flags |= SPACE; break;
> [etc.]
> 
> as it's always good to compress repetitive patterns of code.
> 
> (If checkpatch complains about this then ignore checkpatch.)
> 
> > +	case 'n':
> > +		/* FIXME:
> > +		* What does C99 say about the overflow case here? */
> 
> (this comment looks a bit funny.)
> 
> > +		default: {
> > +			enum format_type type = spec.type;
> > +
> > +			if (type == FORMAT_TYPE_LONG_LONG)
> > +				num = get_arg(long long);
> > +			else if (type == FORMAT_TYPE_ULONG)
> > +				num = get_arg(unsigned long);
> > +			else if (type == FORMAT_TYPE_LONG)
> > +				num = get_arg(unsigned long);
> > +			else if (type == FORMAT_TYPE_SIZE_T)
> > +				num = get_arg(size_t);
> > +			else if (type == FORMAT_TYPE_PTRDIFF)
> > +				num = get_arg(ptrdiff_t);
> > +			else if (type == FORMAT_TYPE_USHORT)
> > +				num = get_arg(unsigned short);
> > +			else if (type == FORMAT_TYPE_SHORT)
> > +				num = get_arg(short);
> > +			else if (type == FORMAT_TYPE_UINT)
> > +				num = get_arg(unsigned int);
> > +			else
> > +				num = get_arg(int);
> 
> Wouldnt it be cleaner as a switch() statement and to put into a 
> helper function?
> 
> Also, could you please resend the current stuff with a 0/ 
> description and a diffstat in the 0 mail so that we can all see 
> all the patches again and the total impact?
> 
> 	Ingo

Ok, for all these comments. Except I'm not sure that it is needed to export this part
in a helper, it means 3 new different helpers with two of them having a pointer to a va_list
in their parameters.
A pointer to va_list is legal but doesn't seem to me much proper.

Unless you have some objections, I will repost the new addressed version and if you still
think these parts should be exported to helpers, then I will do it.


^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH 5/5 v3] vsprintf: unify the format decoding layer for its 3 users
  2009-02-28  9:13                       ` Ingo Molnar
                                           ` (4 preceding siblings ...)
  2009-02-28 23:11                         ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
@ 2009-03-01  2:34                         ` Frederic Weisbecker
  2009-03-01  3:31                         ` [PATCH 0/5] Binary ftrace_printk Frederic Weisbecker
  6 siblings, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-03-01  2:34 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

An new optimization is making its way to ftrace. Its purpose is to
make ftrace_printk() consuming less memory and be faster.

Written by Lai Jiangshan, the approach is to delay the formatting
job from tracing time to output time.
Currently, a call to ftrace_printk will format the whole string and
insert it into the ring buffer. Then you can read it on /debug/tracing/trace
file.

The new implementation stores the address of the format string and
the binary parameters into the ring buffer, making the packet more compact
and faster to insert.
Later, when the user exports the traces, the format string is retrieved
with the binary parameters and the formatting job is eventually done.

The new implementation rewrites a lot of format decoding bits from
vsnprintf() function, making now 3 differents functions to maintain
in their duplicated parts of printf format decoding bits.

Suggested by Ingo Molnar, this patch tries to factorize the most
possible common bits from these functions.
The real common part between them is the format decoding. Although
they do somewhat similar jobs, their way to export or import the parameters
is very different. Thus, only the decoding layer is extracted, unless you see
other parts that could be worth factorized.

Changes in V2:

- Address a suggestion from Linus to group the format_decode() parameters inside
  a structure.

Changes in v3:

- Address other cleanups suggested by Ingo and Linus such as passing the
  printf_spec struct to the format helpers: pointer()/number()/string()
  Note that this struct is passed by copy and not by address. This is to
  avoid side effects because these functions often change these values and the
  changes shoudn't be persistant when a callee helper returns.
  It would be too risky.

- Various cleanups (code alignement, switch/case instead of if/else fountains).

- Fix a bug that printed the first format specifier following a %p


Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 lib/vsprintf.c |  976 +++++++++++++++++++++++++++++++-------------------------
 1 files changed, 543 insertions(+), 433 deletions(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3543bbe..af7a055 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -396,7 +396,38 @@ static noinline char* put_dec(char *buf, unsigned long long num)
 #define SMALL	32		/* Must be 32 == 0x20 */
 #define SPECIAL	64		/* 0x */
 
-static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type)
+enum format_type {
+	FORMAT_TYPE_NONE, /* Just a string part */
+	FORMAT_TYPE_WITDH,
+	FORMAT_TYPE_PRECISION,
+	FORMAT_TYPE_CHAR,
+	FORMAT_TYPE_STR,
+	FORMAT_TYPE_PTR,
+	FORMAT_TYPE_PERCENT_CHAR,
+	FORMAT_TYPE_INVALID,
+	FORMAT_TYPE_LONG_LONG,
+	FORMAT_TYPE_ULONG,
+	FORMAT_TYPE_LONG,
+	FORMAT_TYPE_USHORT,
+	FORMAT_TYPE_SHORT,
+	FORMAT_TYPE_UINT,
+	FORMAT_TYPE_INT,
+	FORMAT_TYPE_NRCHARS,
+	FORMAT_TYPE_SIZE_T,
+	FORMAT_TYPE_PTRDIFF
+};
+
+struct printf_spec {
+	enum format_type	type;
+	int			flags;		/* flags to number() */
+	int			field_width;	/* width of output field */
+	int			base;
+	int			precision;	/* # of digits/chars */
+	int			qualifier;
+};
+
+static char *number(char *buf, char *end, unsigned long long num,
+			struct printf_spec spec)
 {
 	/* we are called with base 8, 10 or 16, only, thus don't need "G..."  */
 	static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
@@ -404,32 +435,32 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	char tmp[66];
 	char sign;
 	char locase;
-	int need_pfx = ((type & SPECIAL) && base != 10);
+	int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
 	int i;
 
 	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
 	 * produces same digits or (maybe lowercased) letters */
-	locase = (type & SMALL);
-	if (type & LEFT)
-		type &= ~ZEROPAD;
+	locase = (spec.flags & SMALL);
+	if (spec.flags & LEFT)
+		spec.flags &= ~ZEROPAD;
 	sign = 0;
-	if (type & SIGN) {
+	if (spec.flags & SIGN) {
 		if ((signed long long) num < 0) {
 			sign = '-';
 			num = - (signed long long) num;
-			size--;
-		} else if (type & PLUS) {
+			spec.field_width--;
+		} else if (spec.flags & PLUS) {
 			sign = '+';
-			size--;
-		} else if (type & SPACE) {
+			spec.field_width--;
+		} else if (spec.flags & SPACE) {
 			sign = ' ';
-			size--;
+			spec.field_width--;
 		}
 	}
 	if (need_pfx) {
-		size--;
-		if (base == 16)
-			size--;
+		spec.field_width--;
+		if (spec.base == 16)
+			spec.field_width--;
 	}
 
 	/* generate full string in tmp[], in reverse order */
@@ -441,10 +472,10 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		tmp[i++] = (digits[do_div(num,base)] | locase);
 	} while (num != 0);
 	*/
-	else if (base != 10) { /* 8 or 16 */
-		int mask = base - 1;
+	else if (spec.base != 10) { /* 8 or 16 */
+		int mask = spec.base - 1;
 		int shift = 3;
-		if (base == 16) shift = 4;
+		if (spec.base == 16) shift = 4;
 		do {
 			tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
 			num >>= shift;
@@ -454,12 +485,12 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	}
 
 	/* printing 100 using %2d gives "100", not "00" */
-	if (i > precision)
-		precision = i;
+	if (i > spec.precision)
+		spec.precision = i;
 	/* leading space padding */
-	size -= precision;
-	if (!(type & (ZEROPAD+LEFT))) {
-		while(--size >= 0) {
+	spec.field_width -= spec.precision;
+	if (!(spec.flags & (ZEROPAD+LEFT))) {
+		while(--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = ' ';
 			++buf;
@@ -476,23 +507,23 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		if (buf < end)
 			*buf = '0';
 		++buf;
-		if (base == 16) {
+		if (spec.base == 16) {
 			if (buf < end)
 				*buf = ('X' | locase);
 			++buf;
 		}
 	}
 	/* zero or space padding */
-	if (!(type & LEFT)) {
-		char c = (type & ZEROPAD) ? '0' : ' ';
-		while (--size >= 0) {
+	if (!(spec.flags & LEFT)) {
+		char c = (spec.flags & ZEROPAD) ? '0' : ' ';
+		while (--spec.field_width >= 0) {
 			if (buf < end)
 				*buf = c;
 			++buf;
 		}
 	}
 	/* hmm even more zero padding? */
-	while (i <= --precision) {
+	while (i <= --spec.precision) {
 		if (buf < end)
 			*buf = '0';
 		++buf;
@@ -504,7 +535,7 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 		++buf;
 	}
 	/* trailing space padding */
-	while (--size >= 0) {
+	while (--spec.field_width >= 0) {
 		if (buf < end)
 			*buf = ' ';
 		++buf;
@@ -512,17 +543,17 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
 	return buf;
 }
 
-static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags)
+static char *string(char *buf, char *end, char *s, struct printf_spec spec)
 {
 	int len, i;
 
 	if ((unsigned long)s < PAGE_SIZE)
 		s = "<NULL>";
 
-	len = strnlen(s, precision);
+	len = strnlen(s, spec.precision);
 
-	if (!(flags & LEFT)) {
-		while (len < field_width--) {
+	if (!(spec.flags & LEFT)) {
+		while (len < spec.field_width--) {
 			if (buf < end)
 				*buf = ' ';
 			++buf;
@@ -533,7 +564,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 			*buf = *s;
 		++buf; ++s;
 	}
-	while (len < field_width--) {
+	while (len < spec.field_width--) {
 		if (buf < end)
 			*buf = ' ';
 		++buf;
@@ -541,21 +572,24 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+static char *symbol_string(char *buf, char *end, void *ptr,
+				struct printf_spec spec)
 {
 	unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
 	char sym[KSYM_SYMBOL_LEN];
 	sprint_symbol(sym, value);
-	return string(buf, end, sym, field_width, precision, flags);
+	return string(buf, end, sym, spec);
 #else
-	field_width = 2*sizeof(void *);
-	flags |= SPECIAL | SMALL | ZEROPAD;
-	return number(buf, end, value, 16, field_width, precision, flags);
+	spec.field_width = 2*sizeof(void *);
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	spec.base = 16;
+	return number(buf, end, value, spec);
 #endif
 }
 
-static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags)
+static char *resource_string(char *buf, char *end, struct resource *res,
+				struct printf_spec spec)
 {
 #ifndef IO_RSRC_PRINTK_SIZE
 #define IO_RSRC_PRINTK_SIZE	4
@@ -564,7 +598,11 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
 #ifndef MEM_RSRC_PRINTK_SIZE
 #define MEM_RSRC_PRINTK_SIZE	8
 #endif
-
+	struct printf_spec num_spec = {
+		.base = 16,
+		.precision = -1,
+		.flags = SPECIAL | SMALL | ZEROPAD,
+	};
 	/* room for the actual numbers, the two "0x", -, [, ] and the final zero */
 	char sym[4*sizeof(resource_size_t) + 8];
 	char *p = sym, *pend = sym + sizeof(sym);
@@ -576,17 +614,18 @@ static char *resource_string(char *buf, char *end, struct resource *res, int fie
 		size = MEM_RSRC_PRINTK_SIZE;
 
 	*p++ = '[';
-	p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	num_spec.field_width = size;
+	p = number(p, pend, res->start, num_spec);
 	*p++ = '-';
-	p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD);
+	p = number(p, pend, res->end, num_spec);
 	*p++ = ']';
 	*p = 0;
 
-	return string(buf, end, sym, field_width, precision, flags);
+	return string(buf, end, sym, spec);
 }
 
-static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width,
-				int precision, int flags)
+static char *mac_address_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */
 	char *p = mac_addr;
@@ -594,16 +633,17 @@ static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width,
 
 	for (i = 0; i < 6; i++) {
 		p = pack_hex_byte(p, addr[i]);
-		if (!(flags & SPECIAL) && i != 5)
+		if (!(spec.flags & SPECIAL) && i != 5)
 			*p++ = ':';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, mac_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, mac_addr, spec);
 }
 
-static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width,
-			 int precision, int flags)
+static char *ip6_addr_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */
 	char *p = ip6_addr;
@@ -612,16 +652,17 @@ static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width,
 	for (i = 0; i < 8; i++) {
 		p = pack_hex_byte(p, addr[2 * i]);
 		p = pack_hex_byte(p, addr[2 * i + 1]);
-		if (!(flags & SPECIAL) && i != 7)
+		if (!(spec.flags & SPECIAL) && i != 7)
 			*p++ = ':';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, ip6_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, ip6_addr, spec);
 }
 
-static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
-			 int precision, int flags)
+static char *ip4_addr_string(char *buf, char *end, u8 *addr,
+				struct printf_spec spec)
 {
 	char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */
 	char temp[3];	/* hold each IP quad in reverse order */
@@ -637,8 +678,9 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
 			*p++ = '.';
 	}
 	*p = '\0';
+	spec.flags &= ~SPECIAL;
 
-	return string(buf, end, ip4_addr, field_width, precision, flags & ~SPECIAL);
+	return string(buf, end, ip4_addr, spec);
 }
 
 /*
@@ -663,41 +705,234 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width,
  * function pointers are really function descriptors, which contain a
  * pointer to the real address.
  */
-static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags)
+static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+			struct printf_spec spec)
 {
 	if (!ptr)
-		return string(buf, end, "(null)", field_width, precision, flags);
+		return string(buf, end, "(null)", spec);
 
 	switch (*fmt) {
 	case 'F':
 		ptr = dereference_function_descriptor(ptr);
 		/* Fallthrough */
 	case 'S':
-		return symbol_string(buf, end, ptr, field_width, precision, flags);
+		return symbol_string(buf, end, ptr, spec);
 	case 'R':
-		return resource_string(buf, end, ptr, field_width, precision, flags);
+		return resource_string(buf, end, ptr, spec);
 	case 'm':
-		flags |= SPECIAL;
+		spec.flags |= SPECIAL;
 		/* Fallthrough */
 	case 'M':
-		return mac_address_string(buf, end, ptr, field_width, precision, flags);
+		return mac_address_string(buf, end, ptr, spec);
 	case 'i':
-		flags |= SPECIAL;
+		spec.flags |= SPECIAL;
 		/* Fallthrough */
 	case 'I':
 		if (fmt[1] == '6')
-			return ip6_addr_string(buf, end, ptr, field_width, precision, flags);
+			return ip6_addr_string(buf, end, ptr, spec);
 		if (fmt[1] == '4')
-			return ip4_addr_string(buf, end, ptr, field_width, precision, flags);
-		flags &= ~SPECIAL;
+			return ip4_addr_string(buf, end, ptr, spec);
+		spec.flags &= ~SPECIAL;
+		break;
+	}
+	spec.flags |= SMALL;
+	if (spec.field_width == -1) {
+		spec.field_width = 2*sizeof(void *);
+		spec.flags |= ZEROPAD;
+	}
+	spec.base = 16;
+
+	return number(buf, end, (unsigned long) ptr, spec);
+}
+
+/*
+ * Helper function to decode printf style format.
+ * Each call decode a token from the format and return the
+ * number of characters read (or likely the delta where it wants
+ * to go on the next call).
+ * The decoded token is returned through the parameters
+ *
+ * 'h', 'l', or 'L' for integer fields
+ * 'z' support added 23/7/1999 S.H.
+ * 'z' changed to 'Z' --davidm 1/25/99
+ * 't' added for ptrdiff_t
+ *
+ * @fmt: the format string
+ * @type of the token returned
+ * @flags: various flags such as +, -, # tokens..
+ * @field_width: overwritten width
+ * @base: base of the number (octal, hex, ...)
+ * @precision: precision of a number
+ * @qualifier: qualifier of a number (long, size_t, ...)
+ */
+static int format_decode(const char *fmt, struct printf_spec *spec)
+{
+	const char *start = fmt;
+	bool sign = false;
+
+	/* we finished early by reading the field width */
+	if (spec->type == FORMAT_TYPE_WITDH) {
+		if (spec->field_width < 0) {
+			spec->field_width = -spec->field_width;
+			spec->flags |= LEFT;
+		}
+		spec->type = FORMAT_TYPE_NONE;
+		goto precision;
+	}
+
+	/* we finished early by reading the precision */
+	if (spec->type == FORMAT_TYPE_PRECISION) {
+		if (spec->precision < 0)
+			spec->precision = 0;
+
+		spec->type = FORMAT_TYPE_NONE;
+		goto qualifier;
+	}
+
+	/* By default */
+	spec->type = FORMAT_TYPE_NONE;
+
+	for (; *fmt ; ++fmt) {
+		if (*fmt == '%')
+			break;
+	}
+
+	/* Return the current non-format string */
+	if (fmt != start || !*fmt)
+		return fmt - start;
+
+	/* Process flags */
+	spec->flags = 0;
+
+	while (1) { /* this also skips first '%' */
+		bool found = true;
+
+		++fmt;
+
+		switch (*fmt) {
+		case '-': spec->flags |= LEFT;    break;
+		case '+': spec->flags |= PLUS;    break;
+		case ' ': spec->flags |= SPACE;   break;
+		case '#': spec->flags |= SPECIAL; break;
+		case '0': spec->flags |= ZEROPAD; break;
+		default:  found = false;
+		}
+
+		if (!found)
+			break;
+	}
+
+	/* get field width */
+	spec->field_width = -1;
+
+	if (isdigit(*fmt))
+		spec->field_width = skip_atoi(&fmt);
+	else if (*fmt == '*') {
+		/* it's the next argument */
+		spec->type = FORMAT_TYPE_WITDH;
+		return ++fmt - start;
+	}
+
+precision:
+	/* get the precision */
+	spec->precision = -1;
+	if (*fmt == '.') {
+		++fmt;
+		if (isdigit(*fmt)) {
+			spec->precision = skip_atoi(&fmt);
+			if (spec->precision < 0)
+				spec->precision = 0;
+		} else if (*fmt == '*') {
+			/* it's the next argument */
+			spec->type = FORMAT_TYPE_WITDH;
+			return ++fmt - start;
+		}
+	}
+
+qualifier:
+	/* get the conversion qualifier */
+	spec->qualifier = -1;
+	if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
+	    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
+		spec->qualifier = *fmt;
+		++fmt;
+		if (spec->qualifier == 'l' && *fmt == 'l') {
+			spec->qualifier = 'L';
+			++fmt;
+		}
+	}
+
+	/* default base */
+	spec->base = 10;
+	switch (*fmt) {
+	case 'c':
+		spec->type = FORMAT_TYPE_CHAR;
+		return ++fmt - start;
+
+	case 's':
+		spec->type = FORMAT_TYPE_STR;
+		return ++fmt - start;
+
+	case 'p':
+		spec->type = FORMAT_TYPE_PTR;
+		return fmt - start;
+		/* skip alnum */
+
+	case 'n':
+		spec->type = FORMAT_TYPE_NRCHARS;
+		return ++fmt - start;
+
+	case '%':
+		spec->type = FORMAT_TYPE_PERCENT_CHAR;
+		return ++fmt - start;
+
+	/* integer number formats - set up the flags and "break" */
+	case 'o':
+		spec->base = 8;
 		break;
+
+	case 'x':
+		spec->flags |= SMALL;
+
+	case 'X':
+		spec->base = 16;
+		break;
+
+	case 'd':
+	case 'i':
+		sign = true;
+	case 'u':
+		break;
+
+	default:
+		spec->type = FORMAT_TYPE_INVALID;
+		return fmt - start;
 	}
-	flags |= SMALL;
-	if (field_width == -1) {
-		field_width = 2*sizeof(void *);
-		flags |= ZEROPAD;
+
+	if (spec->qualifier == 'L')
+		spec->type = FORMAT_TYPE_LONG_LONG;
+	else if (spec->qualifier == 'l') {
+		if (sign)
+			spec->type = FORMAT_TYPE_LONG;
+		else
+			spec->type = FORMAT_TYPE_ULONG;
+	} else if (spec->qualifier == 'Z' || spec->qualifier == 'z') {
+		spec->type = FORMAT_TYPE_SIZE_T;
+	} else if (spec->qualifier == 't') {
+		spec->type = FORMAT_TYPE_PTRDIFF;
+	} else if (spec->qualifier == 'h') {
+		if (sign)
+			spec->type = FORMAT_TYPE_SHORT;
+		else
+			spec->type = FORMAT_TYPE_USHORT;
+	} else {
+		if (sign)
+			spec->type = FORMAT_TYPE_INT;
+		else
+			spec->type = FORMAT_TYPE_UINT;
 	}
-	return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags);
+
+	return ++fmt - start;
 }
 
 /**
@@ -726,18 +961,9 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field
 int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 {
 	unsigned long long num;
-	int base;
 	char *str, *end, c;
-
-	int flags;		/* flags to number() */
-
-	int field_width;	/* width of output field */
-	int precision;		/* min. # of digits for integers; max
-				   number of chars for from string */
-	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
-				/* 'z' support added 23/7/1999 S.H.    */
-				/* 'z' changed to 'Z' --davidm 1/25/99 */
-				/* 't' added for ptrdiff_t */
+	int read;
+	struct printf_spec spec = {0};
 
 	/* Reject out-of-range values early.  Large positive sizes are
 	   used for unknown buffer sizes. */
@@ -758,184 +984,144 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-		repeat:
-			++fmt;		/* this also skips first '%' */
-			switch (*fmt) {
-				case '-': flags |= LEFT; goto repeat;
-				case '+': flags |= PLUS; goto repeat;
-				case ' ': flags |= SPACE; goto repeat;
-				case '#': flags |= SPECIAL; goto repeat;
-				case '0': flags |= ZEROPAD; goto repeat;
-			}
+		read = format_decode(fmt, &spec);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = va_arg(args, int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;	
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = va_arg(args, int);
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt =='Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			spec.field_width = va_arg(args, int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = va_arg(args, int);
+			break;
 
-		switch (*fmt) {
-			case 'c':
-				if (!(flags & LEFT)) {
-					while (--field_width > 0) {
-						if (str < end)
-							*str = ' ';
-						++str;
-					}
-				}
-				c = (unsigned char) va_arg(args, int);
-				if (str < end)
-					*str = c;
-				++str;
-				while (--field_width > 0) {
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
-				}
-				continue;
-
-			case 's':
-				str = string(str, end, va_arg(args, char *), field_width, precision, flags);
-				continue;
-
-			case 'p':
-				str = pointer(fmt+1, str, end,
-						va_arg(args, void *),
-						field_width, precision, flags);
-				/* Skip all alphanumeric pointer suffixes */
-				while (isalnum(fmt[1]))
-					fmt++;
-				continue;
-
-			case 'n':
-				/* FIXME:
-				* What does C99 say about the overflow case here? */
-				if (qualifier == 'l') {
-					long * ip = va_arg(args, long *);
-					*ip = (str - buf);
-				} else if (qualifier == 'Z' || qualifier == 'z') {
-					size_t * ip = va_arg(args, size_t *);
-					*ip = (str - buf);
-				} else {
-					int * ip = va_arg(args, int *);
-					*ip = (str - buf);
-				}
-				continue;
 
-			case '%':
+				}
+			}
+			c = (unsigned char) va_arg(args, int);
+			if (str < end)
+				*str = c;
+			++str;
+			while (--spec.field_width > 0) {
 				if (str < end)
-					*str = '%';
+					*str = ' ';
 				++str;
-				continue;
+			}
+			break;
 
-				/* integer number formats - set up the flags and "break" */
-			case 'o':
-				base = 8;
-				break;
+		case FORMAT_TYPE_STR:
+			str = string(str, end, va_arg(args, char *), spec);
+			break;
 
-			case 'x':
-				flags |= SMALL;
-			case 'X':
-				base = 16;
-				break;
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, va_arg(args, void *),
+				      spec);
+			while (isalnum(*fmt))
+				fmt++;
+			break;
 
-			case 'd':
-			case 'i':
-				flags |= SIGN;
-			case 'u':
-				break;
+		case FORMAT_TYPE_PERCENT_CHAR:
+			if (str < end)
+				*str = '%';
+			++str;
+			break;
 
-			default:
+		case FORMAT_TYPE_INVALID:
+			if (str < end)
+				*str = '%';
+			++str;
+			if (*fmt) {
 				if (str < end)
-					*str = '%';
+					*str = *fmt;
 				++str;
-				if (*fmt) {
-					if (str < end)
-						*str = *fmt;
-					++str;
-				} else {
-					--fmt;
-				}
-				continue;
+			} else {
+				--fmt;
+			}
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
+			int qualifier = spec.qualifier;
+
+			if (qualifier == 'l') {
+				long *ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z' ||
+					qualifier == 'z') {
+				size_t *ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int *ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			break;
 		}
-		if (qualifier == 'L')
-			num = va_arg(args, long long);
-		else if (qualifier == 'l') {
-			num = va_arg(args, unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = va_arg(args, size_t);
-		} else if (qualifier == 't') {
-			num = va_arg(args, ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) va_arg(args, int);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = va_arg(args, unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
+
+		default:
+			switch (spec.type) {
+			case FORMAT_TYPE_LONG_LONG:
+				num = va_arg(args, long long);
+				break;
+			case FORMAT_TYPE_ULONG:
+				num = va_arg(args, unsigned long);
+				break;
+			case FORMAT_TYPE_LONG:
+				num = va_arg(args, long);
+				break;
+			case FORMAT_TYPE_SIZE_T:
+				num = va_arg(args, size_t);
+				break;
+			case FORMAT_TYPE_PTRDIFF:
+				num = va_arg(args, ptrdiff_t);
+				break;
+			case FORMAT_TYPE_USHORT:
+				num = (unsigned short) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_SHORT:
+				num = (short) va_arg(args, int);
+				break;
+			case FORMAT_TYPE_UINT:
+				num = va_arg(args, unsigned int);
+				break;
+			default:
+				num = va_arg(args, unsigned int);
+			}
+
+			str = number(str, end, num, spec);
 		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 	/* the trailing null byte doesn't count towards the total */
 	return str-buf;
+
 }
 EXPORT_SYMBOL(vsnprintf);
 
@@ -1084,8 +1270,9 @@ EXPORT_SYMBOL(sprintf);
  */
 int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 {
+	struct printf_spec spec = {0};
 	char *str, *end;
-	int qualifier;
+	int read;
 
 	str = (char *)bin_buf;
 	end = (char *)(bin_buf + size);
@@ -1110,57 +1297,26 @@ do {									\
 	str += sizeof(type);						\
 } while (0)
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%')
-			continue;
 
-repeat:
-		/* parse flags */
-		++fmt;		/* this also skips first '%' */
-		if (*fmt == '-' || *fmt == '+' || *fmt == ' '
-				|| *fmt == '#' || *fmt == '0')
-			goto repeat;
+	while (*fmt) {
+		read = format_decode(fmt, &spec);
 
-		/* parse field width */
-		if (isdigit(*fmt))
-			skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			save_arg(int);
-		}
+		fmt += read;
 
-		/* parse the precision */
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				save_arg(int);
-			}
-		}
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE:
+			break;
 
-		/* parse the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+		case FORMAT_TYPE_PRECISION:
+			save_arg(int);
+			break;
 
-		/* parse format type */
-		switch (*fmt) {
-		case 'c':
+		case FORMAT_TYPE_CHAR:
 			save_arg(char);
-			continue;
-		case 's': {
-			/* save the string argument */
+			break;
+
+		case FORMAT_TYPE_STR: {
 			const char *save_str = va_arg(args, char *);
 			size_t len;
 			if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
@@ -1170,16 +1326,27 @@ repeat:
 			if (str + len + 1 < end)
 				memcpy(str, save_str, len + 1);
 			str += len + 1;
-			continue;
+			break;
 		}
-		case 'p':
+
+		case FORMAT_TYPE_PTR:
 			save_arg(void *);
 			/* skip all alphanumeric pointer suffixes */
-			while (isalnum(fmt[1]))
+			while (isalnum(*fmt))
 				fmt++;
-			continue;
-		case 'n': {
+			break;
+
+		case FORMAT_TYPE_PERCENT_CHAR:
+			break;
+
+		case FORMAT_TYPE_INVALID:
+			if (!*fmt)
+				--fmt;
+			break;
+
+		case FORMAT_TYPE_NRCHARS: {
 			/* skip %n 's argument */
+			int qualifier = spec.qualifier;
 			void *skip_arg;
 			if (qualifier == 'l')
 				skip_arg = va_arg(args, long *);
@@ -1187,37 +1354,37 @@ repeat:
 				skip_arg = va_arg(args, size_t *);
 			else
 				skip_arg = va_arg(args, int *);
-			continue;
+			break;
 		}
-		case 'o':
-		case 'x':
-		case 'X':
-		case 'd':
-		case 'i':
-		case 'u':
-			/* save arg for case: 'o', 'x', 'X', 'd', 'i', 'u' */
-			if (qualifier == 'L')
+
+		default:
+			switch (spec.type) {
+
+			case FORMAT_TYPE_LONG_LONG:
 				save_arg(long long);
-			else if (qualifier == 'l')
+				break;
+			case FORMAT_TYPE_ULONG:
+			case FORMAT_TYPE_LONG:
 				save_arg(unsigned long);
-			else if (qualifier == 'Z' || qualifier == 'z')
+				break;
+			case FORMAT_TYPE_SIZE_T:
 				save_arg(size_t);
-			else if (qualifier == 't')
+				break;
+			case FORMAT_TYPE_PTRDIFF:
 				save_arg(ptrdiff_t);
-			else if (qualifier == 'h')
+				break;
+			case FORMAT_TYPE_USHORT:
+			case FORMAT_TYPE_SHORT:
 				save_arg(short);
-			else
+				break;
+			default:
 				save_arg(int);
-			continue;
-		default:
-			if (!*fmt)
-				--fmt;
-			continue;
+			}
 		}
 	}
-#undef save_arg
-
 	return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+
+#undef save_arg
 }
 EXPORT_SYMBOL_GPL(vbin_printf);
 
@@ -1249,14 +1416,10 @@ EXPORT_SYMBOL_GPL(vbin_printf);
 int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 {
 	unsigned long long num;
-	int base;
 	char *str, *end, c;
 	const char *args = (const char *)bin_buf;
 
-	int flags;
-	int field_width;
-	int precision;
-	int qualifier;
+	struct printf_spec spec = {0};
 
 	if (unlikely((int) size < 0)) {
 		/* There can be only one.. */
@@ -1290,84 +1453,37 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 		size = end - buf;
 	}
 
-	for (; *fmt ; ++fmt) {
-		if (*fmt != '%') {
-			if (str < end)
-				*str = *fmt;
-			++str;
-			continue;
-		}
+	while (*fmt) {
+		int read;
+		char *old_fmt = (char *)fmt;
 
-		/* process flags */
-		flags = 0;
-repeat:
-		++fmt;		/* this also skips first '%' */
-		switch (*fmt) {
-		case '-':
-			flags |= LEFT;
-			goto repeat;
-		case '+':
-			flags |= PLUS;
-			goto repeat;
-		case ' ':
-			flags |= SPACE;
-			goto repeat;
-		case '#':
-			flags |= SPECIAL;
-			goto repeat;
-		case '0':
-			flags |= ZEROPAD;
-			goto repeat;
-		}
+		read = format_decode(fmt, &spec);
 
-		/* get field width */
-		field_width = -1;
-		if (isdigit(*fmt))
-			field_width = skip_atoi(&fmt);
-		else if (*fmt == '*') {
-			++fmt;
-			/* it's the next argument */
-			field_width = get_arg(int);
-			if (field_width < 0) {
-				field_width = -field_width;
-				flags |= LEFT;
-			}
-		}
+		fmt += read;
 
-		/* get the precision */
-		precision = -1;
-		if (*fmt == '.') {
-			++fmt;
-			if (isdigit(*fmt))
-				precision = skip_atoi(&fmt);
-			else if (*fmt == '*') {
-				++fmt;
-				/* it's the next argument */
-				precision = get_arg(int);
+		switch (spec.type) {
+		case FORMAT_TYPE_NONE: {
+			int copy = read;
+			if (str < end) {
+				if (copy > end - str)
+					copy = end - str;
+				memcpy(str, old_fmt, copy);
 			}
-			if (precision < 0)
-				precision = 0;
+			str += read;
+			break;
 		}
 
-		/* get the conversion qualifier */
-		qualifier = -1;
-		if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
-		    *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-			qualifier = *fmt;
-			++fmt;
-			if (qualifier == 'l' && *fmt == 'l') {
-				qualifier = 'L';
-				++fmt;
-			}
-		}
+		case FORMAT_TYPE_WITDH:
+			spec.field_width = get_arg(int);
+			break;
 
-		/* default base */
-		base = 10;
+		case FORMAT_TYPE_PRECISION:
+			spec.precision = get_arg(int);
+			break;
 
-		switch (*fmt) {
-		case 'c':
-			if (!(flags & LEFT)) {
-				while (--field_width > 0) {
+		case FORMAT_TYPE_CHAR:
+			if (!(spec.flags & LEFT)) {
+				while (--spec.field_width > 0) {
 					if (str < end)
 						*str = ' ';
 					++str;
@@ -1377,58 +1493,34 @@ repeat:
 			if (str < end)
 				*str = c;
 			++str;
-			while (--field_width > 0) {
+			while (--spec.field_width > 0) {
 				if (str < end)
 					*str = ' ';
 				++str;
 			}
-			continue;
+			break;
 
-		case 's':{
+		case FORMAT_TYPE_STR: {
 			const char *str_arg = args;
 			size_t len = strlen(str_arg);
 			args += len + 1;
-			str = string(str, end, (char *)str_arg, field_width,
-					precision, flags);
-			continue;
+			str = string(str, end, (char *)str_arg, spec);
+			break;
 		}
 
-		case 'p':
-			str = pointer(fmt+1, str, end, get_arg(void *),
-					field_width, precision, flags);
-			/* Skip all alphanumeric pointer suffixes */
-			while (isalnum(fmt[1]))
+		case FORMAT_TYPE_PTR:
+			str = pointer(fmt+1, str, end, get_arg(void *), spec);
+			while (isalnum(*fmt))
 				fmt++;
-			continue;
-
-		case 'n':
-			/* skip %n */
-			continue;
+			break;
 
-		case '%':
+		case FORMAT_TYPE_PERCENT_CHAR:
 			if (str < end)
 				*str = '%';
 			++str;
-			continue;
-
-		/* integer number formats - set up the flags and "break" */
-		case 'o':
-			base = 8;
-			break;
-
-		case 'x':
-			flags |= SMALL;
-		case 'X':
-			base = 16;
 			break;
 
-		case 'd':
-		case 'i':
-			flags |= SIGN;
-		case 'u':
-			break;
-
-		default:
+		case FORMAT_TYPE_INVALID:
 			if (str < end)
 				*str = '%';
 			++str;
@@ -1439,36 +1531,54 @@ repeat:
 			} else {
 				--fmt;
 			}
-			continue;
-		}
-		if (qualifier == 'L')
-			num = get_arg(long long);
-		else if (qualifier == 'l') {
-			num = get_arg(unsigned long);
-			if (flags & SIGN)
-				num = (signed long) num;
-		} else if (qualifier == 'Z' || qualifier == 'z') {
-			num = get_arg(size_t);
-		} else if (qualifier == 't') {
-			num = get_arg(ptrdiff_t);
-		} else if (qualifier == 'h') {
-			num = (unsigned short) get_arg(short);
-			if (flags & SIGN)
-				num = (signed short) num;
-		} else {
-			num = get_arg(unsigned int);
-			if (flags & SIGN)
-				num = (signed int) num;
+			break;
+
+		case FORMAT_TYPE_NRCHARS:
+			/* skip */
+			break;
+
+		default:
+			switch (spec.type) {
+
+			case FORMAT_TYPE_LONG_LONG:
+				num = get_arg(long long);
+				break;
+			case FORMAT_TYPE_ULONG:
+				num = get_arg(unsigned long);
+				break;
+			case FORMAT_TYPE_LONG:
+				num = get_arg(unsigned long);
+				break;
+			case FORMAT_TYPE_SIZE_T:
+				num = get_arg(size_t);
+				break;
+			case FORMAT_TYPE_PTRDIFF:
+				num = get_arg(ptrdiff_t);
+				break;
+			case FORMAT_TYPE_USHORT:
+				num = get_arg(unsigned short);
+				break;
+			case FORMAT_TYPE_SHORT:
+				num = get_arg(short);
+				break;
+			case FORMAT_TYPE_UINT:
+				num = get_arg(unsigned int);
+				break;
+			default:
+				num = get_arg(int);
+			}
+
+			str = number(str, end, num, spec);
 		}
-		str = number(str, end, num, base,
-				field_width, precision, flags);
 	}
+
 	if (size > 0) {
 		if (str < end)
 			*str = '\0';
 		else
 			end[-1] = '\0';
 	}
+
 #undef get_arg
 
 	/* the trailing null byte doesn't count towards the total */
-- 
1.6.1



^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH 0/5] Binary ftrace_printk
  2009-02-28  9:13                       ` Ingo Molnar
                                           ` (5 preceding siblings ...)
  2009-03-01  2:34                         ` [PATCH 5/5 v3] " Frederic Weisbecker
@ 2009-03-01  3:31                         ` Frederic Weisbecker
  6 siblings, 0 replies; 44+ messages in thread
From: Frederic Weisbecker @ 2009-03-01  3:31 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Andrew Morton, Lai Jiangshan, Linus Torvalds, Steven Rostedt,
	Peter Zijlstra, linux-kernel

(Sorry I had a problem with my first sending, hopefully I'm not spamming
each of you twice).

An new optimization is making its way to ftrace. Its purpose is to
make ftrace_printk() consuming less memory and become faster.

Written by Lai Jiangshan, the approach is to delay the formatting
job from tracing time to output time.
Currently, a call to ftrace_printk will format the whole string and
insert it into the ring buffer. Then you can read it on /debug/tracing/trace
file.

The new implementation stores the address of the format string and
the binary parameters into the ring buffer, making the packet more compact
and faster to insert.
Later, when the user exports the traces, the format string is retrieved
with the binary parameters and the formatting job is eventually done.

Here is the result of a small comparative benchmark while putting the following
ftrace_printk on the timer interrupt. ftrace_printk is the old implementation,
ftrace_bprintk is a the new one:

ftrace_printk("This is the timer interrupt: %llu", jiffies_64);

After some time running on low load (no X, no really active processes):

ftrace_printk:  duration average: 2044 ns, avg of bytes stored per entry: 39
ftrace_bprintk: duration average: 1426 ns, avg of bytes stored per entry: 16

Higher load (started X and launched a cat running on a X console looping on
traces printing):

ftrace_printk:  duration average: 8812 ns
ftrace_bprintk: duration average: 2611 ns

Which means the new implementation can be 70 % faster on higher load.
And it consumes lesser memory on the ring buffer.

The three first patches rebase against latest -tip the ftrace_bprintk work done
by Lai few monthes ago.

The two others integrate ftrace_bprintk as a replacement for the old
ftrace_printk implementation and factorize the printf style format decoding
which is now used by three functions.

---

Frederic Weisbecker (2):
  tracing/core: drop the old ftrace_printk implementation in favour of
    ftrace_bprintk
  vsprintf: unify the format decoding layer for its 3 users

Lai Jiangshan (3):
  add binary printf
  ftrace: infrastructure for supporting binary record
  ftrace: add ftrace_bprintk()

 include/asm-generic/vmlinux.lds.h    |    3 +
 include/linux/ftrace.h               |   44 ++-
 include/linux/module.h               |    5 +
 include/linux/string.h               |    7 +
 kernel/module.c                      |    6 +
 kernel/trace/Kconfig                 |    1 +
 kernel/trace/Makefile                |    1 +
 kernel/trace/trace.c                 |  139 +++---
 kernel/trace/trace.h                 |    8 +-
 kernel/trace/trace_functions_graph.c |    6 +-
 kernel/trace/trace_mmiotrace.c       |    9 +-
 kernel/trace/trace_output.c          |   31 +-
 kernel/trace/trace_output.h          |    2 +
 kernel/trace/trace_printk.c          |  178 ++++++
 lib/Kconfig                          |    3 +
 lib/vsprintf.c                       | 1006 ++++++++++++++++++++++++++--------
 16 files changed, 1127 insertions(+), 322 deletions(-)
 create mode 100644 kernel/trace/trace_printk.c



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record
  2009-02-28 20:16                         ` [PATCH 2/5] ftrace: infrastructure for supporting binary record Frederic Weisbecker
@ 2009-03-02 16:27                           ` Steven Rostedt
  2009-03-02 17:39                             ` Frédéric Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-03-02 16:27 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel



On Sat, 28 Feb 2009, Frederic Weisbecker wrote:

> From: Lai Jiangshan <laijs@cn.fujitsu.com>
> 
> Impact: save on memory for tracing
> 
> Current tracers are typically using a struct(like struct ftrace_entry,
> struct ctx_switch_entry, struct special_entr etc...)to record a binary
> event. These structs can only record a their own kind of events.
> A new kind of tracer need a new struct and a lot of code too handle it.
> 
> So we need a generic binary record for events. This infrastructure
> is for this purpose.
> 
> [fweisbec@gmail.com: rebase against latest -tip]
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> ---
>  include/linux/ftrace.h       |    3 +
>  kernel/trace/Kconfig         |    6 +++
>  kernel/trace/Makefile        |    1 +
>  kernel/trace/trace.c         |   55 ++++++++++++++++++++++++++
>  kernel/trace/trace.h         |   12 ++++++
>  kernel/trace/trace_bprintk.c |   87 ++++++++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_output.c  |   76 ++++++++++++++++++++++++++++++++++++
>  7 files changed, 240 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 847bb3c..8ac2fac 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
>   */
>  extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>  
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
> +#endif
>  
>  /* May be defined in arch */
>  extern int ftrace_arch_read_dyn_info(char *buf, int size);
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 02bc5d6..2e84fc7 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
>  	  This is done by setting the current return address on the current
>  	  task structure into a stack of calls.
>  
> +config TRACE_BPRINTK
> +	bool "Binary printk for tracing"
> +	default y
> +	depends on TRACING
> +	select BINARY_PRINTF
> +
>  config IRQSOFF_TRACER
>  	bool "Interrupts-off Latency Tracer"
>  	default n
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index c931fe0..dba6157 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
>  obj-$(CONFIG_TRACING) += trace_clock.o
>  obj-$(CONFIG_TRACING) += trace_output.o
>  obj-$(CONFIG_TRACING) += trace_stat.o
> +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index c5e39cd..3a3ae19 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3572,6 +3572,61 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>  }
>  EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>  
> +/**
> + * trace_vbprintk - write binary msg to tracing buffer
> + *
> + * Caller must insure @fmt are valid when msg is in tracing buffer.
> + */
> +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
> +{
> +	static DEFINE_SPINLOCK(trace_buf_lock);
> +	static u32 trace_buf[TRACE_BUF_SIZE];
> +
> +	struct ring_buffer_event *event;
> +	struct trace_array *tr = &global_trace;
> +	struct trace_array_cpu *data;
> +	struct bprintk_entry *entry;
> +	unsigned long flags;
> +	int cpu, len = 0, size, pc;
> +
> +	if (tracing_disabled || !trace_bprintk_enable)
> +		return 0;
> +

If this is called in the scheduler (we might want a ftrace_printk there),
then the preempt_enable might cause a recursive crash.

If a ftrace_printk is in schedule() but before we clear NEED_RESCHED, the 
preempt_enable below will cause schedule to be called again. Then we would 
again execute this trace and call schedule again, over and over till we 
corrupt the stack and crash the system.

That is what ftrace_preempt_disable() is for.

It's defined in kernel/trace/trace.h:

The way you use it is:

	int resched;
	[...]

	resched = ftrace_preempt_disable();
	[...]

	ftrace_preempt_enable(resched);

This will safely disable preemption and enable it correctly.


> +	pc = preempt_count();
> +	preempt_disable_notrace();
> +	cpu = raw_smp_processor_id();
> +	data = tr->data[cpu];
> +
> +	if (unlikely(atomic_read(&data->disabled)))
> +		goto out;
> +
> +	spin_lock_irqsave(&trace_buf_lock, flags);
> +	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> +
> +	if (len > TRACE_BUF_SIZE || len < 0)
> +		goto out_unlock;
> +
> +	size = sizeof(*entry) + sizeof(u32) * len;
> +	event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
> +	if (!event)
> +		goto out_unlock;
> +	entry = ring_buffer_event_data(event);
> +	entry->ip			= ip;
> +	entry->fmt			= fmt;
> +
> +	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
> +	ring_buffer_unlock_commit(tr->buffer, event);
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&trace_buf_lock, flags);
> +
> +out:
> +	preempt_enable_notrace();
> +
> +	return len;
> +}
> +EXPORT_SYMBOL_GPL(trace_vbprintk);
> +
>  static int trace_panic_handler(struct notifier_block *this,
>  			       unsigned long event, void *unused)
>  {
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index f6fa0b9..92703b1 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -20,6 +20,7 @@ enum trace_type {
>  	TRACE_WAKE,
>  	TRACE_STACK,
>  	TRACE_PRINT,
> +	TRACE_BPRINTK,
>  	TRACE_SPECIAL,
>  	TRACE_MMIO_RW,
>  	TRACE_MMIO_MAP,
> @@ -124,6 +125,16 @@ struct print_entry {
>  	char			buf[];
>  };
>  
> +struct bprintk_entry {
> +	struct trace_entry ent;
> +	unsigned long ip;
> +	const char *fmt;
> +	u32 buf[];
> +};
> +#ifdef CONFIG_TRACE_BPRINTK
> +extern int trace_bprintk_enable;
> +#endif
> +
>  #define TRACE_OLD_SIZE		88
>  
>  struct trace_field_cont {
> @@ -284,6 +295,7 @@ extern void __ftrace_bad_type(void);
>  		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
>  		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>  		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
> +		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>  		IF_ASSIGN(var, ent, struct special_entry, 0);		\
>  		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
>  			  TRACE_MMIO_RW);				\
> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
> new file mode 100644
> index 0000000..1f8e532
> --- /dev/null
> +++ b/kernel/trace/trace_bprintk.c
> @@ -0,0 +1,87 @@
> +/*
> + * trace binary printk
> + *
> + * Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
> + *
> + */
> +#include <linux/kernel.h>
> +#include <linux/ftrace.h>
> +#include <linux/string.h>
> +#include <linux/ctype.h>
> +#include <linux/list.h>
> +#include <linux/mutex.h>
> +#include <linux/slab.h>
> +#include <linux/module.h>
> +#include <linux/seq_file.h>
> +#include <linux/fs.h>
> +#include <linux/marker.h>
> +#include <linux/uaccess.h>
> +
> +#include "trace.h"
> +
> +/* binary printk basic */
> +static DEFINE_MUTEX(btrace_mutex);
> +static int btrace_metadata_count;
> +
> +static inline void lock_btrace(void)
> +{
> +	mutex_lock(&btrace_mutex);
> +}
> +
> +static inline void unlock_btrace(void)
> +{
> +	mutex_unlock(&btrace_mutex);
> +}

Is there a reason to add these wrappers around the mutex?

-- Steve

> +
> +static void get_btrace_metadata(void)
> +{
> +	lock_btrace();
> +	btrace_metadata_count++;
> +	unlock_btrace();
> +}
> +
> +static void put_btrace_metadata(void)
> +{
> +	lock_btrace();
> +	btrace_metadata_count--;
> +	unlock_btrace();
> +}
> +
> +/* events tracer */
> +int trace_bprintk_enable;
> +
> +static void start_bprintk_trace(struct trace_array *tr)
> +{
> +	get_btrace_metadata();
> +	tracing_reset_online_cpus(tr);
> +	trace_bprintk_enable = 1;
> +}
> +
> +static void stop_bprintk_trace(struct trace_array *tr)
> +{
> +	trace_bprintk_enable = 0;
> +	tracing_reset_online_cpus(tr);
> +	put_btrace_metadata();
> +}
> +
> +static int init_bprintk_trace(struct trace_array *tr)
> +{
> +	start_bprintk_trace(tr);
> +	return 0;
> +}
> +
> +static struct tracer bprintk_trace __read_mostly =
> +{
> +	.name	     = "events",
> +	.init	     = init_bprintk_trace,
> +	.reset	     = stop_bprintk_trace,
> +	.start	     = start_bprintk_trace,
> +	.stop	     = stop_bprintk_trace,
> +};
> +
> +static __init int init_bprintk(void)
> +{
> +	return register_tracer(&bprintk_trace);
> +}
> +
> +device_initcall(init_bprintk);
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index 9fc8150..7897b5d 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>  	return len;
>  }
>  
> +static int
> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
> +{
> +	int len = (PAGE_SIZE - 1) - s->len;
> +	int ret;
> +
> +	if (!len)
> +		return 0;
> +
> +	ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
> +
> +	/* If we can't write it all, don't bother writing anything */
> +	if (ret >= len)
> +		return 0;
> +
> +	s->len += ret;
> +
> +	return len;
> +}
> +
>  /**
>   * trace_seq_puts - trace sequence printing of simple string
>   * @s: trace sequence descriptor
> @@ -887,6 +907,61 @@ static struct trace_event trace_print_event = {
>  	.raw		= trace_print_raw,
>  };
>  
> +/* TRACE_BPRINTK */
> +static enum print_line_t
> +trace_bprintk_print(struct trace_iterator *iter, int flags)
> +{
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_seq *s = &iter->seq;
> +	struct bprintk_entry *field;
> +
> +	trace_assign_type(field, entry);
> +
> +	if (!seq_print_ip_sym(s, field->ip, flags))
> +		goto partial;
> +
> +	if (!trace_seq_puts(s, ": "))
> +		goto partial;
> +
> +	if (!trace_seq_bprintf(s, field->fmt, field->buf))
> +		goto partial;
> +
> +	return TRACE_TYPE_HANDLED;
> +
> + partial:
> +	return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static enum print_line_t
> +trace_bprintk_raw(struct trace_iterator *iter, int flags)
> +{
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_seq *s = &iter->seq;
> +	struct bprintk_entry *field;
> +
> +	trace_assign_type(field, entry);
> +
> +	if (!trace_seq_printf(s, ": %lx : ", field->ip))
> +		goto partial;
> +
> +	if (!trace_seq_bprintf(s, field->fmt, field->buf))
> +		goto partial;
> +
> +	return TRACE_TYPE_HANDLED;
> +
> + partial:
> +	return TRACE_TYPE_PARTIAL_LINE;
> +}
> +
> +static struct trace_event trace_bprintk_event = {
> +	.type	 	= TRACE_BPRINTK,
> +	.trace		= trace_bprintk_print,
> +	.latency_trace	= trace_bprintk_print,
> +	.raw		= trace_bprintk_raw,
> +	.hex		= trace_nop_print,
> +	.binary		= trace_nop_print,
> +};
> +
>  static struct trace_event *events[] __initdata = {
>  	&trace_fn_event,
>  	&trace_ctx_event,
> @@ -895,6 +970,7 @@ static struct trace_event *events[] __initdata = {
>  	&trace_stack_event,
>  	&trace_user_stack_event,
>  	&trace_print_event,
> +	&trace_bprintk_event,
>  	NULL
>  };
>  
> -- 
> 1.6.1
> 
> 
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-02-28 21:30                         ` [PATCH 3/5] ftrace: add ftrace_bprintk() Frederic Weisbecker
@ 2009-03-02 16:34                           ` Steven Rostedt
  2009-03-02 17:45                             ` Frédéric Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-03-02 16:34 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel


On Sat, 28 Feb 2009, Frederic Weisbecker wrote:

> From: Lai Jiangshan <laijs@cn.fujitsu.com>
> 
> Impact: Add a generic printk() for ftrace, like ftrace_printk()
> 
> ftrace_bprintk() use the infrastructure to record events on ring_buffer.
> 
> [fweisbec@gmail.com: ported to latest -tip and made it work if !CONFIG_MODULES]
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> ---
>  include/asm-generic/vmlinux.lds.h |    3 +
>  include/linux/ftrace.h            |   21 ++++++
>  include/linux/module.h            |    5 ++
>  kernel/module.c                   |    6 ++
>  kernel/trace/trace.c              |   15 ++++
>  kernel/trace/trace_bprintk.c      |  132 ++++++++++++++++++++++++++++++++++++-
>  6 files changed, 181 insertions(+), 1 deletions(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 9d97491..13f20f4 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -100,6 +100,9 @@
>  		*(__vermagic)		/* Kernel version magic */	\
>  		*(__markers_strings)	/* Markers: strings */		\
>  		*(__tracepoints_strings)/* Tracepoints: strings */	\
> +		VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;	\
> +		*(__trace_printk_fmt)	/* Trace_printk fmt' pointer */	\
> +		VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;		\

If this can only be done when tracing is enabled, I would wrap that up
in a macro that is a nop when tracing is disabled.

>  	}								\
>  									\
>  	.rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {		\
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 8ac2fac..f82b986 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -224,6 +224,27 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>  
>  #ifdef CONFIG_TRACE_BPRINTK
>  extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
> +extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
> +		__attribute__ ((format (printf, 2, 3)));
> +
> +static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
> +		__attribute__ ((format (printf, 1, 2)));
> +static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
> +#define __ftrace_bprintk_check_format(fmt, args...)			\
> +do {									\
> +	if (0)								\
> +		____ftrace_bprintk_check_format(fmt, ##args);		\
> +} while (0)
> +
> +#define ftrace_bprintk(fmt, args...)					\
> +do {									\
> +	static char *__attribute__((section("__trace_bprintk_fmt")))	\
> +			ftrace_bprintk_fmt = fmt;			\
> +	__ftrace_bprintk_check_format(fmt, ##args);			\
> +	__ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);	\
> +} while (0)
> +#else
> +#define ftrace_bprintk ftrace_printk
>  #endif
>  
>  /* May be defined in arch */
> diff --git a/include/linux/module.h b/include/linux/module.h
> index 07b1cb4..c4f2845 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -347,6 +347,11 @@ struct module
>  	unsigned int num_tracepoints;
>  #endif
>  
> +#ifdef CONFIG_TRACE_BPRINTK
> +	const char **trace_bprintk_fmt_start;
> +	unsigned int num_trace_bprintk_fmt;
> +#endif
> +
>  #ifdef CONFIG_MODULE_UNLOAD
>  	/* What modules depend on me? */
>  	struct list_head modules_which_use_me;
> diff --git a/kernel/module.c b/kernel/module.c
> index 90a6d63..fa96909 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2192,6 +2192,12 @@ static noinline struct module *load_module(void __user *umod,
>  					&mod->num_tracepoints);
>  #endif
>  
> +#ifdef CONFIG_TRACE_BPRINTK
> +	mod->trace_bprintk_fmt_start = section_objs(hdr, sechdrs, secstrings,
> +			"__trace_bprintk_fmt", sizeof(char *),
> +			&mod->num_trace_bprintk_fmt);
> +#endif
> +
>  #ifdef CONFIG_MODVERSIONS
>  	if ((mod->num_syms && !mod->crcs)
>  	    || (mod->num_gpl_syms && !mod->gpl_crcs)
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 3a3ae19..0e30dce 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3627,6 +3627,21 @@ out:
>  }
>  EXPORT_SYMBOL_GPL(trace_vbprintk);
>  
> +int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
> +{
> +	int ret;
> +	va_list ap;
> +
> +	if (!fmt)
> +		return 0;
> +
> +	va_start(ap, fmt);
> +	ret = trace_vbprintk(ip, fmt, ap);
> +	va_end(ap);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__ftrace_bprintk);
> +
>  static int trace_panic_handler(struct notifier_block *this,
>  			       unsigned long event, void *unused)
>  {
> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
> index 1f8e532..28e6286 100644
> --- a/kernel/trace/trace_bprintk.c
> +++ b/kernel/trace/trace_bprintk.c
> @@ -19,9 +19,23 @@
>  
>  #include "trace.h"
>  
> +#ifdef CONFIG_MODULE> +
>  /* binary printk basic */
>  static DEFINE_MUTEX(btrace_mutex);
>  static int btrace_metadata_count;
> +/*
> + * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
> + * which are queued on trace_bprintk_fmt_list.
> + */
> +static LIST_HEAD(trace_bprintk_fmt_list);
> +
> +struct trace_bprintk_fmt {
> +	struct list_head list;
> +	int count;
> +	char fmt[0];
> +};
> +
>  
>  static inline void lock_btrace(void)
>  {
> @@ -33,6 +47,93 @@ static inline void unlock_btrace(void)
>  	mutex_unlock(&btrace_mutex);
>  }
>  
> +/*
> + * If trace_bprintk() is not active, we release the trace_bprintk's
> + * formats that no modules has reference to.
> + */
> +static inline void shrink_trace_bprintk_fmt(void)
> +{
> +	struct trace_bprintk_fmt *pos, *next;
> +	list_for_each_entry_safe(pos, next, &trace_bprintk_fmt_list, list) {
> +		if (!pos->count) {
> +			list_del(&pos->list);
> +			kfree(pos);
> +		}
> +	}
> +}
> +
> +static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
> +{
> +	struct trace_bprintk_fmt *pos;
> +	list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
> +		if (!strcmp(pos->fmt, fmt))
> +			return pos;
> +	}
> +	return NULL;
> +}
> +
> +static
> +void hold_module_trace_bprintk_format(const char **start, const char **end)
> +{
> +	const char **iter;
> +	lock_btrace();
> +	for (iter = start; iter < end; iter++) {
> +		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
> +		if (tb_fmt) {
> +			tb_fmt->count++;
> +			*iter = tb_fmt->fmt;
> +			continue;
> +		}
> +
> +		tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
> +				+ strlen(*iter) + 1, GFP_KERNEL);
> +		if (tb_fmt) {
> +			list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
> +			tb_fmt->count = 1;
> +			strcpy(tb_fmt->fmt, *iter);
> +			*iter = tb_fmt->fmt;
> +		} else
> +			*iter = NULL;
> +	}
> +	unlock_btrace();
> +}
> +
> +static
> +void release_module_trace_bprintk_format(const char **start, const char **end)
> +{
> +	const char **iter;
> +	lock_btrace();
> +	for (iter = start; iter < end; iter++) {
> +		struct trace_bprintk_fmt *tb_fmt;
> +		if (!*iter)
> +			continue;
> +
> +		tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
> +		tb_fmt->count--;
> +		if (!tb_fmt->count && !btrace_metadata_count) {
> +			list_del(&tb_fmt->list);
> +			kfree(tb_fmt);

Shouldn't *iter get assigned to NULL somewhere here?

-- Steve


> +		}
> +	}
> +	unlock_btrace();
> +}
> +
> +static int module_trace_bprintk_format_notify(struct notifier_block *self,
> +		unsigned long val, void *data)
> +{
> +	struct module *mod = data;
> +	if (mod->num_trace_bprintk_fmt) {
> +		const char **start = mod->trace_bprintk_fmt_start;
> +		const char **end = start + mod->num_trace_bprintk_fmt;
> +
> +		if (val == MODULE_STATE_COMING)
> +			hold_module_trace_bprintk_format(start, end);
> +		else if (val == MODULE_STATE_GOING)
> +			release_module_trace_bprintk_format(start, end);
> +	}
> +	return 0;
> +}
> +
>  static void get_btrace_metadata(void)
>  {
>  	lock_btrace();
> @@ -44,9 +145,31 @@ static void put_btrace_metadata(void)
>  {
>  	lock_btrace();
>  	btrace_metadata_count--;
> +
> +	if (!btrace_metadata_count) {
> +		shrink_trace_bprintk_fmt();
> +	}
>  	unlock_btrace();
>  }
>  
> +#else /* !CONFIG_MODULES */
> +__init static int
> +module_trace_bprintk_format_notify(struct notifier_block *self,
> +		unsigned long val, void *data)
> +{
> +	return 0;
> +}
> +
> +static inline void get_btrace_metadata(void) { }
> +static inline void put_btrace_metadata(void) { }
> +#endif /* CONFIG_MODULES */
> +
> +
> +__initdata_or_module static
> +struct notifier_block module_trace_bprintk_format_nb = {
> +	.notifier_call = module_trace_bprintk_format_notify,
> +};
> +
>  /* events tracer */
>  int trace_bprintk_enable;
>  
> @@ -81,7 +204,14 @@ static struct tracer bprintk_trace __read_mostly =
>  
>  static __init int init_bprintk(void)
>  {
> -	return register_tracer(&bprintk_trace);
> +	int ret = register_module_notifier(&module_trace_bprintk_format_nb);
> +	if (ret)
> +		return ret;
> +
> +	ret = register_tracer(&bprintk_trace);
> +	if (ret)
> +		unregister_module_notifier(&module_trace_bprintk_format_nb);
> +	return ret;
>  }
>  
>  device_initcall(init_bprintk);
> -- 
> 1.6.1
> 
> 
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk
  2009-02-28 22:16                         ` [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk Frederic Weisbecker
@ 2009-03-02 16:37                           ` Steven Rostedt
  2009-03-02 17:47                             ` Frédéric Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-03-02 16:37 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel


On Sat, 28 Feb 2009, Frederic Weisbecker wrote:

> Impact: faster and lighter tracing
> 
> Now that we have ftrace_bprintk() which is faster and consume lesser
> memory than ftrace_printk() and has the same purpose, we can now drop
> the old implementation in favour of the binary one from ftrace_bprintk(),
> which means we move all the implementation of ftrace_bprintk() to
> ftrace_printk(), so the Api doesn't change except that we must now use
> trace_seq_bprintk() to print the TRACE_PRINT entries.
> 
> Some changes result of this:
> 
> - Previously, ftrace_bprintk depended of a single tracer and couldn't
>   work without. This tracer has been dropped and the whole implementation
>   of ftrace_printk() (like the module formats management) is now integrated
>   in the tracing core (comes with CONFIG_TRACING), though we keep the file
>   trace_bprintk (previously trace_bprintk.c) where we can find the module

    ^^^^^^^^^^^^^

 Do you mean trace_printk.c ?

>   management. Thus we don't overflow trace.c
> 
> - changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries.
> 
> - change a bit ftrace_printk/ftrace_vprintk macros to support non-builtin formats
>   constants, and fix 'const' qualifiers warnings. But this is all transparent for
>   developers.
> 
> - etc...
> 
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> ---
>  include/linux/ftrace.h                           |   68 ++++----
>  include/linux/module.h                           |    2 +-
>  kernel/trace/Kconfig                             |    7 +-
>  kernel/trace/Makefile                            |    2 +-
>  kernel/trace/trace.c                             |  209 +++++++---------------
>  kernel/trace/trace.h                             |   14 +--
>  kernel/trace/trace_functions_graph.c             |    6 +-
>  kernel/trace/trace_mmiotrace.c                   |    9 +-
>  kernel/trace/trace_output.c                      |   69 +------
>  kernel/trace/trace_output.h                      |    2 +
>  kernel/trace/{trace_bprintk.c => trace_printk.c} |  113 ++++--------
>  11 files changed, 162 insertions(+), 339 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index f82b986..696a09b 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -222,31 +222,6 @@ extern int ftrace_make_nop(struct module *mod,
>   */
>  extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>  
> -#ifdef CONFIG_TRACE_BPRINTK
> -extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
> -extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
> -		__attribute__ ((format (printf, 2, 3)));
> -
> -static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
> -		__attribute__ ((format (printf, 1, 2)));
> -static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
> -#define __ftrace_bprintk_check_format(fmt, args...)			\
> -do {									\
> -	if (0)								\
> -		____ftrace_bprintk_check_format(fmt, ##args);		\
> -} while (0)
> -
> -#define ftrace_bprintk(fmt, args...)					\
> -do {									\
> -	static char *__attribute__((section("__trace_bprintk_fmt")))	\
> -			ftrace_bprintk_fmt = fmt;			\
> -	__ftrace_bprintk_check_format(fmt, ##args);			\
> -	__ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);	\
> -} while (0)
> -#else
> -#define ftrace_bprintk ftrace_printk
> -#endif
> -
>  /* May be defined in arch */
>  extern int ftrace_arch_read_dyn_info(char *buf, int size);
>  
> @@ -367,15 +342,40 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
>   * Please refrain from leaving ftrace_printks scattered around in
>   * your code.
>   */
> -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
> -extern int
> -__ftrace_printk(unsigned long ip, const char *fmt, ...)
> -	__attribute__ ((format (printf, 2, 3)));
> -# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
> -extern int
> -__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
> +extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list args);
> +extern int __ftrace_printk(unsigned long ip, const char *fmt, ...)
> +		__attribute__ ((format (printf, 2, 3)));
> +
> +static inline void  ____ftrace_printk_check_format(const char *fmt, ...)
> +		__attribute__ ((format (printf, 1, 2)));
> +static inline void ____ftrace_printk_check_format(const char *fmt, ...) {}
> +#define __ftrace_printk_check_format(fmt, args...)			\
> +do {									\
> +	if (0)								\
> +		____ftrace_printk_check_format(fmt, ##args);		\
> +} while (0)
> +
> +#define ftrace_vprintk(fmt, vargs)					\
> +do {									\
> +	static const char *ftrace_printk_fmt				\
> +	__attribute__((section("__trace_bprintk_fmt")));		\
> +	ftrace_printk_fmt = fmt;					\
> +	__ftrace_vprintk(_THIS_IP_, ftrace_printk_fmt, vargs);		\
> +} while (0)
> +
> +#define ftrace_printk(fmt, args...)					\
> +do {									\
> +	static const char *ftrace_printk_fmt				\
> +	__attribute__((section("__trace_bprintk_fmt")));		\
> +	ftrace_printk_fmt = fmt;					\
> +	__ftrace_printk_check_format(fmt, ##args);			\
> +	__ftrace_printk(_THIS_IP_, ftrace_printk_fmt, ##args);		\
> +} while (0)
> +
>  extern void ftrace_dump(void);
> -#else
> +
> +#else /* !CONFIG_TRACING */
> +
>  static inline void
>  ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
>  static inline int
> @@ -395,7 +395,7 @@ ftrace_vprintk(const char *fmt, va_list ap)
>  	return 0;
>  }
>  static inline void ftrace_dump(void) { }
> -#endif
> +#endif /* CONFIG_TRACING */
>  
>  #ifdef CONFIG_FTRACE_MCOUNT_RECORD
>  extern void ftrace_init(void);
> diff --git a/include/linux/module.h b/include/linux/module.h
> index c4f2845..e418da1 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -347,7 +347,7 @@ struct module
>  	unsigned int num_tracepoints;
>  #endif
>  
> -#ifdef CONFIG_TRACE_BPRINTK
> +#ifdef CONFIG_TRACING
>  	const char **trace_bprintk_fmt_start;
>  	unsigned int num_trace_bprintk_fmt;
>  #endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 2e84fc7..52439c8 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -52,6 +52,7 @@ config TRACING
>  	select STACKTRACE if STACKTRACE_SUPPORT
>  	select TRACEPOINTS
>  	select NOP_TRACER
> +	select BINARY_PRINTF
>  
>  menu "Tracers"
>  
> @@ -86,12 +87,6 @@ config FUNCTION_GRAPH_TRACER
>  	  This is done by setting the current return address on the current
>  	  task structure into a stack of calls.
>  
> -config TRACE_BPRINTK
> -	bool "Binary printk for tracing"
> -	default y
> -	depends on TRACING
> -	select BINARY_PRINTF
> -
>  config IRQSOFF_TRACER
>  	bool "Interrupts-off Latency Tracer"
>  	default n
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index dba6157..25e243f 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -22,7 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
>  obj-$(CONFIG_TRACING) += trace_clock.o
>  obj-$(CONFIG_TRACING) += trace_output.o
>  obj-$(CONFIG_TRACING) += trace_stat.o
> -obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
> +obj-$(CONFIG_TRACING) += trace_printk.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 0e30dce..2ad285b 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -1171,6 +1171,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
>  }
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>  
> +
> +/**
> + * trace_vprintk - write binary msg to tracing buffer
> + *
> + * Caller must insure @fmt are valid when msg is in tracing buffer.
> + */
> +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
> +{
> +	static DEFINE_SPINLOCK(trace_buf_lock);
> +	static u32 trace_buf[TRACE_BUF_SIZE];
> +
> +	struct ring_buffer_event *event;
> +	struct trace_array *tr = &global_trace;
> +	struct trace_array_cpu *data;
> +	struct print_entry *entry;
> +	unsigned long flags;
> +	int cpu, len = 0, size, pc;
> +
> +	if (unlikely(tracing_selftest_running || tracing_disabled))
> +		return 0;
> +
> +	/* Don't pollute graph traces with trace_vprintk internals */
> +	pause_graph_tracing();
> +
> +	pc = preempt_count();
> +	preempt_disable_notrace();
> +	cpu = raw_smp_processor_id();
> +	data = tr->data[cpu];
> +
> +	if (unlikely(atomic_read(&data->disabled)))
> +		goto out;
> +
> +	spin_lock_irqsave(&trace_buf_lock, flags);
> +	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> +
> +	if (len > TRACE_BUF_SIZE || len < 0)
> +		goto out_unlock;
> +
> +	size = sizeof(*entry) + sizeof(u32) * len;
> +	event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc);
> +	if (!event)
> +		goto out_unlock;
> +	entry = ring_buffer_event_data(event);
> +	entry->ip			= ip;
> +	entry->depth			= depth;
> +	entry->fmt			= fmt;
> +
> +	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
> +	ring_buffer_unlock_commit(tr->buffer, event);
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&trace_buf_lock, flags);
> +
> +out:
> +	preempt_enable_notrace();
> +	unpause_graph_tracing();
> +
> +	return len;
> +}
> +EXPORT_SYMBOL_GPL(trace_vprintk);
> +
>  enum trace_file_type {
>  	TRACE_FILE_LAT_FMT	= 1,
>  	TRACE_FILE_ANNOTATE	= 2,
> @@ -1588,7 +1649,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
>  
>  	trace_assign_type(field, entry);
>  
> -	ret = trace_seq_printf(s, "%s", field->buf);
> +	ret = trace_seq_bprintf(s, field->fmt, field->buf);
>  	if (!ret)
>  		return TRACE_TYPE_PARTIAL_LINE;
>  
> @@ -3496,152 +3557,6 @@ static __init int tracer_init_debugfs(void)
>  	return 0;
>  }
>  
> -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
> -{
> -	static DEFINE_SPINLOCK(trace_buf_lock);
> -	static char trace_buf[TRACE_BUF_SIZE];
> -
> -	struct ring_buffer_event *event;
> -	struct trace_array *tr = &global_trace;
> -	struct trace_array_cpu *data;
> -	int cpu, len = 0, size, pc;
> -	struct print_entry *entry;
> -	unsigned long irq_flags;
> -
> -	if (tracing_disabled || tracing_selftest_running)
> -		return 0;
> -
> -	pc = preempt_count();
> -	preempt_disable_notrace();
> -	cpu = raw_smp_processor_id();
> -	data = tr->data[cpu];
> -
> -	if (unlikely(atomic_read(&data->disabled)))
> -		goto out;
> -
> -	pause_graph_tracing();
> -	spin_lock_irqsave(&trace_buf_lock, irq_flags);
> -	len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> -
> -	len = min(len, TRACE_BUF_SIZE-1);
> -	trace_buf[len] = 0;
> -
> -	size = sizeof(*entry) + len + 1;
> -	event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
> -	if (!event)
> -		goto out_unlock;
> -	entry = ring_buffer_event_data(event);
> -	entry->ip			= ip;
> -	entry->depth			= depth;
> -
> -	memcpy(&entry->buf, trace_buf, len);
> -	entry->buf[len] = 0;
> -	ring_buffer_unlock_commit(tr->buffer, event);
> -
> - out_unlock:
> -	spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
> -	unpause_graph_tracing();
> - out:
> -	preempt_enable_notrace();
> -
> -	return len;
> -}
> -EXPORT_SYMBOL_GPL(trace_vprintk);
> -
> -int __ftrace_printk(unsigned long ip, const char *fmt, ...)
> -{
> -	int ret;
> -	va_list ap;
> -
> -	if (!(trace_flags & TRACE_ITER_PRINTK))
> -		return 0;
> -
> -	va_start(ap, fmt);
> -	ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
> -	va_end(ap);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(__ftrace_printk);
> -
> -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
> -{
> -	if (!(trace_flags & TRACE_ITER_PRINTK))
> -		return 0;
> -
> -	return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
> -}
> -EXPORT_SYMBOL_GPL(__ftrace_vprintk);
> -
> -/**
> - * trace_vbprintk - write binary msg to tracing buffer
> - *
> - * Caller must insure @fmt are valid when msg is in tracing buffer.
> - */
> -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
> -{
> -	static DEFINE_SPINLOCK(trace_buf_lock);
> -	static u32 trace_buf[TRACE_BUF_SIZE];
> -
> -	struct ring_buffer_event *event;
> -	struct trace_array *tr = &global_trace;
> -	struct trace_array_cpu *data;
> -	struct bprintk_entry *entry;
> -	unsigned long flags;
> -	int cpu, len = 0, size, pc;
> -
> -	if (tracing_disabled || !trace_bprintk_enable)
> -		return 0;
> -
> -	pc = preempt_count();
> -	preempt_disable_notrace();
> -	cpu = raw_smp_processor_id();
> -	data = tr->data[cpu];
> -
> -	if (unlikely(atomic_read(&data->disabled)))
> -		goto out;
> -
> -	spin_lock_irqsave(&trace_buf_lock, flags);
> -	len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
> -
> -	if (len > TRACE_BUF_SIZE || len < 0)
> -		goto out_unlock;
> -
> -	size = sizeof(*entry) + sizeof(u32) * len;
> -	event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
> -	if (!event)
> -		goto out_unlock;
> -	entry = ring_buffer_event_data(event);
> -	entry->ip			= ip;
> -	entry->fmt			= fmt;
> -
> -	memcpy(entry->buf, trace_buf, sizeof(u32) * len);
> -	ring_buffer_unlock_commit(tr->buffer, event);
> -
> -out_unlock:
> -	spin_unlock_irqrestore(&trace_buf_lock, flags);
> -
> -out:
> -	preempt_enable_notrace();
> -
> -	return len;
> -}
> -EXPORT_SYMBOL_GPL(trace_vbprintk);
> -
> -int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
> -{
> -	int ret;
> -	va_list ap;
> -
> -	if (!fmt)
> -		return 0;
> -
> -	va_start(ap, fmt);
> -	ret = trace_vbprintk(ip, fmt, ap);
> -	va_end(ap);
> -	return ret;
> -}
> -EXPORT_SYMBOL_GPL(__ftrace_bprintk);
> -
>  static int trace_panic_handler(struct notifier_block *this,
>  			       unsigned long event, void *unused)
>  {
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 92703b1..0ccc678 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -20,7 +20,6 @@ enum trace_type {
>  	TRACE_WAKE,
>  	TRACE_STACK,
>  	TRACE_PRINT,
> -	TRACE_BPRINTK,
>  	TRACE_SPECIAL,
>  	TRACE_MMIO_RW,
>  	TRACE_MMIO_MAP,
> @@ -120,16 +119,10 @@ struct userstack_entry {
>   */
>  struct print_entry {
>  	struct trace_entry	ent;
> -	unsigned long		ip;
> +	unsigned long 		ip;
>  	int			depth;
> -	char			buf[];
> -};
> -
> -struct bprintk_entry {
> -	struct trace_entry ent;
> -	unsigned long ip;
> -	const char *fmt;
> -	u32 buf[];
> +	const char		*fmt;
> +	u32 			buf[];
>  };
>  #ifdef CONFIG_TRACE_BPRINTK
>  extern int trace_bprintk_enable;
> @@ -295,7 +288,6 @@ extern void __ftrace_bad_type(void);
>  		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
>  		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>  		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
> -		IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>  		IF_ASSIGN(var, ent, struct special_entry, 0);		\
>  		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\
>  			  TRACE_MMIO_RW);				\
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index 2461732..13e362d 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -742,7 +742,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
>  		}
>  
>  	/* The comment */
> -	ret = trace_seq_printf(s, "/* %s", trace->buf);
> +	ret = trace_seq_printf(s, "/* ");
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +
> +	ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
>  	if (!ret)
>  		return TRACE_TYPE_PARTIAL_LINE;
>  
> diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
> index c401b90..23e346a 100644
> --- a/kernel/trace/trace_mmiotrace.c
> +++ b/kernel/trace/trace_mmiotrace.c
> @@ -254,15 +254,18 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
>  {
>  	struct trace_entry *entry = iter->ent;
>  	struct print_entry *print = (struct print_entry *)entry;
> -	const char *msg		= print->buf;
>  	struct trace_seq *s	= &iter->seq;
>  	unsigned long long t	= ns2usecs(iter->ts);
> -	unsigned long usec_rem	= do_div(t, 1000000ULL);
> +	unsigned long usec_rem	= do_div(t, USEC_PER_SEC);
>  	unsigned secs		= (unsigned long)t;
>  	int ret;
>  
>  	/* The trailing newline must be in the message. */
> -	ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
> +	ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem);
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +
> +	ret = trace_seq_bprintf(s, print->fmt, print->buf);
>  	if (!ret)
>  		return TRACE_TYPE_PARTIAL_LINE;
>  
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index 7897b5d..a020a9b 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -53,8 +53,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>  	return len;
>  }
>  
> -static int
> -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
> +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
>  {
>  	int len = (PAGE_SIZE - 1) - s->len;
>  	int ret;
> @@ -876,50 +875,6 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
>  	if (!seq_print_ip_sym(s, field->ip, flags))
>  		goto partial;
>  
> -	if (!trace_seq_printf(s, ": %s", field->buf))
> -		goto partial;
> -
> -	return TRACE_TYPE_HANDLED;
> -
> - partial:
> -	return TRACE_TYPE_PARTIAL_LINE;
> -}
> -
> -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
> -{
> -	struct print_entry *field;
> -
> -	trace_assign_type(field, iter->ent);
> -
> -	if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
> -		goto partial;
> -
> -	return TRACE_TYPE_HANDLED;
> -
> - partial:
> -	return TRACE_TYPE_PARTIAL_LINE;
> -}
> -
> -static struct trace_event trace_print_event = {
> -	.type	 	= TRACE_PRINT,
> -	.trace		= trace_print_print,
> -	.latency_trace	= trace_print_print,
> -	.raw		= trace_print_raw,
> -};
> -
> -/* TRACE_BPRINTK */
> -static enum print_line_t
> -trace_bprintk_print(struct trace_iterator *iter, int flags)
> -{
> -	struct trace_entry *entry = iter->ent;
> -	struct trace_seq *s = &iter->seq;
> -	struct bprintk_entry *field;
> -
> -	trace_assign_type(field, entry);
> -
> -	if (!seq_print_ip_sym(s, field->ip, flags))
> -		goto partial;
> -
>  	if (!trace_seq_puts(s, ": "))
>  		goto partial;
>  
> @@ -932,14 +887,13 @@ trace_bprintk_print(struct trace_iterator *iter, int flags)
>  	return TRACE_TYPE_PARTIAL_LINE;
>  }
>  
> -static enum print_line_t
> -trace_bprintk_raw(struct trace_iterator *iter, int flags)
> +
> +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
>  {
> -	struct trace_entry *entry = iter->ent;
> +	struct print_entry *field;
>  	struct trace_seq *s = &iter->seq;
> -	struct bprintk_entry *field;
>  
> -	trace_assign_type(field, entry);
> +	trace_assign_type(field, iter->ent);
>  
>  	if (!trace_seq_printf(s, ": %lx : ", field->ip))
>  		goto partial;
> @@ -953,13 +907,11 @@ trace_bprintk_raw(struct trace_iterator *iter, int flags)
>  	return TRACE_TYPE_PARTIAL_LINE;
>  }
>  
> -static struct trace_event trace_bprintk_event = {
> -	.type	 	= TRACE_BPRINTK,
> -	.trace		= trace_bprintk_print,
> -	.latency_trace	= trace_bprintk_print,
> -	.raw		= trace_bprintk_raw,
> -	.hex		= trace_nop_print,
> -	.binary		= trace_nop_print,
> +static struct trace_event trace_print_event = {
> +	.type	 	= TRACE_PRINT,
> +	.trace		= trace_print_print,
> +	.latency_trace	= trace_print_print,
> +	.raw		= trace_print_raw,
>  };
>  
>  static struct trace_event *events[] __initdata = {
> @@ -970,7 +922,6 @@ static struct trace_event *events[] __initdata = {
>  	&trace_stack_event,
>  	&trace_user_stack_event,
>  	&trace_print_event,
> -	&trace_bprintk_event,
>  	NULL
>  };
>  
> diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
> index 551a25a..6d39010 100644
> --- a/kernel/trace/trace_output.h
> +++ b/kernel/trace/trace_output.h
> @@ -19,6 +19,8 @@ struct trace_event {
>  extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>  	__attribute__ ((format (printf, 2, 3)));
>  extern int
> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
> +extern int
>  seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
>  		unsigned long sym_flags);
>  extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_printk.c
> similarity index 65%
> rename from kernel/trace/trace_bprintk.c
> rename to kernel/trace/trace_printk.c
> index 28e6286..09f4ace 100644
> --- a/kernel/trace/trace_bprintk.c
> +++ b/kernel/trace/trace_printk.c
> @@ -21,32 +21,21 @@
>  
>  #ifdef CONFIG_MODULES
>  
> -/* binary printk basic */
> -static DEFINE_MUTEX(btrace_mutex);
> -static int btrace_metadata_count;
>  /*
> - * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
> + * modules ftrace_printk()'s formats are autosaved in struct trace_bprintk_fmt
>   * which are queued on trace_bprintk_fmt_list.
>   */
>  static LIST_HEAD(trace_bprintk_fmt_list);
>  
> +/* serialize accesses to trace_bprintk_fmt_list */
> +static DEFINE_MUTEX(btrace_mutex);
> +
>  struct trace_bprintk_fmt {
>  	struct list_head list;
>  	int count;
>  	char fmt[0];
>  };
>  
> -
> -static inline void lock_btrace(void)
> -{
> -	mutex_lock(&btrace_mutex);
> -}
> -
> -static inline void unlock_btrace(void)
> -{
> -	mutex_unlock(&btrace_mutex);
> -}
> -

Heh, OK, you removed it ;-)

-- Steve

>  /*
>   * If trace_bprintk() is not active, we release the trace_bprintk's
>   * formats that no modules has reference to.
> @@ -76,7 +65,8 @@ static
>  void hold_module_trace_bprintk_format(const char **start, const char **end)
>  {
>  	const char **iter;
> -	lock_btrace();
> +
> +	mutex_lock(&btrace_mutex);
>  	for (iter = start; iter < end; iter++) {
>  		struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
>  		if (tb_fmt) {
> @@ -95,14 +85,15 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
>  		} else
>  			*iter = NULL;
>  	}
> -	unlock_btrace();
> +	mutex_unlock(&btrace_mutex);
>  }
>  
>  static
>  void release_module_trace_bprintk_format(const char **start, const char **end)
>  {
>  	const char **iter;
> -	lock_btrace();
> +
> +	mutex_lock(&btrace_mutex);
>  	for (iter = start; iter < end; iter++) {
>  		struct trace_bprintk_fmt *tb_fmt;
>  		if (!*iter)
> @@ -110,12 +101,12 @@ void release_module_trace_bprintk_format(const char **start, const char **end)
>  
>  		tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
>  		tb_fmt->count--;
> -		if (!tb_fmt->count && !btrace_metadata_count) {
> +		if (!tb_fmt->count) {
>  			list_del(&tb_fmt->list);
>  			kfree(tb_fmt);
>  		}
>  	}
> -	unlock_btrace();
> +	mutex_unlock(&btrace_mutex);
>  }
>  
>  static int module_trace_bprintk_format_notify(struct notifier_block *self,
> @@ -134,24 +125,6 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
>  	return 0;
>  }
>  
> -static void get_btrace_metadata(void)
> -{
> -	lock_btrace();
> -	btrace_metadata_count++;
> -	unlock_btrace();
> -}
> -
> -static void put_btrace_metadata(void)
> -{
> -	lock_btrace();
> -	btrace_metadata_count--;
> -
> -	if (!btrace_metadata_count) {
> -		shrink_trace_bprintk_fmt();
> -	}
> -	unlock_btrace();
> -}
> -
>  #else /* !CONFIG_MODULES */
>  __init static int
>  module_trace_bprintk_format_notify(struct notifier_block *self,
> @@ -159,9 +132,6 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
>  {
>  	return 0;
>  }
> -
> -static inline void get_btrace_metadata(void) { }
> -static inline void put_btrace_metadata(void) { }
>  #endif /* CONFIG_MODULES */
>  
>  
> @@ -170,48 +140,39 @@ struct notifier_block module_trace_bprintk_format_nb = {
>  	.notifier_call = module_trace_bprintk_format_notify,
>  };
>  
> -/* events tracer */
> -int trace_bprintk_enable;
> -
> -static void start_bprintk_trace(struct trace_array *tr)
> +int __ftrace_printk(unsigned long ip, const char *fmt, ...)
>  {
> -	get_btrace_metadata();
> -	tracing_reset_online_cpus(tr);
> -	trace_bprintk_enable = 1;
> -}
> +	int ret;
> +	va_list ap;
>  
> -static void stop_bprintk_trace(struct trace_array *tr)
> -{
> -	trace_bprintk_enable = 0;
> -	tracing_reset_online_cpus(tr);
> -	put_btrace_metadata();
> -}
> +	if (unlikely(!fmt))
> +		return 0;
>  
> -static int init_bprintk_trace(struct trace_array *tr)
> -{
> -	start_bprintk_trace(tr);
> -	return 0;
> +	if (!(trace_flags & TRACE_ITER_PRINTK))
> +		return 0;
> +
> +	va_start(ap, fmt);
> +	ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
> +	va_end(ap);
> +	return ret;
>  }
> +EXPORT_SYMBOL_GPL(__ftrace_printk);
>  
> -static struct tracer bprintk_trace __read_mostly =
> +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>  {
> -	.name	     = "events",
> -	.init	     = init_bprintk_trace,
> -	.reset	     = stop_bprintk_trace,
> -	.start	     = start_bprintk_trace,
> -	.stop	     = stop_bprintk_trace,
> -};
> +	if (unlikely(!fmt))
> +		return 0;
>  
> -static __init int init_bprintk(void)
> -{
> -	int ret = register_module_notifier(&module_trace_bprintk_format_nb);
> -	if (ret)
> -		return ret;
> +	if (!(trace_flags & TRACE_ITER_PRINTK))
> +		return 0;
>  
> -	ret = register_tracer(&bprintk_trace);
> -	if (ret)
> -		unregister_module_notifier(&module_trace_bprintk_format_nb);
> -	return ret;
> +	return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
>  }
> +EXPORT_SYMBOL_GPL(__ftrace_vprintk);
> +
>  
> -device_initcall(init_bprintk);
> +static __init int init_trace_printk(void)
> +{
> +	return register_module_notifier(&module_trace_bprintk_format_nb);
> +}
> +early_initcall(init_trace_printk);
> -- 
> 1.6.1
> 
> 
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 2/5] ftrace: infrastructure for supporting binary record
  2009-03-02 16:27                           ` Steven Rostedt
@ 2009-03-02 17:39                             ` Frédéric Weisbecker
  0 siblings, 0 replies; 44+ messages in thread
From: Frédéric Weisbecker @ 2009-03-02 17:39 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel

2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>
>
> On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
>
>> From: Lai Jiangshan <laijs@cn.fujitsu.com>
>>
>> Impact: save on memory for tracing
>>
>> Current tracers are typically using a struct(like struct ftrace_entry,
>> struct ctx_switch_entry, struct special_entr etc...)to record a binary
>> event. These structs can only record a their own kind of events.
>> A new kind of tracer need a new struct and a lot of code too handle it.
>>
>> So we need a generic binary record for events. This infrastructure
>> is for this purpose.
>>
>> [fweisbec@gmail.com: rebase against latest -tip]
>>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
>> ---
>>  include/linux/ftrace.h       |    3 +
>>  kernel/trace/Kconfig         |    6 +++
>>  kernel/trace/Makefile        |    1 +
>>  kernel/trace/trace.c         |   55 ++++++++++++++++++++++++++
>>  kernel/trace/trace.h         |   12 ++++++
>>  kernel/trace/trace_bprintk.c |   87 ++++++++++++++++++++++++++++++++++++++++++
>>  kernel/trace/trace_output.c  |   76 ++++++++++++++++++++++++++++++++++++
>>  7 files changed, 240 insertions(+), 0 deletions(-)
>>
>> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
>> index 847bb3c..8ac2fac 100644
>> --- a/include/linux/ftrace.h
>> +++ b/include/linux/ftrace.h
>> @@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
>>   */
>>  extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>>
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
>> +#endif
>>
>>  /* May be defined in arch */
>>  extern int ftrace_arch_read_dyn_info(char *buf, int size);
>> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
>> index 02bc5d6..2e84fc7 100644
>> --- a/kernel/trace/Kconfig
>> +++ b/kernel/trace/Kconfig
>> @@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
>>         This is done by setting the current return address on the current
>>         task structure into a stack of calls.
>>
>> +config TRACE_BPRINTK
>> +     bool "Binary printk for tracing"
>> +     default y
>> +     depends on TRACING
>> +     select BINARY_PRINTF
>> +
>>  config IRQSOFF_TRACER
>>       bool "Interrupts-off Latency Tracer"
>>       default n
>> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
>> index c931fe0..dba6157 100644
>> --- a/kernel/trace/Makefile
>> +++ b/kernel/trace/Makefile
>> @@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
>>  obj-$(CONFIG_TRACING) += trace_clock.o
>>  obj-$(CONFIG_TRACING) += trace_output.o
>>  obj-$(CONFIG_TRACING) += trace_stat.o
>> +obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
>>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index c5e39cd..3a3ae19 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -3572,6 +3572,61 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>>  }
>>  EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>>
>> +/**
>> + * trace_vbprintk - write binary msg to tracing buffer
>> + *
>> + * Caller must insure @fmt are valid when msg is in tracing buffer.
>> + */
>> +int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
>> +{
>> +     static DEFINE_SPINLOCK(trace_buf_lock);
>> +     static u32 trace_buf[TRACE_BUF_SIZE];
>> +
>> +     struct ring_buffer_event *event;
>> +     struct trace_array *tr = &global_trace;
>> +     struct trace_array_cpu *data;
>> +     struct bprintk_entry *entry;
>> +     unsigned long flags;
>> +     int cpu, len = 0, size, pc;
>> +
>> +     if (tracing_disabled || !trace_bprintk_enable)
>> +             return 0;
>> +
>
> If this is called in the scheduler (we might want a ftrace_printk there),
> then the preempt_enable might cause a recursive crash.
>
> If a ftrace_printk is in schedule() but before we clear NEED_RESCHED, the
> preempt_enable below will cause schedule to be called again. Then we would
> again execute this trace and call schedule again, over and over till we
> corrupt the stack and crash the system.


Oh, right!


> That is what ftrace_preempt_disable() is for.
>
> It's defined in kernel/trace/trace.h:
>
> The way you use it is:
>
>        int resched;
>        [...]
>
>        resched = ftrace_preempt_disable();
>        [...]
>
>        ftrace_preempt_enable(resched);
>
> This will safely disable preemption and enable it correctly.
>


Ok, will fix it.


>> +     pc = preempt_count();
>> +     preempt_disable_notrace();
>> +     cpu = raw_smp_processor_id();
>> +     data = tr->data[cpu];
>> +
>> +     if (unlikely(atomic_read(&data->disabled)))
>> +             goto out;
>> +
>> +     spin_lock_irqsave(&trace_buf_lock, flags);
>> +     len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
>> +
>> +     if (len > TRACE_BUF_SIZE || len < 0)
>> +             goto out_unlock;
>> +
>> +     size = sizeof(*entry) + sizeof(u32) * len;
>> +     event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
>> +     if (!event)
>> +             goto out_unlock;
>> +     entry = ring_buffer_event_data(event);
>> +     entry->ip                       = ip;
>> +     entry->fmt                      = fmt;
>> +
>> +     memcpy(entry->buf, trace_buf, sizeof(u32) * len);
>> +     ring_buffer_unlock_commit(tr->buffer, event);
>> +
>> +out_unlock:
>> +     spin_unlock_irqrestore(&trace_buf_lock, flags);
>> +
>> +out:
>> +     preempt_enable_notrace();
>> +
>> +     return len;
>> +}
>> +EXPORT_SYMBOL_GPL(trace_vbprintk);
>> +
>>  static int trace_panic_handler(struct notifier_block *this,
>>                              unsigned long event, void *unused)
>>  {
>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>> index f6fa0b9..92703b1 100644
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -20,6 +20,7 @@ enum trace_type {
>>       TRACE_WAKE,
>>       TRACE_STACK,
>>       TRACE_PRINT,
>> +     TRACE_BPRINTK,
>>       TRACE_SPECIAL,
>>       TRACE_MMIO_RW,
>>       TRACE_MMIO_MAP,
>> @@ -124,6 +125,16 @@ struct print_entry {
>>       char                    buf[];
>>  };
>>
>> +struct bprintk_entry {
>> +     struct trace_entry ent;
>> +     unsigned long ip;
>> +     const char *fmt;
>> +     u32 buf[];
>> +};
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +extern int trace_bprintk_enable;
>> +#endif
>> +
>>  #define TRACE_OLD_SIZE               88
>>
>>  struct trace_field_cont {
>> @@ -284,6 +295,7 @@ extern void __ftrace_bad_type(void);
>>               IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
>>               IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>>               IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
>> +             IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>>               IF_ASSIGN(var, ent, struct special_entry, 0);           \
>>               IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
>>                         TRACE_MMIO_RW);                               \
>> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
>> new file mode 100644
>> index 0000000..1f8e532
>> --- /dev/null
>> +++ b/kernel/trace/trace_bprintk.c
>> @@ -0,0 +1,87 @@
>> +/*
>> + * trace binary printk
>> + *
>> + * Copyright (C) 2008 Lai Jiangshan <laijs@cn.fujitsu.com>
>> + *
>> + */
>> +#include <linux/kernel.h>
>> +#include <linux/ftrace.h>
>> +#include <linux/string.h>
>> +#include <linux/ctype.h>
>> +#include <linux/list.h>
>> +#include <linux/mutex.h>
>> +#include <linux/slab.h>
>> +#include <linux/module.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/fs.h>
>> +#include <linux/marker.h>
>> +#include <linux/uaccess.h>
>> +
>> +#include "trace.h"
>> +
>> +/* binary printk basic */
>> +static DEFINE_MUTEX(btrace_mutex);
>> +static int btrace_metadata_count;
>> +
>> +static inline void lock_btrace(void)
>> +{
>> +     mutex_lock(&btrace_mutex);
>> +}
>> +
>> +static inline void unlock_btrace(void)
>> +{
>> +     mutex_unlock(&btrace_mutex);
>> +}
>
> Is there a reason to add these wrappers around the mutex?
>
> -- Steve

Removed on the end of the serie :-)

Thanks!

>
>> +
>> +static void get_btrace_metadata(void)
>> +{
>> +     lock_btrace();
>> +     btrace_metadata_count++;
>> +     unlock_btrace();
>> +}
>> +
>> +static void put_btrace_metadata(void)
>> +{
>> +     lock_btrace();
>> +     btrace_metadata_count--;
>> +     unlock_btrace();
>> +}
>> +
>> +/* events tracer */
>> +int trace_bprintk_enable;
>> +
>> +static void start_bprintk_trace(struct trace_array *tr)
>> +{
>> +     get_btrace_metadata();
>> +     tracing_reset_online_cpus(tr);
>> +     trace_bprintk_enable = 1;
>> +}
>> +
>> +static void stop_bprintk_trace(struct trace_array *tr)
>> +{
>> +     trace_bprintk_enable = 0;
>> +     tracing_reset_online_cpus(tr);
>> +     put_btrace_metadata();
>> +}
>> +
>> +static int init_bprintk_trace(struct trace_array *tr)
>> +{
>> +     start_bprintk_trace(tr);
>> +     return 0;
>> +}
>> +
>> +static struct tracer bprintk_trace __read_mostly =
>> +{
>> +     .name        = "events",
>> +     .init        = init_bprintk_trace,
>> +     .reset       = stop_bprintk_trace,
>> +     .start       = start_bprintk_trace,
>> +     .stop        = stop_bprintk_trace,
>> +};
>> +
>> +static __init int init_bprintk(void)
>> +{
>> +     return register_tracer(&bprintk_trace);
>> +}
>> +
>> +device_initcall(init_bprintk);
>> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
>> index 9fc8150..7897b5d 100644
>> --- a/kernel/trace/trace_output.c
>> +++ b/kernel/trace/trace_output.c
>> @@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>>       return len;
>>  }
>>
>> +static int
>> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
>> +{
>> +     int len = (PAGE_SIZE - 1) - s->len;
>> +     int ret;
>> +
>> +     if (!len)
>> +             return 0;
>> +
>> +     ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
>> +
>> +     /* If we can't write it all, don't bother writing anything */
>> +     if (ret >= len)
>> +             return 0;
>> +
>> +     s->len += ret;
>> +
>> +     return len;
>> +}
>> +
>>  /**
>>   * trace_seq_puts - trace sequence printing of simple string
>>   * @s: trace sequence descriptor
>> @@ -887,6 +907,61 @@ static struct trace_event trace_print_event = {
>>       .raw            = trace_print_raw,
>>  };
>>
>> +/* TRACE_BPRINTK */
>> +static enum print_line_t
>> +trace_bprintk_print(struct trace_iterator *iter, int flags)
>> +{
>> +     struct trace_entry *entry = iter->ent;
>> +     struct trace_seq *s = &iter->seq;
>> +     struct bprintk_entry *field;
>> +
>> +     trace_assign_type(field, entry);
>> +
>> +     if (!seq_print_ip_sym(s, field->ip, flags))
>> +             goto partial;
>> +
>> +     if (!trace_seq_puts(s, ": "))
>> +             goto partial;
>> +
>> +     if (!trace_seq_bprintf(s, field->fmt, field->buf))
>> +             goto partial;
>> +
>> +     return TRACE_TYPE_HANDLED;
>> +
>> + partial:
>> +     return TRACE_TYPE_PARTIAL_LINE;
>> +}
>> +
>> +static enum print_line_t
>> +trace_bprintk_raw(struct trace_iterator *iter, int flags)
>> +{
>> +     struct trace_entry *entry = iter->ent;
>> +     struct trace_seq *s = &iter->seq;
>> +     struct bprintk_entry *field;
>> +
>> +     trace_assign_type(field, entry);
>> +
>> +     if (!trace_seq_printf(s, ": %lx : ", field->ip))
>> +             goto partial;
>> +
>> +     if (!trace_seq_bprintf(s, field->fmt, field->buf))
>> +             goto partial;
>> +
>> +     return TRACE_TYPE_HANDLED;
>> +
>> + partial:
>> +     return TRACE_TYPE_PARTIAL_LINE;
>> +}
>> +
>> +static struct trace_event trace_bprintk_event = {
>> +     .type           = TRACE_BPRINTK,
>> +     .trace          = trace_bprintk_print,
>> +     .latency_trace  = trace_bprintk_print,
>> +     .raw            = trace_bprintk_raw,
>> +     .hex            = trace_nop_print,
>> +     .binary         = trace_nop_print,
>> +};
>> +
>>  static struct trace_event *events[] __initdata = {
>>       &trace_fn_event,
>>       &trace_ctx_event,
>> @@ -895,6 +970,7 @@ static struct trace_event *events[] __initdata = {
>>       &trace_stack_event,
>>       &trace_user_stack_event,
>>       &trace_print_event,
>> +     &trace_bprintk_event,
>>       NULL
>>  };
>>
>> --
>> 1.6.1
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-03-02 16:34                           ` Steven Rostedt
@ 2009-03-02 17:45                             ` Frédéric Weisbecker
  2009-03-02 17:53                               ` Steven Rostedt
  0 siblings, 1 reply; 44+ messages in thread
From: Frédéric Weisbecker @ 2009-03-02 17:45 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel

2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>
> On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
>
>> From: Lai Jiangshan <laijs@cn.fujitsu.com>
>>
>> Impact: Add a generic printk() for ftrace, like ftrace_printk()
>>
>> ftrace_bprintk() use the infrastructure to record events on ring_buffer.
>>
>> [fweisbec@gmail.com: ported to latest -tip and made it work if !CONFIG_MODULES]
>>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
>> ---
>>  include/asm-generic/vmlinux.lds.h |    3 +
>>  include/linux/ftrace.h            |   21 ++++++
>>  include/linux/module.h            |    5 ++
>>  kernel/module.c                   |    6 ++
>>  kernel/trace/trace.c              |   15 ++++
>>  kernel/trace/trace_bprintk.c      |  132 ++++++++++++++++++++++++++++++++++++-
>>  6 files changed, 181 insertions(+), 1 deletions(-)
>>
>> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
>> index 9d97491..13f20f4 100644
>> --- a/include/asm-generic/vmlinux.lds.h
>> +++ b/include/asm-generic/vmlinux.lds.h
>> @@ -100,6 +100,9 @@
>>               *(__vermagic)           /* Kernel version magic */      \
>>               *(__markers_strings)    /* Markers: strings */          \
>>               *(__tracepoints_strings)/* Tracepoints: strings */      \
>> +             VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .;        \
>> +             *(__trace_printk_fmt)   /* Trace_printk fmt' pointer */ \
>> +             VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .;         \
>
> If this can only be done when tracing is enabled, I would wrap that up
> in a macro that is a nop when tracing is disabled.


Ah, good catch.


>>       }                                                               \
>>                                                                       \
>>       .rodata1          : AT(ADDR(.rodata1) - LOAD_OFFSET) {          \
>> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
>> index 8ac2fac..f82b986 100644
>> --- a/include/linux/ftrace.h
>> +++ b/include/linux/ftrace.h
>> @@ -224,6 +224,27 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>>
>>  #ifdef CONFIG_TRACE_BPRINTK
>>  extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
>> +extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
>> +             __attribute__ ((format (printf, 2, 3)));
>> +
>> +static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
>> +             __attribute__ ((format (printf, 1, 2)));
>> +static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
>> +#define __ftrace_bprintk_check_format(fmt, args...)                  \
>> +do {                                                                 \
>> +     if (0)                                                          \
>> +             ____ftrace_bprintk_check_format(fmt, ##args);           \
>> +} while (0)
>> +
>> +#define ftrace_bprintk(fmt, args...)                                 \
>> +do {                                                                 \
>> +     static char *__attribute__((section("__trace_bprintk_fmt")))    \
>> +                     ftrace_bprintk_fmt = fmt;                       \
>> +     __ftrace_bprintk_check_format(fmt, ##args);                     \
>> +     __ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);        \
>> +} while (0)
>> +#else
>> +#define ftrace_bprintk ftrace_printk
>>  #endif
>>
>>  /* May be defined in arch */
>> diff --git a/include/linux/module.h b/include/linux/module.h
>> index 07b1cb4..c4f2845 100644
>> --- a/include/linux/module.h
>> +++ b/include/linux/module.h
>> @@ -347,6 +347,11 @@ struct module
>>       unsigned int num_tracepoints;
>>  #endif
>>
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +     const char **trace_bprintk_fmt_start;
>> +     unsigned int num_trace_bprintk_fmt;
>> +#endif
>> +
>>  #ifdef CONFIG_MODULE_UNLOAD
>>       /* What modules depend on me? */
>>       struct list_head modules_which_use_me;
>> diff --git a/kernel/module.c b/kernel/module.c
>> index 90a6d63..fa96909 100644
>> --- a/kernel/module.c
>> +++ b/kernel/module.c
>> @@ -2192,6 +2192,12 @@ static noinline struct module *load_module(void __user *umod,
>>                                       &mod->num_tracepoints);
>>  #endif
>>
>> +#ifdef CONFIG_TRACE_BPRINTK
>> +     mod->trace_bprintk_fmt_start = section_objs(hdr, sechdrs, secstrings,
>> +                     "__trace_bprintk_fmt", sizeof(char *),
>> +                     &mod->num_trace_bprintk_fmt);
>> +#endif
>> +
>>  #ifdef CONFIG_MODVERSIONS
>>       if ((mod->num_syms && !mod->crcs)
>>           || (mod->num_gpl_syms && !mod->gpl_crcs)
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index 3a3ae19..0e30dce 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -3627,6 +3627,21 @@ out:
>>  }
>>  EXPORT_SYMBOL_GPL(trace_vbprintk);
>>
>> +int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
>> +{
>> +     int ret;
>> +     va_list ap;
>> +
>> +     if (!fmt)
>> +             return 0;
>> +
>> +     va_start(ap, fmt);
>> +     ret = trace_vbprintk(ip, fmt, ap);
>> +     va_end(ap);
>> +     return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(__ftrace_bprintk);
>> +
>>  static int trace_panic_handler(struct notifier_block *this,
>>                              unsigned long event, void *unused)
>>  {
>> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
>> index 1f8e532..28e6286 100644
>> --- a/kernel/trace/trace_bprintk.c
>> +++ b/kernel/trace/trace_bprintk.c
>> @@ -19,9 +19,23 @@
>>
>>  #include "trace.h"
>>
>> +#ifdef CONFIG_MODULE> +
>>  /* binary printk basic */
>>  static DEFINE_MUTEX(btrace_mutex);
>>  static int btrace_metadata_count;
>> +/*
>> + * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
>> + * which are queued on trace_bprintk_fmt_list.
>> + */
>> +static LIST_HEAD(trace_bprintk_fmt_list);
>> +
>> +struct trace_bprintk_fmt {
>> +     struct list_head list;
>> +     int count;
>> +     char fmt[0];
>> +};
>> +
>>
>>  static inline void lock_btrace(void)
>>  {
>> @@ -33,6 +47,93 @@ static inline void unlock_btrace(void)
>>       mutex_unlock(&btrace_mutex);
>>  }
>>
>> +/*
>> + * If trace_bprintk() is not active, we release the trace_bprintk's
>> + * formats that no modules has reference to.
>> + */
>> +static inline void shrink_trace_bprintk_fmt(void)
>> +{
>> +     struct trace_bprintk_fmt *pos, *next;
>> +     list_for_each_entry_safe(pos, next, &trace_bprintk_fmt_list, list) {
>> +             if (!pos->count) {
>> +                     list_del(&pos->list);
>> +                     kfree(pos);
>> +             }
>> +     }
>> +}
>> +
>> +static inline struct trace_bprintk_fmt *lookup_format(const char *fmt)
>> +{
>> +     struct trace_bprintk_fmt *pos;
>> +     list_for_each_entry(pos, &trace_bprintk_fmt_list, list) {
>> +             if (!strcmp(pos->fmt, fmt))
>> +                     return pos;
>> +     }
>> +     return NULL;
>> +}
>> +
>> +static
>> +void hold_module_trace_bprintk_format(const char **start, const char **end)
>> +{
>> +     const char **iter;
>> +     lock_btrace();
>> +     for (iter = start; iter < end; iter++) {
>> +             struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
>> +             if (tb_fmt) {
>> +                     tb_fmt->count++;
>> +                     *iter = tb_fmt->fmt;
>> +                     continue;
>> +             }
>> +
>> +             tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt)
>> +                             + strlen(*iter) + 1, GFP_KERNEL);
>> +             if (tb_fmt) {
>> +                     list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list);
>> +                     tb_fmt->count = 1;
>> +                     strcpy(tb_fmt->fmt, *iter);
>> +                     *iter = tb_fmt->fmt;
>> +             } else
>> +                     *iter = NULL;
>> +     }
>> +     unlock_btrace();
>> +}
>> +
>> +static
>> +void release_module_trace_bprintk_format(const char **start, const char **end)
>> +{
>> +     const char **iter;
>> +     lock_btrace();
>> +     for (iter = start; iter < end; iter++) {
>> +             struct trace_bprintk_fmt *tb_fmt;
>> +             if (!*iter)
>> +                     continue;
>> +
>> +             tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
>> +             tb_fmt->count--;
>> +             if (!tb_fmt->count && !btrace_metadata_count) {
>> +                     list_del(&tb_fmt->list);
>> +                     kfree(tb_fmt);
>
> Shouldn't *iter get assigned to NULL somewhere here?
>
> -- Steve


Hm, why?

>
>> +             }
>> +     }
>> +     unlock_btrace();
>> +}
>> +
>> +static int module_trace_bprintk_format_notify(struct notifier_block *self,
>> +             unsigned long val, void *data)
>> +{
>> +     struct module *mod = data;
>> +     if (mod->num_trace_bprintk_fmt) {
>> +             const char **start = mod->trace_bprintk_fmt_start;
>> +             const char **end = start + mod->num_trace_bprintk_fmt;
>> +
>> +             if (val == MODULE_STATE_COMING)
>> +                     hold_module_trace_bprintk_format(start, end);
>> +             else if (val == MODULE_STATE_GOING)
>> +                     release_module_trace_bprintk_format(start, end);
>> +     }
>> +     return 0;
>> +}
>> +
>>  static void get_btrace_metadata(void)
>>  {
>>       lock_btrace();
>> @@ -44,9 +145,31 @@ static void put_btrace_metadata(void)
>>  {
>>       lock_btrace();
>>       btrace_metadata_count--;
>> +
>> +     if (!btrace_metadata_count) {
>> +             shrink_trace_bprintk_fmt();
>> +     }
>>       unlock_btrace();
>>  }
>>
>> +#else /* !CONFIG_MODULES */
>> +__init static int
>> +module_trace_bprintk_format_notify(struct notifier_block *self,
>> +             unsigned long val, void *data)
>> +{
>> +     return 0;
>> +}
>> +
>> +static inline void get_btrace_metadata(void) { }
>> +static inline void put_btrace_metadata(void) { }
>> +#endif /* CONFIG_MODULES */
>> +
>> +
>> +__initdata_or_module static
>> +struct notifier_block module_trace_bprintk_format_nb = {
>> +     .notifier_call = module_trace_bprintk_format_notify,
>> +};
>> +
>>  /* events tracer */
>>  int trace_bprintk_enable;
>>
>> @@ -81,7 +204,14 @@ static struct tracer bprintk_trace __read_mostly =
>>
>>  static __init int init_bprintk(void)
>>  {
>> -     return register_tracer(&bprintk_trace);
>> +     int ret = register_module_notifier(&module_trace_bprintk_format_nb);
>> +     if (ret)
>> +             return ret;
>> +
>> +     ret = register_tracer(&bprintk_trace);
>> +     if (ret)
>> +             unregister_module_notifier(&module_trace_bprintk_format_nb);
>> +     return ret;
>>  }
>>
>>  device_initcall(init_bprintk);
>> --
>> 1.6.1
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 4/5] tracing/core: drop the old ftrace_printk  implementation in favour of ftrace_bprintk
  2009-03-02 16:37                           ` Steven Rostedt
@ 2009-03-02 17:47                             ` Frédéric Weisbecker
  0 siblings, 0 replies; 44+ messages in thread
From: Frédéric Weisbecker @ 2009-03-02 17:47 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel

2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>
> On Sat, 28 Feb 2009, Frederic Weisbecker wrote:
>
>> Impact: faster and lighter tracing
>>
>> Now that we have ftrace_bprintk() which is faster and consume lesser
>> memory than ftrace_printk() and has the same purpose, we can now drop
>> the old implementation in favour of the binary one from ftrace_bprintk(),
>> which means we move all the implementation of ftrace_bprintk() to
>> ftrace_printk(), so the Api doesn't change except that we must now use
>> trace_seq_bprintk() to print the TRACE_PRINT entries.
>>
>> Some changes result of this:
>>
>> - Previously, ftrace_bprintk depended of a single tracer and couldn't
>>   work without. This tracer has been dropped and the whole implementation
>>   of ftrace_printk() (like the module formats management) is now integrated
>>   in the tracing core (comes with CONFIG_TRACING), though we keep the file
>>   trace_bprintk (previously trace_bprintk.c) where we can find the module
>
>    ^^^^^^^^^^^^^
>
>  Do you mean trace_printk.c ?

Exact :-)
I will resend a new iteration of this serie to address your reviews.

Thanks!


>
>>   management. Thus we don't overflow trace.c
>>
>> - changes some parts to use trace_seq_bprintk() to print TRACE_PRINT entries.
>>
>> - change a bit ftrace_printk/ftrace_vprintk macros to support non-builtin formats
>>   constants, and fix 'const' qualifiers warnings. But this is all transparent for
>>   developers.
>>
>> - etc...
>>
>> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
>> ---
>>  include/linux/ftrace.h                           |   68 ++++----
>>  include/linux/module.h                           |    2 +-
>>  kernel/trace/Kconfig                             |    7 +-
>>  kernel/trace/Makefile                            |    2 +-
>>  kernel/trace/trace.c                             |  209 +++++++---------------
>>  kernel/trace/trace.h                             |   14 +--
>>  kernel/trace/trace_functions_graph.c             |    6 +-
>>  kernel/trace/trace_mmiotrace.c                   |    9 +-
>>  kernel/trace/trace_output.c                      |   69 +------
>>  kernel/trace/trace_output.h                      |    2 +
>>  kernel/trace/{trace_bprintk.c => trace_printk.c} |  113 ++++--------
>>  11 files changed, 162 insertions(+), 339 deletions(-)
>>
>> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
>> index f82b986..696a09b 100644
>> --- a/include/linux/ftrace.h
>> +++ b/include/linux/ftrace.h
>> @@ -222,31 +222,6 @@ extern int ftrace_make_nop(struct module *mod,
>>   */
>>  extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
>>
>> -#ifdef CONFIG_TRACE_BPRINTK
>> -extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
>> -extern int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
>> -             __attribute__ ((format (printf, 2, 3)));
>> -
>> -static inline void  ____ftrace_bprintk_check_format(const char *fmt, ...)
>> -             __attribute__ ((format (printf, 1, 2)));
>> -static inline void ____ftrace_bprintk_check_format(const char *fmt, ...) {}
>> -#define __ftrace_bprintk_check_format(fmt, args...)                  \
>> -do {                                                                 \
>> -     if (0)                                                          \
>> -             ____ftrace_bprintk_check_format(fmt, ##args);           \
>> -} while (0)
>> -
>> -#define ftrace_bprintk(fmt, args...)                                 \
>> -do {                                                                 \
>> -     static char *__attribute__((section("__trace_bprintk_fmt")))    \
>> -                     ftrace_bprintk_fmt = fmt;                       \
>> -     __ftrace_bprintk_check_format(fmt, ##args);                     \
>> -     __ftrace_bprintk(_THIS_IP_, ftrace_bprintk_fmt, ##args);        \
>> -} while (0)
>> -#else
>> -#define ftrace_bprintk ftrace_printk
>> -#endif
>> -
>>  /* May be defined in arch */
>>  extern int ftrace_arch_read_dyn_info(char *buf, int size);
>>
>> @@ -367,15 +342,40 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
>>   * Please refrain from leaving ftrace_printks scattered around in
>>   * your code.
>>   */
>> -# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt)
>> -extern int
>> -__ftrace_printk(unsigned long ip, const char *fmt, ...)
>> -     __attribute__ ((format (printf, 2, 3)));
>> -# define ftrace_vprintk(fmt, ap) __ftrace_printk(_THIS_IP_, fmt, ap)
>> -extern int
>> -__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
>> +extern int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list args);
>> +extern int __ftrace_printk(unsigned long ip, const char *fmt, ...)
>> +             __attribute__ ((format (printf, 2, 3)));
>> +
>> +static inline void  ____ftrace_printk_check_format(const char *fmt, ...)
>> +             __attribute__ ((format (printf, 1, 2)));
>> +static inline void ____ftrace_printk_check_format(const char *fmt, ...) {}
>> +#define __ftrace_printk_check_format(fmt, args...)                   \
>> +do {                                                                 \
>> +     if (0)                                                          \
>> +             ____ftrace_printk_check_format(fmt, ##args);            \
>> +} while (0)
>> +
>> +#define ftrace_vprintk(fmt, vargs)                                   \
>> +do {                                                                 \
>> +     static const char *ftrace_printk_fmt                            \
>> +     __attribute__((section("__trace_bprintk_fmt")));                \
>> +     ftrace_printk_fmt = fmt;                                        \
>> +     __ftrace_vprintk(_THIS_IP_, ftrace_printk_fmt, vargs);          \
>> +} while (0)
>> +
>> +#define ftrace_printk(fmt, args...)                                  \
>> +do {                                                                 \
>> +     static const char *ftrace_printk_fmt                            \
>> +     __attribute__((section("__trace_bprintk_fmt")));                \
>> +     ftrace_printk_fmt = fmt;                                        \
>> +     __ftrace_printk_check_format(fmt, ##args);                      \
>> +     __ftrace_printk(_THIS_IP_, ftrace_printk_fmt, ##args);          \
>> +} while (0)
>> +
>>  extern void ftrace_dump(void);
>> -#else
>> +
>> +#else /* !CONFIG_TRACING */
>> +
>>  static inline void
>>  ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
>>  static inline int
>> @@ -395,7 +395,7 @@ ftrace_vprintk(const char *fmt, va_list ap)
>>       return 0;
>>  }
>>  static inline void ftrace_dump(void) { }
>> -#endif
>> +#endif /* CONFIG_TRACING */
>>
>>  #ifdef CONFIG_FTRACE_MCOUNT_RECORD
>>  extern void ftrace_init(void);
>> diff --git a/include/linux/module.h b/include/linux/module.h
>> index c4f2845..e418da1 100644
>> --- a/include/linux/module.h
>> +++ b/include/linux/module.h
>> @@ -347,7 +347,7 @@ struct module
>>       unsigned int num_tracepoints;
>>  #endif
>>
>> -#ifdef CONFIG_TRACE_BPRINTK
>> +#ifdef CONFIG_TRACING
>>       const char **trace_bprintk_fmt_start;
>>       unsigned int num_trace_bprintk_fmt;
>>  #endif
>> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
>> index 2e84fc7..52439c8 100644
>> --- a/kernel/trace/Kconfig
>> +++ b/kernel/trace/Kconfig
>> @@ -52,6 +52,7 @@ config TRACING
>>       select STACKTRACE if STACKTRACE_SUPPORT
>>       select TRACEPOINTS
>>       select NOP_TRACER
>> +     select BINARY_PRINTF
>>
>>  menu "Tracers"
>>
>> @@ -86,12 +87,6 @@ config FUNCTION_GRAPH_TRACER
>>         This is done by setting the current return address on the current
>>         task structure into a stack of calls.
>>
>> -config TRACE_BPRINTK
>> -     bool "Binary printk for tracing"
>> -     default y
>> -     depends on TRACING
>> -     select BINARY_PRINTF
>> -
>>  config IRQSOFF_TRACER
>>       bool "Interrupts-off Latency Tracer"
>>       default n
>> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
>> index dba6157..25e243f 100644
>> --- a/kernel/trace/Makefile
>> +++ b/kernel/trace/Makefile
>> @@ -22,7 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
>>  obj-$(CONFIG_TRACING) += trace_clock.o
>>  obj-$(CONFIG_TRACING) += trace_output.o
>>  obj-$(CONFIG_TRACING) += trace_stat.o
>> -obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
>> +obj-$(CONFIG_TRACING) += trace_printk.o
>>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
>> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
>> index 0e30dce..2ad285b 100644
>> --- a/kernel/trace/trace.c
>> +++ b/kernel/trace/trace.c
>> @@ -1171,6 +1171,67 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
>>  }
>>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>>
>> +
>> +/**
>> + * trace_vprintk - write binary msg to tracing buffer
>> + *
>> + * Caller must insure @fmt are valid when msg is in tracing buffer.
>> + */
>> +int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
>> +{
>> +     static DEFINE_SPINLOCK(trace_buf_lock);
>> +     static u32 trace_buf[TRACE_BUF_SIZE];
>> +
>> +     struct ring_buffer_event *event;
>> +     struct trace_array *tr = &global_trace;
>> +     struct trace_array_cpu *data;
>> +     struct print_entry *entry;
>> +     unsigned long flags;
>> +     int cpu, len = 0, size, pc;
>> +
>> +     if (unlikely(tracing_selftest_running || tracing_disabled))
>> +             return 0;
>> +
>> +     /* Don't pollute graph traces with trace_vprintk internals */
>> +     pause_graph_tracing();
>> +
>> +     pc = preempt_count();
>> +     preempt_disable_notrace();
>> +     cpu = raw_smp_processor_id();
>> +     data = tr->data[cpu];
>> +
>> +     if (unlikely(atomic_read(&data->disabled)))
>> +             goto out;
>> +
>> +     spin_lock_irqsave(&trace_buf_lock, flags);
>> +     len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
>> +
>> +     if (len > TRACE_BUF_SIZE || len < 0)
>> +             goto out_unlock;
>> +
>> +     size = sizeof(*entry) + sizeof(u32) * len;
>> +     event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, flags, pc);
>> +     if (!event)
>> +             goto out_unlock;
>> +     entry = ring_buffer_event_data(event);
>> +     entry->ip                       = ip;
>> +     entry->depth                    = depth;
>> +     entry->fmt                      = fmt;
>> +
>> +     memcpy(entry->buf, trace_buf, sizeof(u32) * len);
>> +     ring_buffer_unlock_commit(tr->buffer, event);
>> +
>> +out_unlock:
>> +     spin_unlock_irqrestore(&trace_buf_lock, flags);
>> +
>> +out:
>> +     preempt_enable_notrace();
>> +     unpause_graph_tracing();
>> +
>> +     return len;
>> +}
>> +EXPORT_SYMBOL_GPL(trace_vprintk);
>> +
>>  enum trace_file_type {
>>       TRACE_FILE_LAT_FMT      = 1,
>>       TRACE_FILE_ANNOTATE     = 2,
>> @@ -1588,7 +1649,7 @@ static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
>>
>>       trace_assign_type(field, entry);
>>
>> -     ret = trace_seq_printf(s, "%s", field->buf);
>> +     ret = trace_seq_bprintf(s, field->fmt, field->buf);
>>       if (!ret)
>>               return TRACE_TYPE_PARTIAL_LINE;
>>
>> @@ -3496,152 +3557,6 @@ static __init int tracer_init_debugfs(void)
>>       return 0;
>>  }
>>
>> -int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
>> -{
>> -     static DEFINE_SPINLOCK(trace_buf_lock);
>> -     static char trace_buf[TRACE_BUF_SIZE];
>> -
>> -     struct ring_buffer_event *event;
>> -     struct trace_array *tr = &global_trace;
>> -     struct trace_array_cpu *data;
>> -     int cpu, len = 0, size, pc;
>> -     struct print_entry *entry;
>> -     unsigned long irq_flags;
>> -
>> -     if (tracing_disabled || tracing_selftest_running)
>> -             return 0;
>> -
>> -     pc = preempt_count();
>> -     preempt_disable_notrace();
>> -     cpu = raw_smp_processor_id();
>> -     data = tr->data[cpu];
>> -
>> -     if (unlikely(atomic_read(&data->disabled)))
>> -             goto out;
>> -
>> -     pause_graph_tracing();
>> -     spin_lock_irqsave(&trace_buf_lock, irq_flags);
>> -     len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
>> -
>> -     len = min(len, TRACE_BUF_SIZE-1);
>> -     trace_buf[len] = 0;
>> -
>> -     size = sizeof(*entry) + len + 1;
>> -     event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
>> -     if (!event)
>> -             goto out_unlock;
>> -     entry = ring_buffer_event_data(event);
>> -     entry->ip                       = ip;
>> -     entry->depth                    = depth;
>> -
>> -     memcpy(&entry->buf, trace_buf, len);
>> -     entry->buf[len] = 0;
>> -     ring_buffer_unlock_commit(tr->buffer, event);
>> -
>> - out_unlock:
>> -     spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
>> -     unpause_graph_tracing();
>> - out:
>> -     preempt_enable_notrace();
>> -
>> -     return len;
>> -}
>> -EXPORT_SYMBOL_GPL(trace_vprintk);
>> -
>> -int __ftrace_printk(unsigned long ip, const char *fmt, ...)
>> -{
>> -     int ret;
>> -     va_list ap;
>> -
>> -     if (!(trace_flags & TRACE_ITER_PRINTK))
>> -             return 0;
>> -
>> -     va_start(ap, fmt);
>> -     ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
>> -     va_end(ap);
>> -     return ret;
>> -}
>> -EXPORT_SYMBOL_GPL(__ftrace_printk);
>> -
>> -int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>> -{
>> -     if (!(trace_flags & TRACE_ITER_PRINTK))
>> -             return 0;
>> -
>> -     return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
>> -}
>> -EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>> -
>> -/**
>> - * trace_vbprintk - write binary msg to tracing buffer
>> - *
>> - * Caller must insure @fmt are valid when msg is in tracing buffer.
>> - */
>> -int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
>> -{
>> -     static DEFINE_SPINLOCK(trace_buf_lock);
>> -     static u32 trace_buf[TRACE_BUF_SIZE];
>> -
>> -     struct ring_buffer_event *event;
>> -     struct trace_array *tr = &global_trace;
>> -     struct trace_array_cpu *data;
>> -     struct bprintk_entry *entry;
>> -     unsigned long flags;
>> -     int cpu, len = 0, size, pc;
>> -
>> -     if (tracing_disabled || !trace_bprintk_enable)
>> -             return 0;
>> -
>> -     pc = preempt_count();
>> -     preempt_disable_notrace();
>> -     cpu = raw_smp_processor_id();
>> -     data = tr->data[cpu];
>> -
>> -     if (unlikely(atomic_read(&data->disabled)))
>> -             goto out;
>> -
>> -     spin_lock_irqsave(&trace_buf_lock, flags);
>> -     len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
>> -
>> -     if (len > TRACE_BUF_SIZE || len < 0)
>> -             goto out_unlock;
>> -
>> -     size = sizeof(*entry) + sizeof(u32) * len;
>> -     event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
>> -     if (!event)
>> -             goto out_unlock;
>> -     entry = ring_buffer_event_data(event);
>> -     entry->ip                       = ip;
>> -     entry->fmt                      = fmt;
>> -
>> -     memcpy(entry->buf, trace_buf, sizeof(u32) * len);
>> -     ring_buffer_unlock_commit(tr->buffer, event);
>> -
>> -out_unlock:
>> -     spin_unlock_irqrestore(&trace_buf_lock, flags);
>> -
>> -out:
>> -     preempt_enable_notrace();
>> -
>> -     return len;
>> -}
>> -EXPORT_SYMBOL_GPL(trace_vbprintk);
>> -
>> -int __ftrace_bprintk(unsigned long ip, const char *fmt, ...)
>> -{
>> -     int ret;
>> -     va_list ap;
>> -
>> -     if (!fmt)
>> -             return 0;
>> -
>> -     va_start(ap, fmt);
>> -     ret = trace_vbprintk(ip, fmt, ap);
>> -     va_end(ap);
>> -     return ret;
>> -}
>> -EXPORT_SYMBOL_GPL(__ftrace_bprintk);
>> -
>>  static int trace_panic_handler(struct notifier_block *this,
>>                              unsigned long event, void *unused)
>>  {
>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>> index 92703b1..0ccc678 100644
>> --- a/kernel/trace/trace.h
>> +++ b/kernel/trace/trace.h
>> @@ -20,7 +20,6 @@ enum trace_type {
>>       TRACE_WAKE,
>>       TRACE_STACK,
>>       TRACE_PRINT,
>> -     TRACE_BPRINTK,
>>       TRACE_SPECIAL,
>>       TRACE_MMIO_RW,
>>       TRACE_MMIO_MAP,
>> @@ -120,16 +119,10 @@ struct userstack_entry {
>>   */
>>  struct print_entry {
>>       struct trace_entry      ent;
>> -     unsigned long           ip;
>> +     unsigned long           ip;
>>       int                     depth;
>> -     char                    buf[];
>> -};
>> -
>> -struct bprintk_entry {
>> -     struct trace_entry ent;
>> -     unsigned long ip;
>> -     const char *fmt;
>> -     u32 buf[];
>> +     const char              *fmt;
>> +     u32                     buf[];
>>  };
>>  #ifdef CONFIG_TRACE_BPRINTK
>>  extern int trace_bprintk_enable;
>> @@ -295,7 +288,6 @@ extern void __ftrace_bad_type(void);
>>               IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);   \
>>               IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
>>               IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);   \
>> -             IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
>>               IF_ASSIGN(var, ent, struct special_entry, 0);           \
>>               IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,          \
>>                         TRACE_MMIO_RW);                               \
>> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
>> index 2461732..13e362d 100644
>> --- a/kernel/trace/trace_functions_graph.c
>> +++ b/kernel/trace/trace_functions_graph.c
>> @@ -742,7 +742,11 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
>>               }
>>
>>       /* The comment */
>> -     ret = trace_seq_printf(s, "/* %s", trace->buf);
>> +     ret = trace_seq_printf(s, "/* ");
>> +     if (!ret)
>> +             return TRACE_TYPE_PARTIAL_LINE;
>> +
>> +     ret = trace_seq_bprintf(s, trace->fmt, trace->buf);
>>       if (!ret)
>>               return TRACE_TYPE_PARTIAL_LINE;
>>
>> diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
>> index c401b90..23e346a 100644
>> --- a/kernel/trace/trace_mmiotrace.c
>> +++ b/kernel/trace/trace_mmiotrace.c
>> @@ -254,15 +254,18 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter)
>>  {
>>       struct trace_entry *entry = iter->ent;
>>       struct print_entry *print = (struct print_entry *)entry;
>> -     const char *msg         = print->buf;
>>       struct trace_seq *s     = &iter->seq;
>>       unsigned long long t    = ns2usecs(iter->ts);
>> -     unsigned long usec_rem  = do_div(t, 1000000ULL);
>> +     unsigned long usec_rem  = do_div(t, USEC_PER_SEC);
>>       unsigned secs           = (unsigned long)t;
>>       int ret;
>>
>>       /* The trailing newline must be in the message. */
>> -     ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg);
>> +     ret = trace_seq_printf(s, "MARK %u.%06lu ", secs, usec_rem);
>> +     if (!ret)
>> +             return TRACE_TYPE_PARTIAL_LINE;
>> +
>> +     ret = trace_seq_bprintf(s, print->fmt, print->buf);
>>       if (!ret)
>>               return TRACE_TYPE_PARTIAL_LINE;
>>
>> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
>> index 7897b5d..a020a9b 100644
>> --- a/kernel/trace/trace_output.c
>> +++ b/kernel/trace/trace_output.c
>> @@ -53,8 +53,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>>       return len;
>>  }
>>
>> -static int
>> -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
>> +int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
>>  {
>>       int len = (PAGE_SIZE - 1) - s->len;
>>       int ret;
>> @@ -876,50 +875,6 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
>>       if (!seq_print_ip_sym(s, field->ip, flags))
>>               goto partial;
>>
>> -     if (!trace_seq_printf(s, ": %s", field->buf))
>> -             goto partial;
>> -
>> -     return TRACE_TYPE_HANDLED;
>> -
>> - partial:
>> -     return TRACE_TYPE_PARTIAL_LINE;
>> -}
>> -
>> -static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
>> -{
>> -     struct print_entry *field;
>> -
>> -     trace_assign_type(field, iter->ent);
>> -
>> -     if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf))
>> -             goto partial;
>> -
>> -     return TRACE_TYPE_HANDLED;
>> -
>> - partial:
>> -     return TRACE_TYPE_PARTIAL_LINE;
>> -}
>> -
>> -static struct trace_event trace_print_event = {
>> -     .type           = TRACE_PRINT,
>> -     .trace          = trace_print_print,
>> -     .latency_trace  = trace_print_print,
>> -     .raw            = trace_print_raw,
>> -};
>> -
>> -/* TRACE_BPRINTK */
>> -static enum print_line_t
>> -trace_bprintk_print(struct trace_iterator *iter, int flags)
>> -{
>> -     struct trace_entry *entry = iter->ent;
>> -     struct trace_seq *s = &iter->seq;
>> -     struct bprintk_entry *field;
>> -
>> -     trace_assign_type(field, entry);
>> -
>> -     if (!seq_print_ip_sym(s, field->ip, flags))
>> -             goto partial;
>> -
>>       if (!trace_seq_puts(s, ": "))
>>               goto partial;
>>
>> @@ -932,14 +887,13 @@ trace_bprintk_print(struct trace_iterator *iter, int flags)
>>       return TRACE_TYPE_PARTIAL_LINE;
>>  }
>>
>> -static enum print_line_t
>> -trace_bprintk_raw(struct trace_iterator *iter, int flags)
>> +
>> +static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags)
>>  {
>> -     struct trace_entry *entry = iter->ent;
>> +     struct print_entry *field;
>>       struct trace_seq *s = &iter->seq;
>> -     struct bprintk_entry *field;
>>
>> -     trace_assign_type(field, entry);
>> +     trace_assign_type(field, iter->ent);
>>
>>       if (!trace_seq_printf(s, ": %lx : ", field->ip))
>>               goto partial;
>> @@ -953,13 +907,11 @@ trace_bprintk_raw(struct trace_iterator *iter, int flags)
>>       return TRACE_TYPE_PARTIAL_LINE;
>>  }
>>
>> -static struct trace_event trace_bprintk_event = {
>> -     .type           = TRACE_BPRINTK,
>> -     .trace          = trace_bprintk_print,
>> -     .latency_trace  = trace_bprintk_print,
>> -     .raw            = trace_bprintk_raw,
>> -     .hex            = trace_nop_print,
>> -     .binary         = trace_nop_print,
>> +static struct trace_event trace_print_event = {
>> +     .type           = TRACE_PRINT,
>> +     .trace          = trace_print_print,
>> +     .latency_trace  = trace_print_print,
>> +     .raw            = trace_print_raw,
>>  };
>>
>>  static struct trace_event *events[] __initdata = {
>> @@ -970,7 +922,6 @@ static struct trace_event *events[] __initdata = {
>>       &trace_stack_event,
>>       &trace_user_stack_event,
>>       &trace_print_event,
>> -     &trace_bprintk_event,
>>       NULL
>>  };
>>
>> diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
>> index 551a25a..6d39010 100644
>> --- a/kernel/trace/trace_output.h
>> +++ b/kernel/trace/trace_output.h
>> @@ -19,6 +19,8 @@ struct trace_event {
>>  extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
>>       __attribute__ ((format (printf, 2, 3)));
>>  extern int
>> +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
>> +extern int
>>  seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
>>               unsigned long sym_flags);
>>  extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
>> diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_printk.c
>> similarity index 65%
>> rename from kernel/trace/trace_bprintk.c
>> rename to kernel/trace/trace_printk.c
>> index 28e6286..09f4ace 100644
>> --- a/kernel/trace/trace_bprintk.c
>> +++ b/kernel/trace/trace_printk.c
>> @@ -21,32 +21,21 @@
>>
>>  #ifdef CONFIG_MODULES
>>
>> -/* binary printk basic */
>> -static DEFINE_MUTEX(btrace_mutex);
>> -static int btrace_metadata_count;
>>  /*
>> - * modules ftrace_bprintk()'s formats are autosaved in struct trace_bprintk_fmt
>> + * modules ftrace_printk()'s formats are autosaved in struct trace_bprintk_fmt
>>   * which are queued on trace_bprintk_fmt_list.
>>   */
>>  static LIST_HEAD(trace_bprintk_fmt_list);
>>
>> +/* serialize accesses to trace_bprintk_fmt_list */
>> +static DEFINE_MUTEX(btrace_mutex);
>> +
>>  struct trace_bprintk_fmt {
>>       struct list_head list;
>>       int count;
>>       char fmt[0];
>>  };
>>
>> -
>> -static inline void lock_btrace(void)
>> -{
>> -     mutex_lock(&btrace_mutex);
>> -}
>> -
>> -static inline void unlock_btrace(void)
>> -{
>> -     mutex_unlock(&btrace_mutex);
>> -}
>> -
>
> Heh, OK, you removed it ;-)
>
> -- Steve
>
>>  /*
>>   * If trace_bprintk() is not active, we release the trace_bprintk's
>>   * formats that no modules has reference to.
>> @@ -76,7 +65,8 @@ static
>>  void hold_module_trace_bprintk_format(const char **start, const char **end)
>>  {
>>       const char **iter;
>> -     lock_btrace();
>> +
>> +     mutex_lock(&btrace_mutex);
>>       for (iter = start; iter < end; iter++) {
>>               struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter);
>>               if (tb_fmt) {
>> @@ -95,14 +85,15 @@ void hold_module_trace_bprintk_format(const char **start, const char **end)
>>               } else
>>                       *iter = NULL;
>>       }
>> -     unlock_btrace();
>> +     mutex_unlock(&btrace_mutex);
>>  }
>>
>>  static
>>  void release_module_trace_bprintk_format(const char **start, const char **end)
>>  {
>>       const char **iter;
>> -     lock_btrace();
>> +
>> +     mutex_lock(&btrace_mutex);
>>       for (iter = start; iter < end; iter++) {
>>               struct trace_bprintk_fmt *tb_fmt;
>>               if (!*iter)
>> @@ -110,12 +101,12 @@ void release_module_trace_bprintk_format(const char **start, const char **end)
>>
>>               tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
>>               tb_fmt->count--;
>> -             if (!tb_fmt->count && !btrace_metadata_count) {
>> +             if (!tb_fmt->count) {
>>                       list_del(&tb_fmt->list);
>>                       kfree(tb_fmt);
>>               }
>>       }
>> -     unlock_btrace();
>> +     mutex_unlock(&btrace_mutex);
>>  }
>>
>>  static int module_trace_bprintk_format_notify(struct notifier_block *self,
>> @@ -134,24 +125,6 @@ static int module_trace_bprintk_format_notify(struct notifier_block *self,
>>       return 0;
>>  }
>>
>> -static void get_btrace_metadata(void)
>> -{
>> -     lock_btrace();
>> -     btrace_metadata_count++;
>> -     unlock_btrace();
>> -}
>> -
>> -static void put_btrace_metadata(void)
>> -{
>> -     lock_btrace();
>> -     btrace_metadata_count--;
>> -
>> -     if (!btrace_metadata_count) {
>> -             shrink_trace_bprintk_fmt();
>> -     }
>> -     unlock_btrace();
>> -}
>> -
>>  #else /* !CONFIG_MODULES */
>>  __init static int
>>  module_trace_bprintk_format_notify(struct notifier_block *self,
>> @@ -159,9 +132,6 @@ module_trace_bprintk_format_notify(struct notifier_block *self,
>>  {
>>       return 0;
>>  }
>> -
>> -static inline void get_btrace_metadata(void) { }
>> -static inline void put_btrace_metadata(void) { }
>>  #endif /* CONFIG_MODULES */
>>
>>
>> @@ -170,48 +140,39 @@ struct notifier_block module_trace_bprintk_format_nb = {
>>       .notifier_call = module_trace_bprintk_format_notify,
>>  };
>>
>> -/* events tracer */
>> -int trace_bprintk_enable;
>> -
>> -static void start_bprintk_trace(struct trace_array *tr)
>> +int __ftrace_printk(unsigned long ip, const char *fmt, ...)
>>  {
>> -     get_btrace_metadata();
>> -     tracing_reset_online_cpus(tr);
>> -     trace_bprintk_enable = 1;
>> -}
>> +     int ret;
>> +     va_list ap;
>>
>> -static void stop_bprintk_trace(struct trace_array *tr)
>> -{
>> -     trace_bprintk_enable = 0;
>> -     tracing_reset_online_cpus(tr);
>> -     put_btrace_metadata();
>> -}
>> +     if (unlikely(!fmt))
>> +             return 0;
>>
>> -static int init_bprintk_trace(struct trace_array *tr)
>> -{
>> -     start_bprintk_trace(tr);
>> -     return 0;
>> +     if (!(trace_flags & TRACE_ITER_PRINTK))
>> +             return 0;
>> +
>> +     va_start(ap, fmt);
>> +     ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
>> +     va_end(ap);
>> +     return ret;
>>  }
>> +EXPORT_SYMBOL_GPL(__ftrace_printk);
>>
>> -static struct tracer bprintk_trace __read_mostly =
>> +int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
>>  {
>> -     .name        = "events",
>> -     .init        = init_bprintk_trace,
>> -     .reset       = stop_bprintk_trace,
>> -     .start       = start_bprintk_trace,
>> -     .stop        = stop_bprintk_trace,
>> -};
>> +     if (unlikely(!fmt))
>> +             return 0;
>>
>> -static __init int init_bprintk(void)
>> -{
>> -     int ret = register_module_notifier(&module_trace_bprintk_format_nb);
>> -     if (ret)
>> -             return ret;
>> +     if (!(trace_flags & TRACE_ITER_PRINTK))
>> +             return 0;
>>
>> -     ret = register_tracer(&bprintk_trace);
>> -     if (ret)
>> -             unregister_module_notifier(&module_trace_bprintk_format_nb);
>> -     return ret;
>> +     return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
>>  }
>> +EXPORT_SYMBOL_GPL(__ftrace_vprintk);
>> +
>>
>> -device_initcall(init_bprintk);
>> +static __init int init_trace_printk(void)
>> +{
>> +     return register_module_notifier(&module_trace_bprintk_format_nb);
>> +}
>> +early_initcall(init_trace_printk);
>> --
>> 1.6.1
>>
>>
>>
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-03-02 17:45                             ` Frédéric Weisbecker
@ 2009-03-02 17:53                               ` Steven Rostedt
  2009-03-02 18:06                                 ` Frédéric Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-03-02 17:53 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 883 bytes --]


On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:
> >> +
> >> +static
> >> +void release_module_trace_bprintk_format(const char **start, const char **end)
> >> +{
> >> +     const char **iter;
> >> +     lock_btrace();
> >> +     for (iter = start; iter < end; iter++) {
> >> +             struct trace_bprintk_fmt *tb_fmt;
> >> +             if (!*iter)
> >> +                     continue;
> >> +
> >> +             tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
> >> +             tb_fmt->count--;
> >> +             if (!tb_fmt->count && !btrace_metadata_count) {
> >> +                     list_del(&tb_fmt->list);
> >> +                     kfree(tb_fmt);
> >
> > Shouldn't *iter get assigned to NULL somewhere here?
> >
> > -- Steve
> 
> 
> Hm, why?

Well, after we free tb_fmt, the *iter will then point to garbage. Right?

-- Steve

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-03-02 17:53                               ` Steven Rostedt
@ 2009-03-02 18:06                                 ` Frédéric Weisbecker
  2009-03-02 18:23                                   ` Steven Rostedt
  0 siblings, 1 reply; 44+ messages in thread
From: Frédéric Weisbecker @ 2009-03-02 18:06 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, linux-kernel

2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>
> On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:
>> >> +
>> >> +static
>> >> +void release_module_trace_bprintk_format(const char **start, const char **end)
>> >> +{
>> >> +     const char **iter;
>> >> +     lock_btrace();
>> >> +     for (iter = start; iter < end; iter++) {
>> >> +             struct trace_bprintk_fmt *tb_fmt;
>> >> +             if (!*iter)
>> >> +                     continue;
>> >> +
>> >> +             tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
>> >> +             tb_fmt->count--;
>> >> +             if (!tb_fmt->count && !btrace_metadata_count) {
>> >> +                     list_del(&tb_fmt->list);
>> >> +                     kfree(tb_fmt);
>> >
>> > Shouldn't *iter get assigned to NULL somewhere here?
>> >
>> > -- Steve
>>
>>
>> Hm, why?
>
> Well, after we free tb_fmt, the *iter will then point to garbage. Right?
>
> -- Steve


Now that you say it, I have some doubts about the possible sites that
can still dereference it
at this point.
I have to review and test it more seriously. I was convinced that the
count field kept track
of all references but now I'm not so sure, there can be still one
pending event that uses it into
the ring buffer, or it can be perhaps in use at the same time it is freed.
We should perhaps use rcu here, will see.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-03-02 18:06                                 ` Frédéric Weisbecker
@ 2009-03-02 18:23                                   ` Steven Rostedt
  2009-03-02 18:55                                     ` Frédéric Weisbecker
  0 siblings, 1 reply; 44+ messages in thread
From: Steven Rostedt @ 2009-03-02 18:23 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, LKML

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2763 bytes --]


On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:

> 2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
> >
> > On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:
> >> >> +
> >> >> +static
> >> >> +void release_module_trace_bprintk_format(const char **start, const char **end)
> >> >> +{
> >> >> +     const char **iter;
> >> >> +     lock_btrace();
> >> >> +     for (iter = start; iter < end; iter++) {
> >> >> +             struct trace_bprintk_fmt *tb_fmt;
> >> >> +             if (!*iter)
> >> >> +                     continue;
> >> >> +
> >> >> +             tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
> >> >> +             tb_fmt->count--;
> >> >> +             if (!tb_fmt->count && !btrace_metadata_count) {
> >> >> +                     list_del(&tb_fmt->list);
> >> >> +                     kfree(tb_fmt);
> >> >
> >> > Shouldn't *iter get assigned to NULL somewhere here?
> >> >
> >> > -- Steve
> >>
> >>
> >> Hm, why?
> >
> > Well, after we free tb_fmt, the *iter will then point to garbage. Right?
> >
> > -- Steve
> 
> 
> Now that you say it, I have some doubts about the possible sites that
> can still dereference it
> at this point.
> I have to review and test it more seriously. I was convinced that the
> count field kept track
> of all references but now I'm not so sure, there can be still one
> pending event that uses it into
> the ring buffer, or it can be perhaps in use at the same time it is freed.
> We should perhaps use rcu here, will see.
> 

How do you deal with ref counters in the ring buffer? If the ring buffer 
is set to overwrite mode (in which is usually is), then you will never 
know if a print was erased.

I haven't looked too deep into the implementation. But one safe way to
do this, with respect to modules, is the following:

#define ftrace_bprintk(fmt, args...) \
	do { \
		static const char __attribute__((section(ftrace_fmt))\
			*f = fmt; \
		_ftrace_bprintk(&f, args); \
	} while(0)

On output, you can do:

	trace_print_bprintk(...)
	{
		char **f = field->f;

		if (!f)
			trace_seq_printf(s, "MODULE UNLOADED?\n");
		trace_seq_printf(s, *f, ...)

Do you see what I'm doing?

Make the ftrace_printk create a constant pointer to the format instead
of passing in the format. It will istead pass in the address of something
pointing to the format.

Then on module load, you allocate the area and copy in all the ftrace_fmt 
sections.

On module unload, you just NULL out that area. You could probably reuse 
those NULL spots, but you would need some kind of checksum to be added 
such that a new module will be detected on print out.


This is the reason I avoided doing ftrace printk via pointers :-/

-- Steve

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH 3/5] ftrace: add ftrace_bprintk()
  2009-03-02 18:23                                   ` Steven Rostedt
@ 2009-03-02 18:55                                     ` Frédéric Weisbecker
  0 siblings, 0 replies; 44+ messages in thread
From: Frédéric Weisbecker @ 2009-03-02 18:55 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Andrew Morton, Lai Jiangshan, Linus Torvalds,
	Peter Zijlstra, LKML

2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>
> On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:
>
>> 2009/3/2 Steven Rostedt <rostedt@goodmis.org>:
>> >
>> > On Mon, 2 Mar 2009, Fr?d?ric Weisbecker wrote:
>> >> >> +
>> >> >> +static
>> >> >> +void release_module_trace_bprintk_format(const char **start, const char **end)
>> >> >> +{
>> >> >> +     const char **iter;
>> >> >> +     lock_btrace();
>> >> >> +     for (iter = start; iter < end; iter++) {
>> >> >> +             struct trace_bprintk_fmt *tb_fmt;
>> >> >> +             if (!*iter)
>> >> >> +                     continue;
>> >> >> +
>> >> >> +             tb_fmt = container_of(*iter, struct trace_bprintk_fmt, fmt[0]);
>> >> >> +             tb_fmt->count--;
>> >> >> +             if (!tb_fmt->count && !btrace_metadata_count) {
>> >> >> +                     list_del(&tb_fmt->list);
>> >> >> +                     kfree(tb_fmt);
>> >> >
>> >> > Shouldn't *iter get assigned to NULL somewhere here?
>> >> >
>> >> > -- Steve
>> >>
>> >>
>> >> Hm, why?
>> >
>> > Well, after we free tb_fmt, the *iter will then point to garbage. Right?
>> >
>> > -- Steve
>>
>>
>> Now that you say it, I have some doubts about the possible sites that
>> can still dereference it
>> at this point.
>> I have to review and test it more seriously. I was convinced that the
>> count field kept track
>> of all references but now I'm not so sure, there can be still one
>> pending event that uses it into
>> the ring buffer, or it can be perhaps in use at the same time it is freed.
>> We should perhaps use rcu here, will see.
>>
>
> How do you deal with ref counters in the ring buffer? If the ring buffer
> is set to overwrite mode (in which is usually is), then you will never
> know if a print was erased.


Ah, I didn't think about it.

>
> I haven't looked too deep into the implementation. But one safe way to
> do this, with respect to modules, is the following:
>
> #define ftrace_bprintk(fmt, args...) \
>        do { \
>                static const char __attribute__((section(ftrace_fmt))\
>                        *f = fmt; \
>                _ftrace_bprintk(&f, args); \
>        } while(0)
>
> On output, you can do:
>
>        trace_print_bprintk(...)
>        {
>                char **f = field->f;
>
>                if (!f)
>                        trace_seq_printf(s, "MODULE UNLOADED?\n");
>                trace_seq_printf(s, *f, ...)
>
> Do you see what I'm doing?
>
> Make the ftrace_printk create a constant pointer to the format instead
> of passing in the format. It will istead pass in the address of something
> pointing to the format.
>
> Then on module load, you allocate the area and copy in all the ftrace_fmt
> sections.
>
> On module unload, you just NULL out that area. You could probably reuse
> those NULL spots, but you would need some kind of checksum to be added
> such that a new module will be detected on print out.


Yeah ok, but it seems rather complex.
I think we can still use this list of format pointers but:

- make it a hashlist of format strings
- if a module is loaded we make a copy of each printk formats but
firstly we verify if it already exists into the hashlist. If so, then
only override the format pointer on the module (don't allocate any new
thing)

_when it is unloaded...don't do anything.

So yes, perhaps we will never free these strings, but at least we
avoid duplicated formats on the
list and then somewhat limit the bad consequences.

Hm?

>
> This is the reason I avoided doing ftrace printk via pointers :-/
>
> -- Steve
>

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2009-03-02 18:56 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-02-24  5:17 [PATCH 1/3] add binary printf Frederic Weisbecker
2009-02-26 13:02 ` Ingo Molnar
2009-02-26 17:05   ` Frederic Weisbecker
2009-02-26 17:43     ` Ingo Molnar
2009-02-26 17:45       ` Frederic Weisbecker
2009-02-26 17:52         ` Ingo Molnar
2009-02-26 18:34           ` Frederic Weisbecker
2009-02-26 18:45             ` Ingo Molnar
2009-02-26 18:52             ` Frederic Weisbecker
2009-02-26 18:56               ` Ingo Molnar
2009-02-26 19:05                 ` Frederic Weisbecker
2009-02-27  6:19                 ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
2009-02-27  6:46                   ` Andrew Morton
2009-02-27  7:12                     ` Frederic Weisbecker
2009-02-27  7:39                       ` Andrew Morton
2009-02-27  8:32                         ` Ingo Molnar
2009-02-27  8:45                           ` Andrew Morton
2009-02-27  9:35                             ` Ingo Molnar
2009-02-27  8:20                     ` Ingo Molnar
2009-02-27  8:33                       ` Andrew Morton
2009-02-27 15:03                     ` Steven Rostedt
2009-02-28  0:30                       ` Linus Torvalds
2009-02-28  0:39                   ` Linus Torvalds
2009-02-28  8:11                     ` Frederic Weisbecker
2009-02-28  9:13                       ` Ingo Molnar
2009-02-28 19:45                         ` [PATCH 1/5] add binary printf Frederic Weisbecker
2009-02-28 20:16                         ` [PATCH 2/5] ftrace: infrastructure for supporting binary record Frederic Weisbecker
2009-03-02 16:27                           ` Steven Rostedt
2009-03-02 17:39                             ` Frédéric Weisbecker
2009-02-28 21:30                         ` [PATCH 3/5] ftrace: add ftrace_bprintk() Frederic Weisbecker
2009-03-02 16:34                           ` Steven Rostedt
2009-03-02 17:45                             ` Frédéric Weisbecker
2009-03-02 17:53                               ` Steven Rostedt
2009-03-02 18:06                                 ` Frédéric Weisbecker
2009-03-02 18:23                                   ` Steven Rostedt
2009-03-02 18:55                                     ` Frédéric Weisbecker
2009-02-28 22:16                         ` [PATCH 4/5] tracing/core: drop the old ftrace_printk implementation in favour of ftrace_bprintk Frederic Weisbecker
2009-03-02 16:37                           ` Steven Rostedt
2009-03-02 17:47                             ` Frédéric Weisbecker
2009-02-28 23:11                         ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Frederic Weisbecker
2009-03-01  2:34                         ` [PATCH 5/5 v3] " Frederic Weisbecker
2009-03-01  3:31                         ` [PATCH 0/5] Binary ftrace_printk Frederic Weisbecker
2009-02-28 16:54                       ` [PATCH][RFC] vsprintf: unify the format decoding layer for its 3 users Linus Torvalds
2009-02-28 17:18                         ` Frederic Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).