* [PATCH] jump_label,x86: use text_poke_smp_batch for entries update
@ 2011-04-28 20:52 Jiri Olsa
  2011-04-29 15:15 ` Jason Baron
  2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  0 siblings, 2 replies; 10+ messages in thread
From: Jiri Olsa @ 2011-04-28 20:52 UTC (permalink / raw)
  To: jbaron, rostedt, mingo; +Cc: linux-kernel, Jiri Olsa

Changing the jump label update code to use batch processing
for x86 architectures. 

Currently each jump label update calls text_poke_smp for each
jump label key entry. Thus one key update ends up calling stop
machine multiple times.

This patch uses text_poke_smp_batch, which is called for
all of the key's entries, ensuring stop_machine is called
only once.
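
A minimal sketch of the change in the call pattern (addr, insn and
nents are illustrative names; the signatures are the ones used in
the diff below):

	/* before: one stop_machine round trip per jump_entry */
	for (i = 0; i < nents; i++)
		text_poke_smp(addr[i], insn[i], JUMP_LABEL_NOP_SIZE);

	/* after: one stop_machine round trip for the whole key */
	for (i = 0; i < nents; i++) {
		param[i].addr   = addr[i];	/* instruction to patch */
		param[i].opcode = insn[i];	/* 5-byte jmp or nop */
		param[i].len    = JUMP_LABEL_NOP_SIZE;
	}
	text_poke_smp_batch(param, nents);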

The highest entry count I found for a single key was 20,
so I set MAX_POKE_PARAMS to 30.
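
The enabled case in setup_poke_param below writes a 5-byte near jmp
(opcode 0xe9) whose rel32 displacement is relative to the next
instruction; the disabled case writes the ideal 5-byte nop. A worked
example of the offset, with made-up addresses:

	/*
	 *   entry->code   = 0xffffffff81000100   (site of the 5-byte nop)
	 *   entry->target = 0xffffffff81000180   (label to jump to)
	 *
	 *   code->jump   = 0xe9                  (near jmp rel32)
	 *   code->offset = entry->target - (entry->code + JUMP_LABEL_NOP_SIZE)
	 *                = 0xffffffff81000180 - 0xffffffff81000105
	 *                = 0x7b
	 */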

I added an arch_jump_label_transform_key function with a weak
definition to update all the entries for a single key.

Architectures that will do the batch update in the future can
override this function, as x86 does in this patch. For now they
can stay in their current state with no further changes.
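
The __weak mechanism this relies on, in miniature (the file and
function names here are made up for illustration):

	/* generic code: weak fallback, used when nothing overrides it */
	void __weak do_update(void)
	{
		/* one text_poke_smp() per entry */
	}

	/* arch code: a strong definition replaces the weak one at link time */
	void do_update(void)
	{
		/* queue entries and flush with text_poke_smp_batch() */
	}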

I tested this on x86 and s390 archs.

wbr,
jirka


Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 arch/x86/kernel/jump_label.c |   69 ++++++++++++++++++++++++++++++++++--------
 include/linux/jump_label.h   |    2 +
 kernel/jump_label.c          |   16 ++++++++-
 3 files changed, 72 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 3fee346..4024c67 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,22 +24,65 @@ union jump_code_union {
 	} __attribute__((packed));
 };
 
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
+struct text_poke_buffer {
+	u8 buf[JUMP_LABEL_NOP_SIZE];
+};
+
+#define MAX_POKE_PARAMS 30
+static struct text_poke_param  poke_params[MAX_POKE_PARAMS];
+static struct text_poke_buffer poke_buffers[MAX_POKE_PARAMS];
+
+static void setup_poke_param(struct text_poke_param *param, u8 *buf,
+			     int enable,
+			     struct jump_entry *entry)
 {
-	union jump_code_union code;
+	union jump_code_union *code = (union jump_code_union *) buf;
 
-	if (type == JUMP_LABEL_ENABLE) {
-		code.jump = 0xe9;
-		code.offset = entry->target -
-				(entry->code + JUMP_LABEL_NOP_SIZE);
+	if (enable == JUMP_LABEL_ENABLE) {
+		code->jump = 0xe9;
+		code->offset = entry->target -
+			       (entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
-	get_online_cpus();
-	mutex_lock(&text_mutex);
-	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
-	mutex_unlock(&text_mutex);
-	put_online_cpus();
+		memcpy(code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+
+	param->addr = (void *) entry->code;
+	param->opcode = code;
+	param->len = JUMP_LABEL_NOP_SIZE;
+}
+
+void arch_jump_label_transform_key(struct jump_label_key *key,
+				   struct jump_entry *entry, int enable)
+{
+	int count;
+
+	do {
+		for (count = 0;
+		     (entry->key == (jump_label_t)(unsigned long) key) &&
+		     (count < MAX_POKE_PARAMS);
+		     entry++) {
+
+			/*
+			 * The entry->code set to 0 invalidates module init text
+			 * sections (see jump_label_invalidate_module_init).
+			 */
+			if (!entry->code || !kernel_text_address(entry->code))
+				continue;
+
+			setup_poke_param(&poke_params[count],
+					 poke_buffers[count].buf,
+					 enable, entry);
+			count++;
+		}
+
+		if (count) {
+			get_online_cpus();
+			mutex_lock(&text_mutex);
+			text_poke_smp_batch(poke_params, count);
+			mutex_unlock(&text_mutex);
+			put_online_cpus();
+		}
+
+	} while (count == MAX_POKE_PARAMS);
 }
 
 void arch_jump_label_text_poke_early(jump_label_t addr)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 83e745f..f8afc3e 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -43,6 +43,8 @@ extern struct jump_entry __stop___jump_table[];
 
 extern void jump_label_lock(void);
 extern void jump_label_unlock(void);
+extern void arch_jump_label_transform_key(struct jump_label_key *key,
+				 struct jump_entry *entry, int enable);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c09..e2a0a73 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -104,8 +104,14 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 	return 0;
 }
 
-static void __jump_label_update(struct jump_label_key *key,
-		struct jump_entry *entry, int enable)
+
+void __weak arch_jump_label_transform(struct jump_entry *entry,
+				      enum jump_label_type type)
+{
+}
+
+void __weak arch_jump_label_transform_key(struct jump_label_key *key,
+					  struct jump_entry *entry, int enable)
 {
 	for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
 		/*
@@ -125,6 +131,12 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
 {
 }
 
+static void __jump_label_update(struct jump_label_key *key,
+		struct jump_entry *entry, int enable)
+{
+	arch_jump_label_transform_key(key, entry, enable);
+}
+
 static __init int jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
-- 
1.7.1



* Re: [PATCH] jump_label,x86: use text_poke_smp_batch for entries update
  2011-04-28 20:52 [PATCH] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
@ 2011-04-29 15:15 ` Jason Baron
  2011-04-29 15:37   ` Jiri Olsa
  2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  1 sibling, 1 reply; 10+ messages in thread
From: Jason Baron @ 2011-04-29 15:15 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: rostedt, mingo, linux-kernel

On Thu, Apr 28, 2011 at 10:52:41PM +0200, Jiri Olsa wrote:
> Changing the jump label update code to use batch processing
> for x86 architectures. 
> 
> Currently each jump label update calls text_poke_smp for each
> jump label key entry. Thus one key update ends up calling stop
> machine multiple times.
> 

cool. Batching the updates is a nice idea.

> This patch uses text_poke_smp_batch, which is called for
> all of the key's entries, ensuring stop_machine is called
> only once.
> 

Since we walk the modules list per key (calling __jump_label_update()),
this is not always the case. I'm wondering then if maybe what we want to do
is 'queue' each update from the core code. Then, when the queue fills
up, we flush out the queue. In this way, we cover jump labels that might
be in both the core kernel and in modules. This also has the side effect
of moving code such as:

/*
 * The entry->code set to 0 invalidates module init text
 * sections (see jump_label_invalidate_module_init).
 */
if (!entry->code || !kernel_text_address(entry->code))
	continue;


out of the arch code into the generic code, which in my opinion is a bit
cleaner. thoughts?
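
A rough sketch of that queue/flush flow (jump_label_queue() and
jump_label_flush() are hypothetical names, not from any posted patch):

	static void __jump_label_update(struct jump_label_key *key,
					struct jump_entry *entry, int enable)
	{
		for (; entry->key == (jump_label_t)(unsigned long)key; entry++) {
			/* module init text filtering moves here, into core code */
			if (!entry->code || !kernel_text_address(entry->code))
				continue;
			jump_label_queue(entry, enable);
		}
	}

	/* after walking the core kernel and every module for the key: */
	jump_label_flush();	/* one stop_machine for everything queued */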

Thanks,

-Jason


* Re: [PATCH] jump_label,x86: use text_poke_smp_batch for entries update
  2011-04-29 15:15 ` Jason Baron
@ 2011-04-29 15:37   ` Jiri Olsa
  0 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2011-04-29 15:37 UTC (permalink / raw)
  To: Jason Baron; +Cc: rostedt, mingo, linux-kernel

On Fri, Apr 29, 2011 at 11:15:09AM -0400, Jason Baron wrote:
> On Thu, Apr 28, 2011 at 10:52:41PM +0200, Jiri Olsa wrote:
> > Changing the jump label update code to use batch processing
> > for x86 architectures. 
> > 
> > Currently each jump label update calls text_poke_smp for each
> > jump label key entry. Thus one key update ends up calling stop
> > machine multiple times.
> > 
> 
> cool. Batching the updates is a nice idea.
> 
> > This patch uses text_poke_smp_batch, which is called for
> > all of the key's entries, ensuring stop_machine is called
> > only once.
> > 
> 
> Since we walk the modules list per key (calling __jump_label_update()),
> this is not always the case. I'm wondering then if maybe what we want to do
> is 'queue' each update from the core code. Then, when the queue fills
> up, we flush out the queue. In this way, we cover jump labels that might
> be in both the core kernel and in modules. This also has the side effect
> of moving code such as:
> 
> /*
>  * The entry->code set to 0 invalidates module init text
>  * sections (see jump_label_invalidate_module_init).
>  */
> if (!entry->code || !kernel_text_address(entry->code))
> 	continue;
> 
> 
> out of the arch code into the generic code, which in my opinion is a bit
> cleaner. thoughts?

nice, we'd save another few stop_machine calls
I'll update the patch

thanks,
jirka


* [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries
  2011-04-28 20:52 [PATCH] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
  2011-04-29 15:15 ` Jason Baron
@ 2011-05-04  9:41 ` Jiri Olsa
  2011-05-04  9:41   ` [PATCHv2 1/2] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
                     ` (2 more replies)
  1 sibling, 3 replies; 10+ messages in thread
From: Jiri Olsa @ 2011-05-04  9:41 UTC (permalink / raw)
  To: jbaron, rostedt, mingo; +Cc: linux-kernel

hi,

I'm changing the jump label update code to use batch processing
for x86 architectures. 

Currently each jump label update calls text_poke_smp for each
jump label key entry. Thus one key update ends up calling stop
machine multiple times.

This patch is using text_poke_smp_batch, which is called for
all the key's entries. Thus ensuring the stop machine is called
only once per jump_label key.

attached patches:
1/2 - jump_label,x86: use text_poke_smp_batch for entries update
	- added jump_label_update_end function which is paired with
	the key's entries update
	- jump_label_update_end calls arch_jump_label_update_end which
	is overridden by the x86 arch and makes the batch update of all the
	entries queued by arch_jump_label_transform function.

2/2 - jump_label,x86: using static arrays before dynamic allocation is needed
	- in the first patch, the queue array, which stores jump_label
	entries is allocated/resized dynamically.
	- due to the fact that many jump_label entries have low number
	of callers, it seems appropriate to use static sized array
	when the update starts and if needed (in case of high number
	of jump_label entries) allocate/use the dynamic array


Patch 2/2 could be omitted if the benefit/complexity ratio
seems too low.. ;)

I tested this on x86 and s390 archs.

v2 changes:
 - queueing all entries for single key and process them
   all at one time

wbr,
jirka
---
 arch/x86/kernel/jump_label.c |  177 +++++++++++++++++++++++++++++++++++++++--
 include/linux/jump_label.h   |    1 +
 kernel/jump_label.c          |   16 ++++-
 3 files changed, 183 insertions(+), 11 deletions(-)


* [PATCHv2 1/2] jump_label,x86: use text_poke_smp_batch for entries update
  2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
@ 2011-05-04  9:41   ` Jiri Olsa
  2011-05-04  9:41   ` [PATCHv2 2/2] jump_label,x86: using static arrays before dynamic allocation is needed Jiri Olsa
  2011-05-09 18:38   ` [PATCHv3] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  2 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2011-05-04  9:41 UTC (permalink / raw)
  To: jbaron, rostedt, mingo; +Cc: linux-kernel, Jiri Olsa

Changing the jump label update code to use batch processing
for x86 architectures. 

Currently each jump label update calls text_poke_smp for each
jump label key entry. Thus one key update ends up calling stop
machine multiple times.

This patch is using text_poke_smp_batch, which is called for
all the key's entries. Thus ensuring the stop machine is called
only once per jump_label key.

Added jump_label_update_end function which is paired with
the key's entries update.

The jump_label_update_end calls arch_jump_label_update_end
(with a generic weak definition) which is overridden by the x86
arch and makes the batch update of all the entries queued
by arch_jump_label_transform function.

Added dynamic array/handling for queueing jump_label entries
with the same key.
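
The resulting call sequence for one key then looks like this (a
sketch of the flow using the function names from this patch, not
literal code):

	__jump_label_update(key, entry, enable)
	  -> arch_jump_label_transform(entry0, enable)	/* queued */
	  -> arch_jump_label_transform(entry1, enable)	/* queued */
	  ...
	jump_label_update_end()
	  -> arch_jump_label_update_end()
	    -> poke_end()
	      -> text_poke_smp_batch(poke_pars, poke_cnt)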

Tested this on x86 and s390 archs.


Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 arch/x86/kernel/jump_label.c |  128 +++++++++++++++++++++++++++++++++++++++---
 include/linux/jump_label.h   |    1 +
 kernel/jump_label.c          |   16 +++++-
 3 files changed, 134 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 3fee346..7a4dd32 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -11,6 +11,7 @@
 #include <linux/list.h>
 #include <linux/jhash.h>
 #include <linux/cpu.h>
+#include <linux/slab.h>
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
 
@@ -24,24 +25,133 @@ union jump_code_union {
 	} __attribute__((packed));
 };
 
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
+struct text_poke_buffer {
+	u8 code[JUMP_LABEL_NOP_SIZE];
+};
+
+#define POKE_ALLOC_CNT 30
+
+static struct text_poke_param  *poke_pars;
+static struct text_poke_buffer *poke_bufs;
+static int poke_cnt, poke_size;
+
+static void poke_setup(struct text_poke_param *param, u8 *buf,
+		       int enable,
+		       struct jump_entry *entry)
 {
-	union jump_code_union code;
+	union jump_code_union *code = (union jump_code_union *) buf;
 
-	if (type == JUMP_LABEL_ENABLE) {
-		code.jump = 0xe9;
-		code.offset = entry->target -
-				(entry->code + JUMP_LABEL_NOP_SIZE);
+	if (enable == JUMP_LABEL_ENABLE) {
+		code->jump = 0xe9;
+		code->offset = entry->target -
+			       (entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+		memcpy(code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+
+	param->addr = (void *) entry->code;
+	param->opcode = code;
+	param->len = JUMP_LABEL_NOP_SIZE;
+}
+
+static int poke_alloc(void)
+{
+	struct text_poke_param  *pars;
+	struct text_poke_buffer *bufs;
+
+	if (poke_cnt % POKE_ALLOC_CNT)
+		return 0;
+
+	poke_size += POKE_ALLOC_CNT;
+
+	pars = krealloc(poke_pars, poke_size * sizeof(*pars),
+			GFP_KERNEL);
+	if (!pars)
+		return -ENOMEM;
+
+	bufs = krealloc(poke_bufs, poke_size * sizeof(*bufs),
+			GFP_KERNEL);
+	if (!bufs) {
+		kfree(pars);
+		return -ENOMEM;
+	}
+
+	poke_pars = pars;
+	poke_bufs = bufs;
+	return 0;
+}
+
+static void poke_free(void)
+{
+	kfree(poke_pars);
+	kfree(poke_bufs);
+
+	poke_cnt = poke_size = 0;
+	poke_pars = NULL;
+	poke_bufs = NULL;
+}
+
+static void poke_process(struct text_poke_param *par, int cnt)
+{
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+	text_poke_smp_batch(par, cnt);
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
 }
 
+static void poke_end(void)
+{
+	if (!poke_cnt)
+		return;
+
+	poke_process(poke_pars, poke_cnt);
+	poke_free();
+}
+
+static void process_entry(struct jump_entry *entry,
+			     enum jump_label_type enable)
+{
+	struct text_poke_param  par;
+	struct text_poke_buffer buf;
+
+	poke_setup(&par, buf.code, enable, entry);
+	poke_process(&par, 1);
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type enable)
+{
+	struct text_poke_param  *par;
+	struct text_poke_buffer *buf;
+
+	if (poke_alloc()) {
+		/*
+		 * Allocation failed, so let's flush what we have
+		 * gathered so far and process this entry
+		 * separately. On the next call we try the
+		 * allocation again.
+		 */
+		poke_end();
+		process_entry(entry, enable);
+		return;
+	}
+
+	par = &poke_pars[poke_cnt];
+	buf = &poke_bufs[poke_cnt];
+
+	poke_setup(par, buf->code, enable, entry);
+	poke_cnt++;
+}
+
+/*
+ * Called after arch_jump_label_transform is called for
+ * all entries of a single key.
+ */
+void arch_jump_label_update_end(void)
+{
+	poke_end();
+}
+
 void arch_jump_label_text_poke_early(jump_label_t addr)
 {
 	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 83e745f..e7a8fa3 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -46,6 +46,7 @@ extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
+extern void arch_jump_label_update_end(void);
 extern int jump_label_text_reserved(void *start, void *end);
 extern void jump_label_inc(struct jump_label_key *key);
 extern void jump_label_dec(struct jump_label_key *key);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c09..6657a37 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -125,6 +125,15 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
 {
 }
 
+void __weak arch_jump_label_update_end(void)
+{
+}
+
+void jump_label_update_end(void)
+{
+	arch_jump_label_update_end();
+}
+
 static __init int jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
@@ -244,10 +253,11 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_enabled(key))
+		if (jump_label_enabled(key)) {
 			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
+			jump_label_update_end();
+		}
 	}
-
 	return 0;
 }
 
@@ -376,6 +386,8 @@ static void jump_label_update(struct jump_label_key *key, int enable)
 #ifdef CONFIG_MODULES
 	__jump_label_mod_update(key, enable);
 #endif
+
+	jump_label_update_end();
 }
 
 #endif
-- 
1.7.1



* [PATCHv2 2/2] jump_label,x86: using static arrays before dynamic allocation is needed
  2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  2011-05-04  9:41   ` [PATCHv2 1/2] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
@ 2011-05-04  9:41   ` Jiri Olsa
  2011-05-09 18:38   ` [PATCHv3] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  2 siblings, 0 replies; 10+ messages in thread
From: Jiri Olsa @ 2011-05-04  9:41 UTC (permalink / raw)
  To: jbaron, rostedt, mingo; +Cc: linux-kernel, Jiri Olsa

Originally the queue array, which stores jump_label entries, is
allocated/resized dynamically.

Since most jump_label keys have a low number of callers, it seems
appropriate to use a statically sized array when the update starts
and, if needed (for a key with a high number of entries), to
allocate/use the dynamic array.

The initial value of POKE_STATIC_CNT is set to 10.
This value is based on the entries-per-key distribution
that I got with the Fedora kernel config and my current
kernel config.

Most of the keys have up to 10 entries, with a few exceptions.

 # entries   # keys
 ------------------
 Fedora:
     1         201
     2           6
     3           1
     4           1
    82           1
 Mine:
     1         113
     2           6
     3           4
     4           1
     5           1
     8           1
    20           1
    69           1

Tested this on x86 and s390 archs.


Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 arch/x86/kernel/jump_label.c |   73 +++++++++++++++++++++++++++++++++++-------
 1 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 7a4dd32..15f3a46 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -30,9 +30,19 @@ struct text_poke_buffer {
 };
 
 #define POKE_ALLOC_CNT 30
+#define POKE_STATIC_CNT 10
+#define IS_STATIC(cnt) ((cnt) < POKE_STATIC_CNT)
+#define WAS_STATIC(cnt) IS_STATIC(cnt - 1)
 
-static struct text_poke_param  *poke_pars;
-static struct text_poke_buffer *poke_bufs;
+static struct text_poke_param  poke_pars_static[POKE_STATIC_CNT];
+static struct text_poke_buffer poke_bufs_static[POKE_STATIC_CNT];
+
+/*
+ * Initially we start with static array and switch to dynamic
+ * once we reach POKE_STATIC_CNT number of entries
+ */
+static struct text_poke_param  *poke_pars = poke_pars_static;
+static struct text_poke_buffer *poke_bufs = poke_bufs_static;
 static int poke_cnt, poke_size;
 
 static void poke_setup(struct text_poke_param *param, u8 *buf,
@@ -58,7 +68,19 @@ static int poke_alloc(void)
 	struct text_poke_param  *pars;
 	struct text_poke_buffer *bufs;
 
-	if (poke_cnt % POKE_ALLOC_CNT)
+	/* So far, so static.. nothing to allocate. */
+	if (IS_STATIC(poke_cnt))
+		return 0;
+
+	/*
+	 * We just hit the dynamic allocation count, so let's go
+	 * through and allocate the first round. Otherwise return
+	 * if there is still room in the current dynamic array.
+	 */
+	if (WAS_STATIC(poke_cnt)) {
+		poke_pars = NULL;
+		poke_bufs = NULL;
+	} else if (poke_cnt % POKE_ALLOC_CNT)
 		return 0;
 
 	poke_size += POKE_ALLOC_CNT;
@@ -66,28 +88,55 @@ static int poke_alloc(void)
 	pars = krealloc(poke_pars, poke_size * sizeof(*pars),
 			GFP_KERNEL);
 	if (!pars)
-		return -ENOMEM;
+		goto out_nomem;
 
 	bufs = krealloc(poke_bufs, poke_size * sizeof(*bufs),
 			GFP_KERNEL);
-	if (!bufs) {
-		kfree(pars);
-		return -ENOMEM;
-	}
+	if (!bufs)
+		goto out_nomem;
 
 	poke_pars = pars;
 	poke_bufs = bufs;
+
+	/*
+	 * If we just started dynamic allocation, copy the static
+	 * contents to new fresh dynamic array.
+	 */
+	if (WAS_STATIC(poke_cnt)) {
+		memcpy(poke_pars, poke_pars_static, sizeof(poke_pars_static));
+		memcpy(poke_bufs, poke_bufs_static, sizeof(poke_bufs_static));
+	}
 	return 0;
+
+ out_nomem:
+	kfree(pars);
+
+	if (WAS_STATIC(poke_cnt)) {
+		poke_pars = poke_pars_static;
+		poke_bufs = poke_bufs_static;
+	}
+	return -ENOMEM;
 }
 
 static void poke_free(void)
 {
-	kfree(poke_pars);
-	kfree(poke_bufs);
+	/*
+	 * Using WAS_STATIC, since poke_cnt was already incremented
+	 * for the last element.
+	 */
+	if (!WAS_STATIC(poke_cnt)) {
+		kfree(poke_pars);
+		kfree(poke_bufs);
+	}
 
 	poke_cnt = poke_size = 0;
-	poke_pars = NULL;
-	poke_bufs = NULL;
+
+	/*
+	 * Going from the start again, initialize to
+	 * static array.
+	 */
+	poke_pars = poke_pars_static;
+	poke_bufs = poke_bufs_static;
 }
 
 static void poke_process(struct text_poke_param *par, int cnt)
-- 
1.7.1



* Re: [PATCHv3] jump_label,x86: make batch update of jump_label entries
  2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
  2011-05-04  9:41   ` [PATCHv2 1/2] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
  2011-05-04  9:41   ` [PATCHv2 2/2] jump_label,x86: using static arrays before dynamic allocation is needed Jiri Olsa
@ 2011-05-09 18:38   ` Jiri Olsa
  2011-05-09 19:27     ` Jason Baron
  2 siblings, 1 reply; 10+ messages in thread
From: Jiri Olsa @ 2011-05-09 18:38 UTC (permalink / raw)
  To: jbaron, rostedt, mingo; +Cc: linux-kernel

On Wed, May 04, 2011 at 11:41:41AM +0200, Jiri Olsa wrote:
> hi,
> 
> I'm changing the jump label update code to use batch processing
> for x86 architectures. 
> 
> Currently each jump label update calls text_poke_smp for each
> jump label key entry. Thus one key update ends up calling stop
> machine multiple times.
> 
> This patch is using text_poke_smp_batch, which is called for
> all the key's entries. Thus ensuring the stop machine is called
> only once per jump_label key.
> 
> attached patches:
> 1/2 - jump_label,x86: use text_poke_smp_batch for entries update
> 	- added jump_label_update_end function which is paired with
> 	the key's entries update
> 	- jump_label_update_end calls arch_jump_label_update_end which
> 	is overridden by the x86 arch and makes the batch update of all the
> 	entries queued by arch_jump_label_transform function.
> 
> 2/2 - jump_label,x86: using static arrays before dynamic allocation is needed
> 	- in the first patch, the queue array, which stores jump_label
> 	entries is allocated/resized dynamically.
> 	- due to the fact that many jump_label entries have low number
> 	of callers, it seems appropriate to use static sized array
> 	when the update starts and if needed (in case of high number
> 	of jump_label entries) allocate/use the dynamic array
> 
> 
> Patch 2/2 could be omitted if the benefit/complexity ratio
> seems too low.. ;)
> 
> I tested this on x86 and s390 archs.
> 
> v2 changes:
>  - queueing all entries for single key and process them
>    all at one time
> 
> wbr,
> jirka
> ---
>  arch/x86/kernel/jump_label.c |  177 +++++++++++++++++++++++++++++++++++++++--
>  include/linux/jump_label.h   |    1 +
>  kernel/jump_label.c          |   16 ++++-
>  3 files changed, 183 insertions(+), 11 deletions(-)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

hi,

I did jump_label entry statistics on an allyesconfig kernel
and got the following numbers:

callers - keys
      1 - 964
      2 - 28
      3 - 5
      4 - 1
      6 - 1
     11 - 2
     12 - 1
     14 - 2
     17 - 2
     21 - 1
    170 - 1


So the maximum is 170 callers, and just for one key:
the one used in the trace_module_get function.

Jason suggested we might stay with the static approach, because
for most of the keys the number of callers is at most 21.
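
A hypothetical debug snippet for gathering numbers like these from
the core kernel's jump table (it assumes the table has already been
sorted by key, as jump_label_init() does; it is not part of any
posted patch):

	#include <linux/jump_label.h>
	#include <linux/kernel.h>
	#include <linux/init.h>

	static __init int jump_label_stats(void)
	{
		struct jump_entry *iter = __start___jump_table;
		struct jump_entry *stop = __stop___jump_table;

		while (iter < stop) {
			jump_label_t key = iter->key;
			int callers = 0;

			/* entries with the same key are adjacent once sorted */
			while (iter < stop && iter->key == key) {
				callers++;
				iter++;
			}
			printk(KERN_INFO "key %lx: %d callers\n",
			       (unsigned long)key, callers);
		}
		return 0;
	}
	late_initcall(jump_label_stats);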

I'm attaching the static version for consideration.

wbr,
jirka

---
Changing the jump label update code to use batch processing
for x86 architectures.

Currently each jump label update calls text_poke_smp for each
jump label key entry. Thus one key update ends up calling stop
machine multiple times.

This patch is using text_poke_smp_batch, which is called for
multiple entries at a time.

Added jump_label_update_end function which is paired with
the key's entries update.

The jump_label_update_end calls arch_jump_label_update_end
(with a generic weak definition) which is overridden by the x86
arch and makes the batch update of all the entries queued
by arch_jump_label_transform function.

The number of entries that can be updated at a single time
is set to 30. This number is based on statistics from allyesconfig
kernel showing most of the keys having up to 30 callers.

callers - keys
      1 - 964
      2 - 28
      3 - 5
      4 - 1
      6 - 1
     11 - 2
     12 - 1
     14 - 2
     17 - 2
     21 - 1
    170 - 1


Signed-off-by: Jiri Olsa <jolsa@redhat.com> 
---
 arch/x86/kernel/jump_label.c |   68 ++++++++++++++++++++++++++++++++++++-----
 include/linux/jump_label.h   |    1 +
 kernel/jump_label.c          |   16 ++++++++-
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 3fee346..bbde5db 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,24 +24,74 @@ union jump_code_union {
 	} __attribute__((packed));
 };
 
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
+struct text_poke_buffer {
+	u8 code[JUMP_LABEL_NOP_SIZE];
+};
+
+#define POKE_CNT_MAX 30
+
+static struct text_poke_param  poke_pars[POKE_CNT_MAX];
+static struct text_poke_buffer poke_bufs[POKE_CNT_MAX];
+static int poke_cnt;
+
+static void poke_setup(struct text_poke_param *param, u8 *buf,
+		       int enable,
+		       struct jump_entry *entry)
 {
-	union jump_code_union code;
+	union jump_code_union *code = (union jump_code_union *) buf;
 
-	if (type == JUMP_LABEL_ENABLE) {
-		code.jump = 0xe9;
-		code.offset = entry->target -
-				(entry->code + JUMP_LABEL_NOP_SIZE);
+	if (enable == JUMP_LABEL_ENABLE) {
+		code->jump = 0xe9;
+		code->offset = entry->target -
+			       (entry->code + JUMP_LABEL_NOP_SIZE);
 	} else
-		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+		memcpy(code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+
+	param->addr = (void *) entry->code;
+	param->opcode = code;
+	param->len = JUMP_LABEL_NOP_SIZE;
+}
+
+static void poke_process(void)
+{
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+
+	text_poke_smp_batch(poke_pars, poke_cnt);
+	poke_cnt = 0;
+
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
 }
 
+static void poke_end(void)
+{
+	if (!poke_cnt)
+		return;
+
+	poke_process();
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type enable)
+{
+	if (poke_cnt == POKE_CNT_MAX)
+		poke_process();
+
+	poke_setup(&poke_pars[poke_cnt], poke_bufs[poke_cnt].code,
+		   enable, entry);
+	poke_cnt++;
+}
+
+/*
+ * Called after arch_jump_label_transform is called for
+ * all entries of a single key.
+ */
+void arch_jump_label_update_end(void)
+{
+	poke_end();
+}
+
 void arch_jump_label_text_poke_early(jump_label_t addr)
 {
 	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 83e745f..e7a8fa3 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -46,6 +46,7 @@ extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
 				 enum jump_label_type type);
 extern void arch_jump_label_text_poke_early(jump_label_t addr);
+extern void arch_jump_label_update_end(void);
 extern int jump_label_text_reserved(void *start, void *end);
 extern void jump_label_inc(struct jump_label_key *key);
 extern void jump_label_dec(struct jump_label_key *key);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 74d1c09..6657a37 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -125,6 +125,15 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
 {
 }
 
+void __weak arch_jump_label_update_end(void)
+{
+}
+
+void jump_label_update_end(void)
+{
+	arch_jump_label_update_end();
+}
+
 static __init int jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
@@ -244,10 +253,11 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_enabled(key))
+		if (jump_label_enabled(key)) {
 			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
+			jump_label_update_end();
+		}
 	}
-
 	return 0;
 }
 
@@ -376,6 +386,8 @@ static void jump_label_update(struct jump_label_key *key, int enable)
 #ifdef CONFIG_MODULES
 	__jump_label_mod_update(key, enable);
 #endif
+
+	jump_label_update_end();
 }
 
 #endif
-- 
1.7.1



* Re: [PATCHv3] jump_label,x86: make batch update of jump_label entries
  2011-05-09 18:38   ` [PATCHv3] jump_label,x86: make batch update of jump_label entries Jiri Olsa
@ 2011-05-09 19:27     ` Jason Baron
  2011-05-23 16:45       ` Jiri Olsa
  0 siblings, 1 reply; 10+ messages in thread
From: Jason Baron @ 2011-05-09 19:27 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: rostedt, mingo, linux-kernel

On Mon, May 09, 2011 at 08:38:16PM +0200, Jiri Olsa wrote:
> On Wed, May 04, 2011 at 11:41:41AM +0200, Jiri Olsa wrote:
> > hi,
> > 
> > I'm changing the jump label update code to use batch processing
> > for x86 architectures. 
> > 
> > Currently each jump label update calls text_poke_smp for each
> > jump label key entry. Thus one key update ends up calling stop
> > machine multiple times.
> > 
> > This patch is using text_poke_smp_batch, which is called for
> > all the key's entries. Thus ensuring the stop machine is called
> > only once per jump_label key.
> > 
> > attached patches:
> > 1/2 - jump_label,x86: use text_poke_smp_batch for entries update
> > 	- added jump_label_update_end function which is paired with
> > 	the key's entries update
> > 	- jump_label_update_end calls arch_jump_label_update_end which
> > 	is overridden by the x86 arch and makes the batch update of all the
> > 	entries queued by arch_jump_label_transform function.
> > 
> > 2/2 - jump_label,x86: using static arrays before dynamic allocation is needed
> > 	- in the first patch, the queue array, which stores jump_label
> > 	entries is allocated/resized dynamically.
> > 	- due to the fact that many jump_label entries have low number
> > 	of callers, it seems appropriate to use static sized array
> > 	when the update starts and if needed (in case of high number
> > 	of jump_label entries) allocate/use the dynamic array
> > 
> > 
> > Patch 2/2 could be omitted if the benefit/complexity ratio
> > seems too low.. ;)
> > 
> > I tested this on x86 and s390 archs.
> > 
> > v2 changes:
> >  - queueing all entries for single key and process them
> >    all at one time
> > 
> > wbr,
> > jirka
> > ---
> >  arch/x86/kernel/jump_label.c |  177 +++++++++++++++++++++++++++++++++++++++--
> >  include/linux/jump_label.h   |    1 +
> >  kernel/jump_label.c          |   16 ++++-
> >  3 files changed, 183 insertions(+), 11 deletions(-)
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> 
> hi,
> 
> I did jump_label entry statistics on an allyesconfig kernel
> and got the following numbers:
> 
> callers - keys
>       1 - 964
>       2 - 28
>       3 - 5
>       4 - 1
>       6 - 1
>      11 - 2
>      12 - 1
>      14 - 2
>      17 - 2
>      21 - 1
>     170 - 1
> 
> 
> So the maximum is 170 callers, and just for one key:
> the one used in the trace_module_get function.
> 
> Jason suggested we might stay with the static approach, because
> for most of the keys the number of callers is at most 21.
> 
> I'm attaching the static version for consideration.
> 
> wbr,
> jirka
> 
> ---
> Changing the jump label update code to use batch processing
> for x86 architectures.
> 
> Currently each jump label update calls text_poke_smp for each
> jump label key entry. Thus one key update ends up calling stop
> machine multiple times.
> 
> This patch is using text_poke_smp_batch, which is called for
> multiple entries at a time.
> 
> Added jump_label_update_end function which is paired with
> the key's entries update.
> 
> The jump_label_update_end calls arch_jump_label_update_end
> (with a generic weak definition) which is overridden by the x86
> arch and makes the batch update of all the entries queued
> by arch_jump_label_transform function.
> 
> The number of entries that can be updated at a single time
> is set to 30. This number is based on statistics from allyesconfig
> kernel showing most of the keys having up to 30 callers.
> 
> callers - keys
>       1 - 964
>       2 - 28
>       3 - 5
>       4 - 1
>       6 - 1
>      11 - 2
>      12 - 1
>      14 - 2
>      17 - 2
>      21 - 1
>     170 - 1
> 
> 
> Signed-off-by: Jiri Olsa <jolsa@redhat.com> 
> ---
>  arch/x86/kernel/jump_label.c |   68 ++++++++++++++++++++++++++++++++++++-----
>  include/linux/jump_label.h   |    1 +
>  kernel/jump_label.c          |   16 ++++++++-
>  3 files changed, 74 insertions(+), 11 deletions(-)
> 

For me, this version is much simpler (less than half the code size of
the original patch), while being optimal for 1007/1008 of the keys.

Acked-by: Jason Baron <jbaron@redhat.com> 


Thanks.

> diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> index 3fee346..bbde5db 100644
> --- a/arch/x86/kernel/jump_label.c
> +++ b/arch/x86/kernel/jump_label.c
> @@ -24,24 +24,74 @@ union jump_code_union {
>  	} __attribute__((packed));
>  };
>  
> -void arch_jump_label_transform(struct jump_entry *entry,
> -			       enum jump_label_type type)
> +struct text_poke_buffer {
> +	u8 code[JUMP_LABEL_NOP_SIZE];
> +};
> +
> +#define POKE_CNT_MAX 30
> +
> +static struct text_poke_param  poke_pars[POKE_CNT_MAX];
> +static struct text_poke_buffer poke_bufs[POKE_CNT_MAX];
> +static int poke_cnt;
> +
> +static void poke_setup(struct text_poke_param *param, u8 *buf,
> +		       int enable,
> +		       struct jump_entry *entry)
>  {
> -	union jump_code_union code;
> +	union jump_code_union *code = (union jump_code_union *) buf;
>  
> -	if (type == JUMP_LABEL_ENABLE) {
> -		code.jump = 0xe9;
> -		code.offset = entry->target -
> -				(entry->code + JUMP_LABEL_NOP_SIZE);
> +	if (enable == JUMP_LABEL_ENABLE) {
> +		code->jump = 0xe9;
> +		code->offset = entry->target -
> +			       (entry->code + JUMP_LABEL_NOP_SIZE);
>  	} else
> -		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
> +		memcpy(code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
> +
> +	param->addr = (void *) entry->code;
> +	param->opcode = code;
> +	param->len = JUMP_LABEL_NOP_SIZE;
> +}
> +
> +static void poke_process(void)
> +{
>  	get_online_cpus();
>  	mutex_lock(&text_mutex);
> -	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
> +
> +	text_poke_smp_batch(poke_pars, poke_cnt);
> +	poke_cnt = 0;
> +
>  	mutex_unlock(&text_mutex);
>  	put_online_cpus();
>  }
>  
> +static void poke_end(void)
> +{
> +	if (!poke_cnt)
> +		return;
> +
> +	poke_process();
> +}
> +
> +void arch_jump_label_transform(struct jump_entry *entry,
> +			       enum jump_label_type enable)
> +{
> +	if (poke_cnt == POKE_CNT_MAX)
> +		poke_process();
> +
> +	poke_setup(&poke_pars[poke_cnt], poke_bufs[poke_cnt].code,
> +		   enable, entry);
> +	poke_cnt++;
> +}
> +
> +/*
> + * Called after arch_jump_label_transform is called for
> + * all entries of a single key.
> + */
> +void arch_jump_label_update_end(void)
> +{
> +	poke_end();
> +}
> +
>  void arch_jump_label_text_poke_early(jump_label_t addr)
>  {
>  	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
> diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
> index 83e745f..e7a8fa3 100644
> --- a/include/linux/jump_label.h
> +++ b/include/linux/jump_label.h
> @@ -46,6 +46,7 @@ extern void jump_label_unlock(void);
>  extern void arch_jump_label_transform(struct jump_entry *entry,
>  				 enum jump_label_type type);
>  extern void arch_jump_label_text_poke_early(jump_label_t addr);
> +extern void arch_jump_label_update_end(void);
>  extern int jump_label_text_reserved(void *start, void *end);
>  extern void jump_label_inc(struct jump_label_key *key);
>  extern void jump_label_dec(struct jump_label_key *key);
> diff --git a/kernel/jump_label.c b/kernel/jump_label.c
> index 74d1c09..6657a37 100644
> --- a/kernel/jump_label.c
> +++ b/kernel/jump_label.c
> @@ -125,6 +125,15 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
>  {
>  }
>  
> +void __weak arch_jump_label_update_end(void)
> +{
> +}
> +
> +void jump_label_update_end(void)
> +{
> +	arch_jump_label_update_end();
> +}
> +
>  static __init int jump_label_init(void)
>  {
>  	struct jump_entry *iter_start = __start___jump_table;
> @@ -244,10 +253,11 @@ static int jump_label_add_module(struct module *mod)
>  		jlm->next = key->next;
>  		key->next = jlm;
>  
> -		if (jump_label_enabled(key))
> +		if (jump_label_enabled(key)) {
>  			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
> +			jump_label_update_end();
> +		}
>  	}
> -
>  	return 0;
>  }
>  
> @@ -376,6 +386,8 @@ static void jump_label_update(struct jump_label_key *key, int enable)
>  #ifdef CONFIG_MODULES
>  	__jump_label_mod_update(key, enable);
>  #endif
> +
> +	jump_label_update_end();
>  }
>  
>  #endif
> -- 
> 1.7.1
> 


* Re: [PATCHv3] jump_label,x86: make batch update of jump_label entries
  2011-05-09 19:27     ` Jason Baron
@ 2011-05-23 16:45       ` Jiri Olsa
  2011-05-23 20:53         ` Steven Rostedt
  0 siblings, 1 reply; 10+ messages in thread
From: Jiri Olsa @ 2011-05-23 16:45 UTC (permalink / raw)
  To: rostedt; +Cc: Jason Baron, mingo, linux-kernel

hi,
any feedback?

thanks,
jirka

On Mon, May 09, 2011 at 03:27:26PM -0400, Jason Baron wrote:
> On Mon, May 09, 2011 at 08:38:16PM +0200, Jiri Olsa wrote:
> > On Wed, May 04, 2011 at 11:41:41AM +0200, Jiri Olsa wrote:
> > > hi,
> > > 
> > > I'm changing the jump label update code to use batch processing
> > > for x86 architectures. 
> > > 
> > > Currently each jump label update calls text_poke_smp for each
> > > jump label key entry. Thus one key update ends up calling stop
> > > machine multiple times.
> > > 
> > > This patch is using text_poke_smp_batch, which is called for
> > > all the key's entries. Thus ensuring the stop machine is called
> > > only once per jump_label key.
> > > 
> > > attached patches:
> > > 1/2 - jump_label,x86: use text_poke_smp_batch for entries update
> > > 	- added jump_label_update_end function which is paired with
> > > 	the key's entries update
> > > 	- jump_label_update_end calls arch_jump_label_update_end which
> > > 	is overridden by the x86 arch and makes the batch update of all the
> > > 	entries queued by arch_jump_label_transform function.
> > > 
> > > 2/2 - jump_label,x86: using static arrays before dynamic allocation is needed
> > > 	- in the first patch, the queue array, which stores jump_label
> > > 	entries is allocated/resized dynamically.
> > > 	- due to the fact that many jump_label entries have low number
> > > 	of callers, it seems appropriate to use static sized array
> > > 	when the update starts and if needed (in case of high number
> > > 	of jump_label entries) allocate/use the dynamic array
> > > 
> > > 
> > > Patch 2/2 could be omitted if the benefit/complexity ratio
> > > seems too low.. ;)
> > > 
> > > I tested this on x86 and s390 archs.
> > > 
> > > v2 changes:
> > >  - queueing all entries for single key and process them
> > >    all at one time
> > > 
> > > wbr,
> > > jirka
> > > ---
> > >  arch/x86/kernel/jump_label.c |  177 +++++++++++++++++++++++++++++++++++++++--
> > >  include/linux/jump_label.h   |    1 +
> > >  kernel/jump_label.c          |   16 ++++-
> > >  3 files changed, 183 insertions(+), 11 deletions(-)
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at  http://www.tux.org/lkml/
> > 
> > hi,
> > 
> > I did jump_label entry statistics on an allyesconfig kernel
> > and got the following numbers:
> > 
> > callers - keys
> >       1 - 964
> >       2 - 28
> >       3 - 5
> >       4 - 1
> >       6 - 1
> >      11 - 2
> >      12 - 1
> >      14 - 2
> >      17 - 2
> >      21 - 1
> >     170 - 1
> > 
> > 
> > So the maximum is 170 callers, and just for one key:
> > the one used in the trace_module_get function.
> > 
> > Jason suggested we might stay with the static approach, because
> > for most of the keys the number of callers is at most 21.
> > 
> > I'm attaching the static version for consideration.
> > 
> > wbr,
> > jirka
> > 
> > ---
> > Changing the jump label update code to use batch processing
> > for x86 architectures.
> > 
> > Currently each jump label update calls text_poke_smp for each
> > jump label key entry. Thus one key update ends up calling stop
> > machine multiple times.
> > 
> > This patch is using text_poke_smp_batch, which is called for
> > multiple entries at a time.
> > 
> > Added jump_label_update_end function which is paired with
> > the key's entries update.
> > 
> > The jump_label_update_end calls arch_jump_label_update_end
> > (with a generic weak definition) which is overridden by the x86
> > arch and makes the batch update of all the entries queued
> > by arch_jump_label_transform function.
> > 
> > The number of entries that can be updated at a single time
> > is set to 30. This number is based on statistics from allyesconfig
> > kernel showing most of the keys having up to 30 callers.
> > 
> > callers - keys
> >       1 - 964
> >       2 - 28
> >       3 - 5
> >       4 - 1
> >       6 - 1
> >      11 - 2
> >      12 - 1
> >      14 - 2
> >      17 - 2
> >      21 - 1
> >     170 - 1
> > 
> > 
> > Signed-off-by: Jiri Olsa <jolsa@redhat.com> 
> > ---
> >  arch/x86/kernel/jump_label.c |   68 ++++++++++++++++++++++++++++++++++++-----
> >  include/linux/jump_label.h   |    1 +
> >  kernel/jump_label.c          |   16 ++++++++-
> >  3 files changed, 74 insertions(+), 11 deletions(-)
> > 
> 
> For me, this version is much simpler (less than half the code size of
> the original patch), while being optimal for 1007/1008 of the keys.
> 
> Acked-by: Jason Baron <jbaron@redhat.com> 
> 
> 
> Thanks.
> 
> > diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
> > index 3fee346..bbde5db 100644
> > --- a/arch/x86/kernel/jump_label.c
> > +++ b/arch/x86/kernel/jump_label.c
> > @@ -24,24 +24,74 @@ union jump_code_union {
> >  	} __attribute__((packed));
> >  };
> >  
> > -void arch_jump_label_transform(struct jump_entry *entry,
> > -			       enum jump_label_type type)
> > +struct text_poke_buffer {
> > +	u8 code[JUMP_LABEL_NOP_SIZE];
> > +};
> > +
> > +#define POKE_CNT_MAX 30
> > +
> > +static struct text_poke_param  poke_pars[POKE_CNT_MAX];
> > +static struct text_poke_buffer poke_bufs[POKE_CNT_MAX];
> > +static int poke_cnt;
> > +
> > +static void poke_setup(struct text_poke_param *param, u8 *buf,
> > +		       int enable,
> > +		       struct jump_entry *entry)
> >  {
> > -	union jump_code_union code;
> > +	union jump_code_union *code = (union jump_code_union *) buf;
> >  
> > -	if (type == JUMP_LABEL_ENABLE) {
> > -		code.jump = 0xe9;
> > -		code.offset = entry->target -
> > -				(entry->code + JUMP_LABEL_NOP_SIZE);
> > +	if (enable == JUMP_LABEL_ENABLE) {
> > +		code->jump = 0xe9;
> > +		code->offset = entry->target -
> > +			       (entry->code + JUMP_LABEL_NOP_SIZE);
> >  	} else
> > -		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
> > +		memcpy(code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
> > +
> > +	param->addr = (void *) entry->code;
> > +	param->opcode = code;
> > +	param->len = JUMP_LABEL_NOP_SIZE;
> > +}
> > +
> > +static void poke_process(void)
> > +{
> >  	get_online_cpus();
> >  	mutex_lock(&text_mutex);
> > -	text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
> > +
> > +	text_poke_smp_batch(poke_pars, poke_cnt);
> > +	poke_cnt = 0;
> > +
> >  	mutex_unlock(&text_mutex);
> >  	put_online_cpus();
> >  }
> >  
> > +static void poke_end(void)
> > +{
> > +	if (!poke_cnt)
> > +		return;
> > +
> > +	poke_process();
> > +}
> > +
> > +void arch_jump_label_transform(struct jump_entry *entry,
> > +			       enum jump_label_type enable)
> > +{
> > +	if (poke_cnt == POKE_CNT_MAX)
> > +		poke_process();
> > +
> > +	poke_setup(&poke_pars[poke_cnt], poke_bufs[poke_cnt].code,
> > +		   enable, entry);
> > +	poke_cnt++;
> > +}
> > +
> > +/*
> > + * Called after arch_jump_label_transform is called for
> > + * all entries of a single key.
> > + */
> > +void arch_jump_label_update_end(void)
> > +{
> > +	poke_end();
> > +}
> > +
> >  void arch_jump_label_text_poke_early(jump_label_t addr)
> >  {
> >  	text_poke_early((void *)addr, ideal_nops[NOP_ATOMIC5],
> > diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
> > index 83e745f..e7a8fa3 100644
> > --- a/include/linux/jump_label.h
> > +++ b/include/linux/jump_label.h
> > @@ -46,6 +46,7 @@ extern void jump_label_unlock(void);
> >  extern void arch_jump_label_transform(struct jump_entry *entry,
> >  				 enum jump_label_type type);
> >  extern void arch_jump_label_text_poke_early(jump_label_t addr);
> > +extern void arch_jump_label_update_end(void);
> >  extern int jump_label_text_reserved(void *start, void *end);
> >  extern void jump_label_inc(struct jump_label_key *key);
> >  extern void jump_label_dec(struct jump_label_key *key);
> > diff --git a/kernel/jump_label.c b/kernel/jump_label.c
> > index 74d1c09..6657a37 100644
> > --- a/kernel/jump_label.c
> > +++ b/kernel/jump_label.c
> > @@ -125,6 +125,15 @@ void __weak arch_jump_label_text_poke_early(jump_label_t addr)
> >  {
> >  }
> >  
> > +void __weak arch_jump_label_update_end(void)
> > +{
> > +}
> > +
> > +void jump_label_update_end(void)
> > +{
> > +	arch_jump_label_update_end();
> > +}
> > +
> >  static __init int jump_label_init(void)
> >  {
> >  	struct jump_entry *iter_start = __start___jump_table;
> > @@ -244,10 +253,11 @@ static int jump_label_add_module(struct module *mod)
> >  		jlm->next = key->next;
> >  		key->next = jlm;
> >  
> > -		if (jump_label_enabled(key))
> > +		if (jump_label_enabled(key)) {
> >  			__jump_label_update(key, iter, JUMP_LABEL_ENABLE);
> > +			jump_label_update_end();
> > +		}
> >  	}
> > -
> >  	return 0;
> >  }
> >  
> > @@ -376,6 +386,8 @@ static void jump_label_update(struct jump_label_key *key, int enable)
> >  #ifdef CONFIG_MODULES
> >  	__jump_label_mod_update(key, enable);
> >  #endif
> > +
> > +	jump_label_update_end();
> >  }
> >  
> >  #endif
> > -- 
> > 1.7.1
> > 


* Re: [PATCHv3] jump_label,x86: make batch update of jump_label entries
  2011-05-23 16:45       ` Jiri Olsa
@ 2011-05-23 20:53         ` Steven Rostedt
  0 siblings, 0 replies; 10+ messages in thread
From: Steven Rostedt @ 2011-05-23 20:53 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: Jason Baron, mingo, linux-kernel

On Mon, 2011-05-23 at 18:45 +0200, Jiri Olsa wrote:
> hi,
> any feedback?
> 

Hi Jiri,

I'm just dealing with some bugs people reported related to some of
my recent work. I'll start working on this after that. This may
not make it into this merge window, but I'll make sure I push it
off to Ingo for 2.6.41.

Thanks,

-- Steve




end of thread

Thread overview: 10+ messages
2011-04-28 20:52 [PATCH] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
2011-04-29 15:15 ` Jason Baron
2011-04-29 15:37   ` Jiri Olsa
2011-05-04  9:41 ` [PATCHv2 0/2] jump_label,x86: make batch update of jump_label entries Jiri Olsa
2011-05-04  9:41   ` [PATCHv2 1/2] jump_label,x86: use text_poke_smp_batch for entries update Jiri Olsa
2011-05-04  9:41   ` [PATCHv2 2/2] jump_label,x86: using static arrays before dynamic allocation is needed Jiri Olsa
2011-05-09 18:38   ` [PATCHv3] jump_label,x86: make batch update of jump_label entries Jiri Olsa
2011-05-09 19:27     ` Jason Baron
2011-05-23 16:45       ` Jiri Olsa
2011-05-23 20:53         ` Steven Rostedt
