All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] trace-cmd/kernel-shark: Use one quick hash algorithm
@ 2019-09-20 15:15 Steven Rostedt
  2019-09-20 15:15 ` [PATCH 1/2] trace-cmd: Make a global tracecmd_quick_hash() instead of a local knuth_hash() Steven Rostedt
  2019-09-20 15:15 ` [PATCH 2/2] kernel-shark: Increase the size of the task hash Steven Rostedt
  0 siblings, 2 replies; 4+ messages in thread
From: Steven Rostedt @ 2019-09-20 15:15 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Yordan Karadzhov


Instead of having multiple "knuth_hash()" static functions, make one
generic tracecmd_quick_hash() function and use that. It also adds
a "bits" field so that the result can be limited to any power of
2 that is less than 2^32.


Steven Rostedt (VMware) (2):
      trace-cmd: Make a global tracecmd_quick_hash() instead of a local knuth_hash()
      kernel-shark: Increase the size of the task hash

----
 include/trace-cmd/trace-filter-hash.h | 24 ++++++++++++++++++++++++
 kernel-shark/src/libkshark.c          | 18 ++++--------------
 kernel-shark/src/libkshark.h          |  3 ++-
 lib/trace-cmd/trace-filter-hash.c     | 20 +++++---------------
 4 files changed, 35 insertions(+), 30 deletions(-)


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/2] trace-cmd: Make a global tracecmd_quick_hash() instead of a local knuth_hash()
  2019-09-20 15:15 [PATCH 0/2] trace-cmd/kernel-shark: Use one quick hash algorithm Steven Rostedt
@ 2019-09-20 15:15 ` Steven Rostedt
  2019-09-20 15:15 ` [PATCH 2/2] kernel-shark: Increase the size of the task hash Steven Rostedt
  1 sibling, 0 replies; 4+ messages in thread
From: Steven Rostedt @ 2019-09-20 15:15 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Yordan Karadzhov

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

As the 32 bit Knuth algorith produces the same results as the small
sizes[1], create a single algorithm that takes in a @bits parameter that
will return a masked result. And replace the local version of knuth_hash()
used by the trace-cmd filter code. This will also be used to remove other
copies of knuth_hash().

[1] https://lore.kernel.org/r/20190829114913.5df4ced9@gandalf.local.home

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace-cmd/trace-filter-hash.h | 24 ++++++++++++++++++++++++
 lib/trace-cmd/trace-filter-hash.c     | 20 +++++---------------
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/include/trace-cmd/trace-filter-hash.h b/include/trace-cmd/trace-filter-hash.h
index e94bc87d1e1d..4111c41eeb2d 100644
--- a/include/trace-cmd/trace-filter-hash.h
+++ b/include/trace-cmd/trace-filter-hash.h
@@ -19,6 +19,30 @@ struct tracecmd_filter_id {
 	int				count;
 };
 
+/**
+ * tracecmd_quick_hash - A quick (non secured) hash alogirthm
+ * @val: The value to perform the hash on
+ * @bits: The size in bits you need to return
+ *
+ * This is a quick hashing function adapted from Donald E. Knuth's 32
+ * bit multiplicative hash.  See The Art of Computer Programming (TAOCP).
+ * Multiplication by the Prime number, closest to the golden ratio of
+ * 2^32.
+ *
+ * @bits is used to max the result for use cases that require
+ * a power of 2 return value that is less than 32 bits. Any value
+ * of @bits greater than 31 (or zero), will simply return the full hash on @val.
+ */
+static inline uint32_t tracecmd_quick_hash(uint32_t val, unsigned int bits)
+{
+	val *= UINT32_C(2654435761);
+
+	if (!bits || bits > 31)
+		return val;
+
+	return val & ((1 << bits) - 1);
+}
+
 struct tracecmd_filter_id_item *
   tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id);
 void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id);
diff --git a/lib/trace-cmd/trace-filter-hash.c b/lib/trace-cmd/trace-filter-hash.c
index 45ca68c2959e..f5f0fb09403b 100644
--- a/lib/trace-cmd/trace-filter-hash.c
+++ b/lib/trace-cmd/trace-filter-hash.c
@@ -12,23 +12,13 @@
 
 #include "trace-filter-hash.h"
 
-#define FILTER_HASH_SIZE	256
-
-static inline uint8_t knuth_hash(uint32_t val)
-{
-	/*
-	 * Small table hashing function adapted from Donald E. Knuth's 32 bit
-	 * multiplicative hash.  See The Art of Computer Programming (TAOCP).
-	 * Multiplication by the Prime number, closest to the golden ratio of
-	 * 2^8.
-	 */
-	return UINT8_C(val) * UINT8_C(157);
-}
+#define FILTER_HASH_BITS	8
+#define FILTER_HASH_SIZE	(1 << FILTER_HASH_BITS)
 
 struct tracecmd_filter_id_item *
 tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id)
 {
-	int key = knuth_hash(id);
+	int key = tracecmd_quick_hash(id, FILTER_HASH_BITS);
 	struct tracecmd_filter_id_item *item = hash->hash[key];
 
 	while (item) {
@@ -42,7 +32,7 @@ tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id)
 
 void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id)
 {
-	int key = knuth_hash(id);
+	int key = tracecmd_quick_hash(id, FILTER_HASH_BITS);
 	struct tracecmd_filter_id_item *item;
 
 	item = calloc(1, sizeof(*item));
@@ -57,7 +47,7 @@ void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id)
 
 void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id)
 {
-	int key = knuth_hash(id);
+	int key = tracecmd_quick_hash(id, FILTER_HASH_BITS);
 	struct tracecmd_filter_id_item **next = &hash->hash[key];
 	struct tracecmd_filter_id_item *item;
 
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] kernel-shark: Increase the size of the task hash
  2019-09-20 15:15 [PATCH 0/2] trace-cmd/kernel-shark: Use one quick hash algorithm Steven Rostedt
  2019-09-20 15:15 ` [PATCH 1/2] trace-cmd: Make a global tracecmd_quick_hash() instead of a local knuth_hash() Steven Rostedt
@ 2019-09-20 15:15 ` Steven Rostedt
  2019-09-20 15:47   ` Yordan Karadzhov (VMware)
  1 sibling, 1 reply; 4+ messages in thread
From: Steven Rostedt @ 2019-09-20 15:15 UTC (permalink / raw)
  To: linux-trace-devel; +Cc: Yordan Karadzhov

From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

When loading a data file that contained 100,000s of tasks, using a 256
bucket size hash crippled it. By increasing the hash to 2^16 (65536) it
solves the issue (still small enough not to waste too much memory).

Also switched to the tracecmd_quick_hash() which is basically the same
as the local knuth_hash() function in libkshark.c.

Link: http://lore.kernel.org/linux-trace-devel/20190828140016.3ce1be4f@gandalf.local.home

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel-shark/src/libkshark.c | 18 ++++--------------
 kernel-shark/src/libkshark.h |  3 ++-
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/kernel-shark/src/libkshark.c b/kernel-shark/src/libkshark.c
index 4207ae6ffdb2..a36157835ce0 100644
--- a/kernel-shark/src/libkshark.c
+++ b/kernel-shark/src/libkshark.c
@@ -252,19 +252,8 @@ void kshark_free(struct kshark_context *kshark_ctx)
 	free(kshark_ctx);
 }
 
-static inline uint8_t knuth_hash(uint32_t val)
-{
-	/*
-	 * Small table hashing function adapted from Donald E. Knuth's 32 bit
-	 * multiplicative hash.  See The Art of Computer Programming (TAOCP).
-	 * Multiplication by the Prime number, closest to the golden ratio of
-	 * 2^8.
-	 */
-	return UINT8_C(val) * UINT8_C(157);
-}
-
 static struct kshark_task_list *
-kshark_find_task(struct kshark_context *kshark_ctx, uint8_t key, int pid)
+kshark_find_task(struct kshark_context *kshark_ctx, uint32_t key, int pid)
 {
 	struct kshark_task_list *list;
 
@@ -280,9 +269,10 @@ static struct kshark_task_list *
 kshark_add_task(struct kshark_context *kshark_ctx, int pid)
 {
 	struct kshark_task_list *list;
-	uint8_t key;
+	uint32_t key;
+
+	key = tracecmd_quick_hash(pid, KS_TASK_HASH_SHIFT);
 
-	key = knuth_hash(pid);
 	list = kshark_find_task(kshark_ctx, key, pid);
 	if (list)
 		return list;
diff --git a/kernel-shark/src/libkshark.h b/kernel-shark/src/libkshark.h
index 04e9cbfc71df..3407db197320 100644
--- a/kernel-shark/src/libkshark.h
+++ b/kernel-shark/src/libkshark.h
@@ -72,7 +72,8 @@ struct kshark_entry {
 };
 
 /** Size of the task's hash table. */
-#define KS_TASK_HASH_SIZE 256
+#define KS_TASK_HASH_SHIFT 16
+#define KS_TASK_HASH_SIZE (1 << KS_TASK_HASH_SHIFT)
 
 /** Linked list of tasks. */
 struct kshark_task_list {
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] kernel-shark: Increase the size of the task hash
  2019-09-20 15:15 ` [PATCH 2/2] kernel-shark: Increase the size of the task hash Steven Rostedt
@ 2019-09-20 15:47   ` Yordan Karadzhov (VMware)
  0 siblings, 0 replies; 4+ messages in thread
From: Yordan Karadzhov (VMware) @ 2019-09-20 15:47 UTC (permalink / raw)
  To: Steven Rostedt, linux-trace-devel



On 20.09.19 г. 18:15 ч., Steven Rostedt wrote:
> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> 
> When loading a data file that contained 100,000s of tasks, using a 256
> bucket size hash crippled it. By increasing the hash to 2^16 (65536) it
> solves the issue (still small enough not to waste too much memory).
> 
> Also switched to the tracecmd_quick_hash() which is basically the same
> as the local knuth_hash() function in libkshark.c.
> 
> Link: http://lore.kernel.org/linux-trace-devel/20190828140016.3ce1be4f@gandalf.local.home
> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>   kernel-shark/src/libkshark.c | 18 ++++--------------
>   kernel-shark/src/libkshark.h |  3 ++-
>   2 files changed, 6 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel-shark/src/libkshark.c b/kernel-shark/src/libkshark.c
> index 4207ae6ffdb2..a36157835ce0 100644
> --- a/kernel-shark/src/libkshark.c
> +++ b/kernel-shark/src/libkshark.c
> @@ -252,19 +252,8 @@ void kshark_free(struct kshark_context *kshark_ctx)
>   	free(kshark_ctx);
>   }
>   
> -static inline uint8_t knuth_hash(uint32_t val)
> -{
> -	/*
> -	 * Small table hashing function adapted from Donald E. Knuth's 32 bit
> -	 * multiplicative hash.  See The Art of Computer Programming (TAOCP).
> -	 * Multiplication by the Prime number, closest to the golden ratio of
> -	 * 2^8.
> -	 */
> -	return UINT8_C(val) * UINT8_C(157);
> -}
> -
>   static struct kshark_task_list *
> -kshark_find_task(struct kshark_context *kshark_ctx, uint8_t key, int pid)
> +kshark_find_task(struct kshark_context *kshark_ctx, uint32_t key, int pid)
>   {
>   	struct kshark_task_list *list;
>   
> @@ -280,9 +269,10 @@ static struct kshark_task_list *
>   kshark_add_task(struct kshark_context *kshark_ctx, int pid)
>   {
>   	struct kshark_task_list *list;
> -	uint8_t key;
> +	uint32_t key;
> +
> +	key = tracecmd_quick_hash(pid, KS_TASK_HASH_SHIFT);
>   
> -	key = knuth_hash(pid);
>   	list = kshark_find_task(kshark_ctx, key, pid);
>   	if (list)
>   		return list;
> diff --git a/kernel-shark/src/libkshark.h b/kernel-shark/src/libkshark.h
> index 04e9cbfc71df..3407db197320 100644
> --- a/kernel-shark/src/libkshark.h
> +++ b/kernel-shark/src/libkshark.h
> @@ -72,7 +72,8 @@ struct kshark_entry {
>   };
>   
>   /** Size of the task's hash table. */
> -#define KS_TASK_HASH_SIZE 256
> +#define KS_TASK_HASH_SHIFT 16
> +#define KS_TASK_HASH_SIZE (1 << KS_TASK_HASH_SHIFT)
>   
>   /** Linked list of tasks. */
>   struct kshark_task_list {
> 

Both patches look good to me.
Thanks!


Reviewed-by: Yordan Karadzhov (VMware) <y.karadz@gmail.com>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-09-20 15:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-20 15:15 [PATCH 0/2] trace-cmd/kernel-shark: Use one quick hash algorithm Steven Rostedt
2019-09-20 15:15 ` [PATCH 1/2] trace-cmd: Make a global tracecmd_quick_hash() instead of a local knuth_hash() Steven Rostedt
2019-09-20 15:15 ` [PATCH 2/2] kernel-shark: Increase the size of the task hash Steven Rostedt
2019-09-20 15:47   ` Yordan Karadzhov (VMware)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.