All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] hash: fix memcmp function pointer in multi-process environment
@ 2016-03-14  2:16 Dhana Eadala
  2016-03-14  2:30 ` 张伟
                   ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Dhana Eadala @ 2016-03-14  2:16 UTC (permalink / raw)
  To: bruce.richardson, pablo.de.lara.guarch, michael.qiu; +Cc: dev, Dhana Eadala

We found a problem using dpdk-2.2 in a multi-process environment.
Here is a brief description of how we are using the dpdk:

We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
two different compiled binaries.
proc1 is started as primary process and proc2 as secondary process.

proc1:
Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
As part of this, this api initialized the rte_hash structure and set the
srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.

proc2:
calls srcHash =  rte_hash_find_existing("src_hash_name").
This function call returns the rte_hash created by proc1.
This srcHash->rte_hash_cmp_eq still points to the address of
memcmp() from proc1 address space.
Later proc2  calls
rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
This leads to a crash as h->rte_hash_cmp_eq is an address
from proc1 address space and is invalid address in proc2 address space.

We found, from dpdk documentation, that

"
 The use of function pointers between multiple processes
 running based of different compiled
 binaries is not supported, since the location of a given function
 in one process may be different to
 its location in a second. This prevents the librte_hash library
 from behaving properly as in a  multi-
 threaded instance, since it uses a pointer to the hash function internally.

 To work around this issue, it is recommended that
 multi-process applications perform the hash
 calculations by directly calling the hashing function
 from the code and then using the
 rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
 instead of the functions which do
 the hashing internally, such as rte_hash_add()/rte_hash_lookup().
"

We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
There was no issue up to and including dpdk-2.0.
Later releases started crashing because rte_hash_cmp_eq was
introduced in dpdk-2.1.

We fixed it with the following patch and would like to
submit the patch to dpdk.org.
Patch is created such that, if anyone wanted to use dpdk in
multi-process environment with function pointers not shared, they need to
define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
Without defining this flag in Makefile, it works as it is now.

Signed-off-by: Dhana Eadala <edreddy@gmail.com>
---
 lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 3e3167c..0946777 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 				prim_bkt->signatures[i].alt == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					prim_bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				/* Enqueue index of free slot back in the ring. */
 				enqueue_slot_back(h, cached_free_slots, slot_id);
 				/* Update data */
@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 				sec_bkt->signatures[i].current == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					sec_bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				/* Enqueue index of free slot back in the ring. */
 				enqueue_slot_back(h, cached_free_slots, slot_id);
 				/* Update data */
@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp (key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				if (data != NULL)
 					*data = k->pdata;
 				/*
@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].alt == sig) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				if (data != NULL)
 					*data = k->pdata;
 				/*
@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				remove_entry(h, bkt, i);
 
 				/*
@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				remove_entry(h, bkt, i);
 
 				/*
@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
 	unsigned hit;
 	unsigned key_idx;
 
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
+#else
 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
+#endif
 	if (data != NULL)
 		data[idx] = key_slot->pdata;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-14  2:16 [PATCH] hash: fix memcmp function pointer in multi-process environment Dhana Eadala
@ 2016-03-14  2:30 ` 张伟
  2016-03-14  4:38 ` 张伟
  2016-03-22 11:42 ` Thomas Monjalon
  2 siblings, 0 replies; 16+ messages in thread
From: 张伟 @ 2016-03-14  2:30 UTC (permalink / raw)
  To: Dhana Eadala; +Cc: bruce.richardson, pablo.de.lara.guarch, michael.qiu, dev

I ran into a problem when using the DPDK hash table across multiple processes. One process started as the primary process and the other as a secondary process.


I based my code on the client and server multiprocess example. My aim is that the server creates a hash table, then shares it with the client. The client will read and write the hash table, and the server will read the hash table. I use rte_calloc to allocate the space for the hash table, and use a memzone to tell the client the hash table address.
But once I add an entry into the hash table, calling the "lookup" function causes a segmentation fault, even though I pass exactly the same parameters to lookup as in the first call that succeeded.
If I create the hash table inside the client, everything works correctly.
I have included pieces of the server and client code related to the hash table. I have spent almost 3 days on this bug, but there is no clue that helps solve it. If any of you can give some suggestions, I would appreciate it. I posted the question to the mailing list, but have not yet received any reply.


This problem occurs in the dpdk multi-process client and server example, dpdk-2.2.0/examples/multi_process/client_server_mp.
My aim is that the server creates a hash table, then shares it with the client. The client will write to the hash table, and the server will read it. I am using the dpdk hash table. What I did is that the server creates a hash table (table and array entries) and returns the table address. I use a memzone to pass the table address to the client. In the client, the second lookup gets a segmentation fault and the system crashes. I will put some related code here.
create hash table function:

struct onvm_ft*

onvm_ft_create(int cnt, int entry_size) {

        struct rte_hash* hash;

        struct onvm_ft* ft;

        struct rte_hash_parameters ipv4_hash_params = {

            .name = NULL,

            .entries = cnt,

            .key_len = sizeof(struct onvm_ft_ipv4_5tuple),

            .hash_func = NULL,

            .hash_func_init_val = 0,

        };




        char s[64];

        /* create ipv4 hash table. use core number and cycle counter to get a unique name. */

        ipv4_hash_params.name = s;

        ipv4_hash_params.socket_id = rte_socket_id();

        snprintf(s, sizeof(s), "onvm_ft_%d-%"PRIu64, rte_lcore_id(), rte_get_tsc_cycles());

        hash = rte_hash_create(&ipv4_hash_params);

        if (hash == NULL) {

                return NULL;

        }

        ft = (struct onvm_ft*)rte_calloc("table", 1, sizeof(struct onvm_ft), 0);

        if (ft == NULL) {

                rte_hash_free(hash);

                return NULL;

        }

        ft->hash = hash;

        ft->cnt = cnt;

        ft->entry_size = entry_size;

        /* Create data array for storing values */

        ft->data = rte_calloc("entry", cnt, entry_size, 0);

        if (ft->data == NULL) {

                rte_hash_free(hash);

                rte_free(ft);

                return NULL;

        }

        return ft;

}




related structure:

struct onvm_ft {

        struct rte_hash* hash;

        char* data;

        int cnt;

        int entry_size;

};




in server side, I will call the create function, use memzone share it to client. The following is what I do:

related variables:

struct onvm_ft *sdn_ft;

struct onvm_ft **sdn_ft_p;

const struct rte_memzone *mz_ftp;




        sdn_ft = onvm_ft_create(1024, sizeof(struct onvm_flow_entry));

        if(sdn_ft == NULL) {

                rte_exit(EXIT_FAILURE, "Unable to create flow table\n");

        }

        mz_ftp = rte_memzone_reserve(MZ_FTP_INFO, sizeof(struct onvm_ft *),

                                  rte_socket_id(), NO_FLAGS);

        if (mz_ftp == NULL) {

                rte_exit(EXIT_FAILURE, "Canot reserve memory zone for flow table pointer\n");

        }

        memset(mz_ftp->addr, 0, sizeof(struct onvm_ft *));

        sdn_ft_p = mz_ftp->addr;

        *sdn_ft_p = sdn_ft;




In client side:

struct onvm_ft *sdn_ft;

static void

map_flow_table(void) {

        const struct rte_memzone *mz_ftp;

        struct onvm_ft **ftp;




        mz_ftp = rte_memzone_lookup(MZ_FTP_INFO);

        if (mz_ftp == NULL)

                rte_exit(EXIT_FAILURE, "Cannot get flow table pointer\n");

        ftp = mz_ftp->addr;

        sdn_ft = *ftp;

}




The following is my debug message: I set a breakpoint in lookup table line. To narrow down the problem, I just send one flow. So the second time and the first time, the packets are the same.  

For the first time, it works. I print out the parameters: inside the onvm_ft_lookup function, if there is a related entry, it will return the address by flow_entry. 

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft 

$1 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$2 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99d00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) 

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$3 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key

$4 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss 

$5 = 403183624

(gdb) c




After I hit c, it will do the second lookup,

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft


$7 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$8 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99c00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) n

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$9 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key 

$10 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss

$11 = 403183624

(gdb) n





Program received signal SIGSEGV, Segmentation fault.

0x000000000045fb97 in __rte_hash_lookup_bulk ()

(gdb) bt

#0  0x000000000045fb97 in __rte_hash_lookup_bulk ()

#1  0x0000000000000000 in ?? ()




From the debug message, the parameters are exactly the same. I do not know why it has the segmentation fault. 

my lookup function:

int

onvm_ft_lookup(struct onvm_ft* table, struct onvm_ft_ipv4_5tuple *key, char** data) {

        int32_t tbl_index;

        uint32_t softrss;




        softrss = onvm_softrss(key);

        printf("software rss %d\n", softrss);




        tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

        if (tbl_index >= 0) {

                *data = onvm_ft_get_data(table, tbl_index);

                return 0;

        }

        else {

                return tbl_index;

        }

}







At 2016-03-14 10:16:48, "Dhana Eadala" <edreddy@gmail.com> wrote:
>We found a problem in dpdk-2.2 using under multi-process environment.
>Here is the brief description how we are using the dpdk:
>
>We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
>two different compiled binaries.
>proc1 is started as primary process and proc2 as secondary process.
>
>proc1:
>Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
>As part of this, this api initalized the rte_hash structure and set the
>srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
>
>proc2:
>calls srcHash =  rte_hash_find_existing("src_hash_name").
>This function call returns the rte_hash created by proc1.
>This srcHash->rte_hash_cmp_eq still points to the address of
>memcmp() from proc1 address space.
>Later proc2  calls
>rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
>rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
>which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
>This leads to a crash as h->rte_hash_cmp_eq is an address
>from proc1 address space and is invalid address in proc2 address space.
>
>We found, from dpdk documentation, that
>
>"
> The use of function pointers between multiple processes
> running based of different compiled
> binaries is not supported, since the location of a given function
> in one process may be different to
> its location in a second. This prevents the librte_hash library
> from behaving properly as in a  multi-
> threaded instance, since it uses a pointer to the hash function internally.
>
> To work around this issue, it is recommended that
> multi-process applications perform the hash
> calculations by directly calling the hashing function
> from the code and then using the
> rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> instead of the functions which do
> the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>"
>
>We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
>It was no issue up to and including dpdk-2.0.
>In later releases started crashing because rte_hash_cmp_eq is
>introduced in dpdk-2.1
>
>We fixed it with the following patch and would like to
>submit the patch to dpdk.org.
>Patch is created such that, if anyone wanted to use dpdk in
>multi-process environment with function pointers not shared, they need to
>define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
>Without defining this flag in Makefile, it works as it is now.
>
>Signed-off-by: Dhana Eadala <edreddy@gmail.com>
>---
> lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
>diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
>index 3e3167c..0946777 100644
>--- a/lib/librte_hash/rte_cuckoo_hash.c
>+++ b/lib/librte_hash/rte_cuckoo_hash.c
>@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				prim_bkt->signatures[i].alt == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					prim_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				sec_bkt->signatures[i].current == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					sec_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp (key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].alt == sig) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
> 	unsigned hit;
> 	unsigned key_idx;
> 
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
>+#else
> 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
>+#endif
> 	if (data != NULL)
> 		data[idx] = key_slot->pdata;
> 
>-- 
>2.5.0
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-14  2:16 [PATCH] hash: fix memcmp function pointer in multi-process environment Dhana Eadala
  2016-03-14  2:30 ` 张伟
@ 2016-03-14  4:38 ` 张伟
  2016-03-15  0:57   ` Dhananjaya Eadala
  2016-03-22 11:42 ` Thomas Monjalon
  2 siblings, 1 reply; 16+ messages in thread
From: 张伟 @ 2016-03-14  4:38 UTC (permalink / raw)
  To: Dhana Eadala; +Cc: bruce.richardson, pablo.de.lara.guarch, michael.qiu, dev

BTW, the following is my backtrace when the system crashes. 

Program received signal SIGSEGV, Segmentation fault.

0x00000000004883ab in rte_hash_reset (h=0x0)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444

444while (rte_ring_dequeue(h->free_slots, &ptr) == 0)

(gdb) bt

#0  0x00000000004883ab in rte_hash_reset (h=0x0)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444

#1  0x000000000048fdfb in rte_hash_lookup_with_hash (h=0x7fff32cce740, key=0x7fffffffe220, sig=403183624)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:771

#2  0x000000000042b551 in onvm_ft_lookup_with_hash (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0, 

    data=0x7fffffffe298) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:104

#3  0x000000000042b8c3 in onvm_flow_dir_get_with_hash (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0, 

    flow_entry=0x7fffffffe298)

    at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_dir.c:14

#4  0x00000000004251d7 in packet_handler (pkt=0x7fff390ea9c0, meta=0x7fff390eaa00)

    at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:212

#5  0x0000000000429502 in onvm_nf_run ()

#6  0x00000000004253f1 in main (argc=1, argv=0x7fffffffe648)

    at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:272

(gdb) 








I ran into a problem when using the DPDK hash table across multiple processes. One process started as the primary process and the other as a secondary process.


I based my code on the client and server multiprocess example. My aim is that the server creates a hash table, then shares it with the client. The client will read and write the hash table, and the server will read the hash table. I use rte_calloc to allocate the space for the hash table, and use a memzone to tell the client the hash table address.
But once I add an entry into the hash table, calling the "lookup" function causes a segmentation fault, even though I pass exactly the same parameters to lookup as in the first call that succeeded.
If I create the hash table inside the client, everything works correctly.
I have included pieces of the server and client code related to the hash table. I have spent almost 3 days on this bug, but there is no clue that helps solve it. If any of you can give some suggestions, I would appreciate it. I posted the question to the mailing list, but have not yet received any reply.


This problem occurs in the dpdk multi-process client and server example, dpdk-2.2.0/examples/multi_process/client_server_mp.
My aim is that the server creates a hash table, then shares it with the client. The client will write to the hash table, and the server will read it. I am using the dpdk hash table. What I did is that the server creates a hash table (table and array entries) and returns the table address. I use a memzone to pass the table address to the client. In the client, the second lookup gets a segmentation fault and the system crashes. I will put some related code here.
create hash table function:

struct onvm_ft*

onvm_ft_create(int cnt, int entry_size) {

        struct rte_hash* hash;

        struct onvm_ft* ft;

        struct rte_hash_parameters ipv4_hash_params = {

            .name = NULL,

            .entries = cnt,

            .key_len = sizeof(struct onvm_ft_ipv4_5tuple),

            .hash_func = NULL,

            .hash_func_init_val = 0,

        };




        char s[64];

        /* create ipv4 hash table. use core number and cycle counter to get a unique name. */

        ipv4_hash_params.name = s;

        ipv4_hash_params.socket_id = rte_socket_id();

        snprintf(s, sizeof(s), "onvm_ft_%d-%"PRIu64, rte_lcore_id(), rte_get_tsc_cycles());

        hash = rte_hash_create(&ipv4_hash_params);

        if (hash == NULL) {

                return NULL;

        }

        ft = (struct onvm_ft*)rte_calloc("table", 1, sizeof(struct onvm_ft), 0);

        if (ft == NULL) {

                rte_hash_free(hash);

                return NULL;

        }

        ft->hash = hash;

        ft->cnt = cnt;

        ft->entry_size = entry_size;

        /* Create data array for storing values */

        ft->data = rte_calloc("entry", cnt, entry_size, 0);

        if (ft->data == NULL) {

                rte_hash_free(hash);

                rte_free(ft);

                return NULL;

        }

        return ft;

}




related structure:

struct onvm_ft {

        struct rte_hash* hash;

        char* data;

        int cnt;

        int entry_size;

};




in server side, I will call the create function, use memzone share it to client. The following is what I do:

related variables:

struct onvm_ft *sdn_ft;

struct onvm_ft **sdn_ft_p;

const struct rte_memzone *mz_ftp;




        sdn_ft = onvm_ft_create(1024, sizeof(struct onvm_flow_entry));

        if(sdn_ft == NULL) {

                rte_exit(EXIT_FAILURE, "Unable to create flow table\n");

        }

        mz_ftp = rte_memzone_reserve(MZ_FTP_INFO, sizeof(struct onvm_ft *),

                                  rte_socket_id(), NO_FLAGS);

        if (mz_ftp == NULL) {

                rte_exit(EXIT_FAILURE, "Canot reserve memory zone for flow table pointer\n");

        }

        memset(mz_ftp->addr, 0, sizeof(struct onvm_ft *));

        sdn_ft_p = mz_ftp->addr;

        *sdn_ft_p = sdn_ft;




In client side:

struct onvm_ft *sdn_ft;

static void

map_flow_table(void) {

        const struct rte_memzone *mz_ftp;

        struct onvm_ft **ftp;




        mz_ftp = rte_memzone_lookup(MZ_FTP_INFO);

        if (mz_ftp == NULL)

                rte_exit(EXIT_FAILURE, "Cannot get flow table pointer\n");

        ftp = mz_ftp->addr;

        sdn_ft = *ftp;

}




The following is my debug message: I set a breakpoint in lookup table line. To narrow down the problem, I just send one flow. So the second time and the first time, the packets are the same.  

For the first time, it works. I print out the parameters: inside the onvm_ft_lookup function, if there is a related entry, it will return the address by flow_entry. 

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft 

$1 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$2 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99d00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) 

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$3 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key

$4 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss 

$5 = 403183624

(gdb) c




After I hit c, it will do the second lookup,

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft


$7 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$8 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99c00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) n

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$9 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key 

$10 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss

$11 = 403183624

(gdb) n





Program received signal SIGSEGV, Segmentation fault.

0x000000000045fb97 in __rte_hash_lookup_bulk ()

(gdb) bt

#0  0x000000000045fb97 in __rte_hash_lookup_bulk ()

#1  0x0000000000000000 in ?? ()




From the debug message, the parameters are exactly the same. I do not know why it has the segmentation fault. 

my lookup function:

int

onvm_ft_lookup(struct onvm_ft* table, struct onvm_ft_ipv4_5tuple *key, char** data) {

        int32_t tbl_index;

        uint32_t softrss;




        softrss = onvm_softrss(key);

        printf("software rss %d\n", softrss);




        tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

        if (tbl_index >= 0) {

                *data = onvm_ft_get_data(table, tbl_index);

                return 0;

        }

        else {

                return tbl_index;

        }

}







At 2016-03-14 10:16:48, "Dhana Eadala" <edreddy@gmail.com> wrote:
>We found a problem in dpdk-2.2 using under multi-process environment.
>Here is the brief description how we are using the dpdk:
>
>We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
>two different compiled binaries.
>proc1 is started as primary process and proc2 as secondary process.
>
>proc1:
>Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
>As part of this, this API initialized the rte_hash structure and set the
>srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
>
>proc2:
>calls srcHash =  rte_hash_find_existing("src_hash_name").
>This function call returns the rte_hash created by proc1.
>This srcHash->rte_hash_cmp_eq still points to the address of
>memcmp() from proc1 address space.
>Later proc2  calls
>rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
>rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
>which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
>This leads to a crash as h->rte_hash_cmp_eq is an address
>from proc1 address space and is invalid address in proc2 address space.
>
>We found, from dpdk documentation, that
>
>"
> The use of function pointers between multiple processes
> running based of different compiled
> binaries is not supported, since the location of a given function
> in one process may be different to
> its location in a second. This prevents the librte_hash library
> from behaving properly as in a  multi-
> threaded instance, since it uses a pointer to the hash function internally.
>
> To work around this issue, it is recommended that
> multi-process applications perform the hash
> calculations by directly calling the hashing function
> from the code and then using the
> rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> instead of the functions which do
> the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>"
>
>We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
>There was no issue up to and including dpdk-2.0.
>Later releases started crashing because rte_hash_cmp_eq was
>introduced in dpdk-2.1
>
>We fixed it with the following patch and would like to
>submit the patch to dpdk.org.
>Patch is created such that, if anyone wanted to use dpdk in
>multi-process environment with function pointers not shared, they need to
>define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
>Without defining this flag in Makefile, it works as it is now.
>
>Signed-off-by: Dhana Eadala <edreddy@gmail.com>
>---
> lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
>diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
>index 3e3167c..0946777 100644
>--- a/lib/librte_hash/rte_cuckoo_hash.c
>+++ b/lib/librte_hash/rte_cuckoo_hash.c
>@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				prim_bkt->signatures[i].alt == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					prim_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				sec_bkt->signatures[i].current == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					sec_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp (key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].alt == sig) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
> 	unsigned hit;
> 	unsigned key_idx;
> 
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
>+#else
> 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
>+#endif
> 	if (data != NULL)
> 		data[idx] = key_slot->pdata;
> 
>-- 
>2.5.0
>





 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-14  4:38 ` 张伟
@ 2016-03-15  0:57   ` Dhananjaya Eadala
  2016-03-15  1:02     ` 张伟
  0 siblings, 1 reply; 16+ messages in thread
From: Dhananjaya Eadala @ 2016-03-15  0:57 UTC (permalink / raw)
  To: 张伟; +Cc: bruce.richardson, pablo.de.lara.guarch, michael.qiu, dev

Hi

I looked at your info from gdb and source code.
 From the source code, the rte_hash_lookup_with_hash / 
__rte_hash_lookup_with_hash() functions do not invoke the rte_hash_reset() 
function.
It is possible that the rte_hash_cmp_eq function pointer from one 
process matches the address of rte_hash_reset() in the other process.

For quick test, you can do two things.

1. just apply the patch I provided and recompile the dpdk with a -D flag 
"RTE_LIB_MP_NO_FUNC_PTR" defined and re-run your test

2. If it still crashes, disable the optimization in the Makefiles and see, 
using gdb, how you are entering rte_hash_reset() from 
__rte_hash_lookup_with_hash(). This will tell you whether it is a problem 
with function pointers in a multi-process environment


Thanks
Dhana

On 03/14/2016 12:38 AM, 张伟 wrote:
> BTW, the following is my backtrace when the system crashes.
>
> Program received signal SIGSEGV, Segmentation fault.
>
> 0x00000000004883ab in rte_hash_reset (h=0x0)
>
>     at 
> /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444
>
> 444while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
>
> (gdb) bt
>
> #0  0x00000000004883ab in rte_hash_reset (h=0x0)
>
>     at 
> /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444
>
> #1  0x000000000048fdfb in rte_hash_lookup_with_hash (h=0x7fff32cce740, 
> key=0x7fffffffe220, sig=403183624)
>
>     at 
> /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:771
>
> #2  0x000000000042b551 in onvm_ft_lookup_with_hash 
> (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0,
>
>     data=0x7fffffffe298) at 
> /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:104
>
> #3  0x000000000042b8c3 in onvm_flow_dir_get_with_hash 
> (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0,
>
> flow_entry=0x7fffffffe298)
>
>     at 
> /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_dir.c:14
>
> #4  0x00000000004251d7 in packet_handler (pkt=0x7fff390ea9c0, 
> meta=0x7fff390eaa00)
>
>     at 
> /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:212
>
> #5  0x0000000000429502 in onvm_nf_run ()
>
> #6  0x00000000004253f1 in main (argc=1, argv=0x7fffffffe648)
>
>     at 
> /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:272
>
> (gdb)
>
>
>
>
> I met a problem which I used the DPDK hash table for multi processes. 
> One started as primary process and the other as secondary process.
> I based on the client and server multiprocess example. My aim is that 
> server creates a hash table, then share it to the client. The client 
> will read and write the hash table, and the server will read the hash 
> table. I use rte_calloc allocate the space for hash table, use memzone 
> tells the client the hash table address.
> But once I add an entry into the hash table, calling "lookup" function 
> will have the segment fault. But for the lookup function, I have 
> exactly the same parameters for lookup when the first time calls the 
> lookup.
> If I create the hash table inside the client, everything works correctly.
> I put pieces of codes for server and client codes related to the hash 
> table. I have spent almost 3 days on this bug. But there is no any 
> clue which can help to solve this bug. If any of you can give some 
> suggestions, I will be appreciated. I post the question into the mail 
> list, but have not yet got any reply.
>
> This problem is that in dpdk multi process - client and server 
> example, dpdk-2.2.0/examples/multi_process/client_server_mp
> My aim is that server create a hash table, then share it to client. 
> Client will write the hash  table, server will read the hash table.  I 
> am using dpdk hash table.  What I did is that server create a hash 
> table (table and array entries), return the table address.  I use 
> memzone pass the table address to client.  In client, the second 
> lookup gets segment fault. The system gets crashed.  I will put some 
> related code here.
> create hash table function:
>
> struct onvm_ft*
>
> onvm_ft_create(int cnt, int entry_size) {
>
>         struct rte_hash* hash;
>
>         struct onvm_ft* ft;
>
>         struct rte_hash_parameters ipv4_hash_params = {
>
>             .name = NULL,
>
>             .entries = cnt,
>
>             .key_len = sizeof(struct onvm_ft_ipv4_5tuple),
>
>             .hash_func = NULL,
>
>             .hash_func_init_val = 0,
>
>         };
>
>
>         char s[64];
>
>         /* create ipv4 hash table. use core number and cycle counter 
> to get a unique name. */
>
> ipv4_hash_params.name <http://ipv4_hash_params.name/> = s;
>
>         ipv4_hash_params.socket_id = rte_socket_id();
>
>         snprintf(s, sizeof(s), "onvm_ft_%d-%"PRIu64, rte_lcore_id(), 
> rte_get_tsc_cycles());
>
>         hash = rte_hash_create(&ipv4_hash_params);
>
>         if (hash == NULL) {
>
>                 return NULL;
>
>         }
>
>         ft = 
> (struct onvm_ft*)rte_calloc("table", 1, sizeof(struct onvm_ft), 0);
>
>         if (ft == NULL) {
>
>                 rte_hash_free(hash);
>
>                 return NULL;
>
>         }
>
>         ft->hash = hash;
>
>         ft->cnt = cnt;
>
>         ft->entry_size = entry_size;
>
>         /* Create data array for storing values */
>
>         ft->data = rte_calloc("entry", cnt, entry_size, 0);
>
>         if (ft->data == NULL) {
>
>                 rte_hash_free(hash);
>
>                 rte_free(ft);
>
>                 return NULL;
>
>         }
>
>         return ft;
>
> }
>
>
> related structure:
>
> struct onvm_ft {
>
>         struct rte_hash* hash;
>
>         char* data;
>
>         int cnt;
>
>         int entry_size;
>
> };
>
>
> in server side, I will call the create function, use memzone share it 
> to client. The following is what I do:
>
> related variables:
>
> struct onvm_ft *sdn_ft;
>
> struct onvm_ft **sdn_ft_p;
>
> const struct rte_memzone *mz_ftp;
>
>
>         sdn_ft = onvm_ft_create(1024, sizeof(struct onvm_flow_entry));
>
>         if(sdn_ft == NULL) {
>
>                 rte_exit(EXIT_FAILURE, "Unable to create flow table\n");
>
>         }
>
>         mz_ftp = 
> rte_memzone_reserve(MZ_FTP_INFO, sizeof(struct onvm_ft *),
>
>                                   rte_socket_id(), NO_FLAGS);
>
>         if (mz_ftp == NULL) {
>
>                 rte_exit(EXIT_FAILURE, "Canot reserve memory zone for 
> flow table pointer\n");
>
>         }
>
>         memset(mz_ftp->addr, 0, sizeof(struct onvm_ft *));
>
>         sdn_ft_p = mz_ftp->addr;
>
>         *sdn_ft_p = sdn_ft;
>
>
> In client side:
>
> struct onvm_ft *sdn_ft;
>
> static void
>
> map_flow_table(void) {
>
>         const struct rte_memzone *mz_ftp;
>
>         struct onvm_ft **ftp;
>
>
>         mz_ftp = rte_memzone_lookup(MZ_FTP_INFO);
>
>         if (mz_ftp == NULL)
>
>                 rte_exit(EXIT_FAILURE, "Cannot get flow table pointer\n");
>
>         ftp = mz_ftp->addr;
>
>         sdn_ft = *ftp;
>
> }
>
>
> The following is my debug message: I set a breakpoint in lookup table 
> line. To narrow down the problem, I just send one flow. So the second 
> time and the first time, the packets are the same.
>
> For the first time, it works. I print out the parameters: inside the 
> onvm_ft_lookup function, if there is a related entry, it will return 
> the address by flow_entry.
>
> Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at 
> /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191
>
> 191                                 ret = onvm_ft_lookup(sdn_ft, fk, 
> (char**)&flow_entry);
>
> (gdb) print *sdn_ft
>
> $1 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, 
> entry_size = 56}
>
> (gdb) print *fk
>
> $2 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, 
> dst_port = 11798, proto = 17 '\021'}
>
> (gdb) s
>
> onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99d00, 
> data=0x7ffff68d2b00) at 
> /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151
>
> 151 softrss = onvm_softrss(key);
>
> (gdb) n
>
> 152         printf("software rss %d\n", softrss);
>
> (gdb)
>
> software rss 403183624
>
> 154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const 
> void *)key, softrss);
>
> (gdb) print table->hash
>
> $3 = (struct rte_hash *) 0x7fff32cce740
>
> (gdb) print *key
>
> $4 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, 
> dst_port = 11798, proto = 17 '\021'}
>
> (gdb) print softrss
>
> $5 = 403183624
>
> (gdb) c
>
>
> After I hit c, it will do the second lookup,
>
> Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at 
> /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191
>
> 191                                 ret = onvm_ft_lookup(sdn_ft, fk, 
> (char**)&flow_entry);
>
> (gdb) print *sdn_ft
>
> $7 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, 
> entry_size = 56}
>
> (gdb) print *fk
>
> $8 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, 
> dst_port = 11798, proto = 17 '\021'}
>
> (gdb) s
>
> onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99c00, 
> data=0x7ffff68d2b00) at 
> /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151
>
> 151 softrss = onvm_softrss(key);
>
> (gdb) n
>
> 152         printf("software rss %d\n", softrss);
>
> (gdb) n
>
> software rss 403183624
>
> 154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const 
> void *)key, softrss);
>
> (gdb) print table->hash
>
> $9 = (struct rte_hash *) 0x7fff32cce740
>
> (gdb) print *key
>
> $10 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, 
> dst_port = 11798, proto = 17 '\021'}
>
> (gdb) print softrss
>
> $11 = 403183624
>
> (gdb) n
>
>
> Program received signal SIGSEGV, Segmentation fault.
>
> 0x000000000045fb97 in __rte_hash_lookup_bulk ()
>
> (gdb) bt
>
> #0  0x000000000045fb97 in __rte_hash_lookup_bulk ()
>
> #1  0x0000000000000000 in ?? ()
>
>
> From the debug message, the parameters are exactly the same. I do not 
> know why it has the segmentation fault.
>
> my lookup function:
>
> int
>
> onvm_ft_lookup(struct onvm_ft* table, struct onvm_ft_ipv4_5tuple 
> *key, char** data) {
>
>         int32_t tbl_index;
>
>         uint32_t softrss;
>
>
>         softrss = onvm_softrss(key);
>
>         printf("software rss %d\n", softrss);
>
>
>         tbl_index = rte_hash_lookup_with_hash(table->hash, 
> (const void *)key, softrss);
>
>         if (tbl_index >= 0) {
>
>                 *data = onvm_ft_get_data(table, tbl_index);
>
>                 return 0;
>
>         }
>
>         else {
>
>                 return tbl_index;
>
>         }
>
> }
>
>
>
>
> At 2016-03-14 10:16:48, "Dhana Eadala" <edreddy@gmail.com <mailto:edreddy@gmail.com>> wrote:
> >We found a problem in dpdk-2.2 using under multi-process environment.
> >Here is the brief description how we are using the dpdk:
> >
> >We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
> >two different compiled binaries.
> >proc1 is started as primary process and proc2 as secondary process.
> >
> >proc1:
> >Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
> >As part of this, this API initialized the rte_hash structure and set the
> >srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
> >
> >proc2:
> >calls srcHash =  rte_hash_find_existing("src_hash_name").
> >This function call returns the rte_hash created by proc1.
> >This srcHash->rte_hash_cmp_eq still points to the address of
> >memcmp() from proc1 address space.
> >Later proc2  calls
> >rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
> >rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
> >which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
> >This leads to a crash as h->rte_hash_cmp_eq is an address
> >from proc1 address space and is invalid address in proc2 address space.
> >
> >We found, from dpdk documentation, that
> >
> >"
> > The use of function pointers between multiple processes
> > running based of different compiled
> > binaries is not supported, since the location of a given function
> > in one process may be different to
> > its location in a second. This prevents the librte_hash library
> > from behaving properly as in a  multi-
> > threaded instance, since it uses a pointer to the hash function internally.
> >
> > To work around this issue, it is recommended that
> > multi-process applications perform the hash
> > calculations by directly calling the hashing function
> > from the code and then using the
> > rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> > instead of the functions which do
> > the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> >"
> >
> >We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
> >It was no issue up to and including dpdk-2.0.
> >In later releases started crashing because rte_hash_cmp_eq is
> >introduced in dpdk-2.1
> >
> >We fixed it with the following patch and would like to
> >submit the patch to dpdk.org.
> >Patch is created such that, if anyone wanted to use dpdk in
> >multi-process environment with function pointers not shared, they need to
> >define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
> >Without defining this flag in Makefile, it works as it is now.
> >
> >Signed-off-by: Dhana Eadala <edreddy@gmail.com <mailto:edreddy@gmail.com>>
> >---
> > lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
> > 1 file changed, 28 insertions(+)
> >
> >diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
> >index 3e3167c..0946777 100644
> >--- a/lib/librte_hash/rte_cuckoo_hash.c
> >+++ b/lib/librte_hash/rte_cuckoo_hash.c
> >@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> > 				prim_bkt->signatures[i].alt == alt_hash) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					prim_bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp(key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				/* Enqueue index of free slot back in the ring. */
> > 				enqueue_slot_back(h, cached_free_slots, slot_id);
> > 				/* Update data */
> >@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> > 				sec_bkt->signatures[i].current == alt_hash) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					sec_bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp(key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				/* Enqueue index of free slot back in the ring. */
> > 				enqueue_slot_back(h, cached_free_slots, slot_id);
> > 				/* Update data */
> >@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> > 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp (key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				if (data != NULL)
> > 					*data = k->pdata;
> > 				/*
> >@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> > 				bkt->signatures[i].alt == sig) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp(key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				if (data != NULL)
> > 					*data = k->pdata;
> > 				/*
> >@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> > 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp(key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				remove_entry(h, bkt, i);
> >
> > 				/*
> >@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> > 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> > 			k = (struct rte_hash_key *) ((char *)keys +
> > 					bkt->key_idx[i] * h->key_entry_size);
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+			if (memcmp(key, k->key, h->key_len) == 0) {
> >+#else
> > 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
> >+#endif
> > 				remove_entry(h, bkt, i);
> >
> > 				/*
> >@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
> > 	unsigned hit;
> > 	unsigned key_idx;
> >
> >+#ifdef RTE_LIB_MP_NO_FUNC_PTR
> >+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
> >+#else
> > 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
> >+#endif
> > 	if (data != NULL)
> > 		data[idx] = key_slot->pdata;
> >
> >--
> >2.5.0
> >
>
>
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-15  0:57   ` Dhananjaya Eadala
@ 2016-03-15  1:02     ` 张伟
  2016-03-24 14:00       ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 16+ messages in thread
From: 张伟 @ 2016-03-15  1:02 UTC (permalink / raw)
  To: Dhananjaya Eadala
  Cc: bruce.richardson, pablo.de.lara.guarch, michael.qiu, dev

Thanks so much for your patch! Your patch exactly solves my bug. :)


At 2016-03-15 08:57:29, "Dhananjaya Eadala" <edreddy@gmail.com> wrote:
Hi

I looked at your info from gdb and source code.
From the source code, the rte_hash_lookup_with_hash / __rte_hash_lookup_with_hash() functions do not invoke the rte_hash_reset() function.
It is possible that the rte_hash_cmp_eq function pointer from one process matches the address of rte_hash_reset() in the other process.

For quick test, you can do two things.

1. just apply the patch I provided and recompile the dpdk with a -D flag "RTE_LIB_MP_NO_FUNC_PTR" defined and re-run your test

2. If it still crashes, disable the optimization in Makefiles and see, using gdb, how you are entering rte_hash_reset() from __rte_hash_lookup_with_hash(). this will tell you if it was a problem with function pointers in multi-process environment


Thanks
Dhana

On 03/14/2016 12:38 AM, 张伟 wrote:

BTW, the following is my backtrace when the system crashes. 

Program received signal SIGSEGV, Segmentation fault.

0x00000000004883ab in rte_hash_reset (h=0x0)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444

444while (rte_ring_dequeue(h->free_slots, &ptr) == 0)

(gdb) bt

#0  0x00000000004883ab in rte_hash_reset (h=0x0)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:444

#1  0x000000000048fdfb in rte_hash_lookup_with_hash (h=0x7fff32cce740, key=0x7fffffffe220, sig=403183624)

    at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:771

#2  0x000000000042b551 in onvm_ft_lookup_with_hash (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0, 

    data=0x7fffffffe298) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:104

#3  0x000000000042b8c3 in onvm_flow_dir_get_with_hash (table=0x7fff32cbe4c0, pkt=0x7fff390ea9c0, 

    flow_entry=0x7fffffffe298)

    at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_dir.c:14

#4  0x00000000004251d7 in packet_handler (pkt=0x7fff390ea9c0, meta=0x7fff390eaa00)

    at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:212

#5  0x0000000000429502 in onvm_nf_run ()

#6  0x00000000004253f1 in main (argc=1, argv=0x7fffffffe648)

    at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/flow_table.c:272

(gdb) 








I met a problem when using the DPDK hash table across multiple processes: one started as the primary process and the other as the secondary process.
I based on the client and server multiprocess example. My aim is that server creates a hash table, then share it to the client. The client will read and write the hash table, and the server will read the hash table. I use rte_calloc allocate the space for hash table, use memzone tells the client the hash table address.
But once I add an entry into the hash table, calling the "lookup" function produces a segmentation fault, even though I pass exactly the same parameters as in the first (successful) lookup call.
If I create the hash table inside the client, everything works correctly.
I have included the pieces of server and client code related to the hash table. I have spent almost 3 days on this bug, but there is no clue that helps solve it. If any of you can give some suggestions, I would appreciate it. I posted the question to the mailing list, but have not yet received any reply.


This problem is that in dpdk multi process - client and server example, dpdk-2.2.0/examples/multi_process/client_server_mp
My aim is that server create a hash table, then share it to client. Client will write the hash  table, server will read the hash table.  I am using dpdk hash table.  What I did is that server create a hash table (table and array entries), return the table address.  I use memzone pass the table address to client.  In client, the second lookup gets segment fault. The system gets crashed.  I will put some related code here. 
create hash table function:

struct onvm_ft*

onvm_ft_create(int cnt, int entry_size) {

        struct rte_hash* hash;

        struct onvm_ft* ft;

        struct rte_hash_parameters ipv4_hash_params = {

            .name = NULL,

            .entries = cnt,

            .key_len = sizeof(struct onvm_ft_ipv4_5tuple),

            .hash_func = NULL,

            .hash_func_init_val = 0,

        };




        char s[64];

        /* create ipv4 hash table. use core number and cycle counter to get a unique name. */

        ipv4_hash_params.name = s;

        ipv4_hash_params.socket_id = rte_socket_id();

        snprintf(s, sizeof(s), "onvm_ft_%d-%"PRIu64, rte_lcore_id(), rte_get_tsc_cycles());

        hash = rte_hash_create(&ipv4_hash_params);

        if (hash == NULL) {

                return NULL;

        }

        ft = (struct onvm_ft*)rte_calloc("table", 1, sizeof(struct onvm_ft), 0);

        if (ft == NULL) {

                rte_hash_free(hash);

                return NULL;

        }

        ft->hash = hash;

        ft->cnt = cnt;

        ft->entry_size = entry_size;

        /* Create data array for storing values */

        ft->data = rte_calloc("entry", cnt, entry_size, 0);

        if (ft->data == NULL) {

                rte_hash_free(hash);

                rte_free(ft);

                return NULL;

        }

        return ft;

}




related structure:

struct onvm_ft {

        struct rte_hash* hash;

        char* data;

        int cnt;

        int entry_size;

};




in server side, I will call the create function, use memzone share it to client. The following is what I do:

related variables:

struct onvm_ft *sdn_ft;

struct onvm_ft **sdn_ft_p;

const struct rte_memzone *mz_ftp;




        sdn_ft = onvm_ft_create(1024, sizeof(struct onvm_flow_entry));

        if(sdn_ft == NULL) {

                rte_exit(EXIT_FAILURE, "Unable to create flow table\n");

        }

        mz_ftp = rte_memzone_reserve(MZ_FTP_INFO, sizeof(struct onvm_ft *),

                                  rte_socket_id(), NO_FLAGS);

        if (mz_ftp == NULL) {

                rte_exit(EXIT_FAILURE, "Canot reserve memory zone for flow table pointer\n");

        }

        memset(mz_ftp->addr, 0, sizeof(struct onvm_ft *));

        sdn_ft_p = mz_ftp->addr;

        *sdn_ft_p = sdn_ft;




In client side:

struct onvm_ft *sdn_ft;

/*
 * Attach to the flow table created by the primary process: look up the
 * shared memzone by name and copy the published table pointer into the
 * process-global sdn_ft.  Exits the process if the memzone is missing.
 */
static void
map_flow_table(void) {
        const struct rte_memzone *zone;
        struct onvm_ft **table_slot;

        zone = rte_memzone_lookup(MZ_FTP_INFO);
        if (zone == NULL)
                rte_exit(EXIT_FAILURE, "Cannot get flow table pointer\n");

        table_slot = zone->addr;
        sdn_ft = *table_slot;
}




The following is my debug message. I set a breakpoint at the lookup line. To narrow down the problem, I send only one flow, so the packet in the second lookup is identical to the one in the first.

The first time, it works. I print out the parameters: inside the onvm_ft_lookup function, if there is a matching entry, its address is returned via the flow_entry output parameter.

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft 

$1 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$2 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99d00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) 

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$3 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key

$4 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss 

$5 = 403183624

(gdb) c




After I hit c, it will do the second lookup,

Breakpoint 1, datapath_handle_read (dp=0x7ffff00008c0) at /home/zhangwei1984/openNetVM-master/openNetVM/examples/flow_table/sdn.c:191

191                                 ret = onvm_ft_lookup(sdn_ft, fk, (char**)&flow_entry);

(gdb) print *sdn_ft


$7 = {hash = 0x7fff32cce740, data = 0x7fff32cb0480 "", cnt = 1024, entry_size = 56}

(gdb) print *fk

$8 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) s

onvm_ft_lookup (table=0x7fff32cbe4c0, key=0x7fff32b99c00, data=0x7ffff68d2b00) at /home/zhangwei1984/openNetVM-master/openNetVM/onvm/shared/onvm_flow_table.c:151

151 softrss = onvm_softrss(key);

(gdb) n

152         printf("software rss %d\n", softrss);

(gdb) n

software rss 403183624

154         tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

(gdb) print table->hash

$9 = (struct rte_hash *) 0x7fff32cce740

(gdb) print *key 

$10 = {src_addr = 419496202, dst_addr = 453050634, src_port = 53764, dst_port = 11798, proto = 17 '\021'}

(gdb) print softrss

$11 = 403183624

(gdb) n





Program received signal SIGSEGV, Segmentation fault.

0x000000000045fb97 in __rte_hash_lookup_bulk ()

(gdb) bt

#0  0x000000000045fb97 in __rte_hash_lookup_bulk ()

#1  0x0000000000000000 in ?? ()




From the debug messages, the parameters are exactly the same in both calls. I do not know why the second call causes a segmentation fault.

my lookup function:

/*
 * Look up a 5-tuple key in the flow table.
 *
 * The hash is computed in software (onvm_softrss) and passed explicitly
 * through rte_hash_lookup_with_hash() so the rte_hash's internal hash
 * function pointer is never invoked — required for secondary processes.
 *
 * On a hit (index >= 0), *data is set to the address of the entry's slot
 * in table->data and 0 is returned.  On a miss, the negative error code
 * from rte_hash_lookup_with_hash() is returned and *data is untouched.
 */
int

onvm_ft_lookup(struct onvm_ft* table, struct onvm_ft_ipv4_5tuple *key, char** data) {

        int32_t tbl_index;

        uint32_t softrss;




        softrss = onvm_softrss(key);

        /* softrss is unsigned: "%d" would misprint values above INT_MAX,
         * so use the exact-width PRIu32 conversion (file already uses
         * PRIu64 elsewhere). */
        printf("software rss %" PRIu32 "\n", softrss);




        tbl_index = rte_hash_lookup_with_hash(table->hash, (const void *)key, softrss);

        if (tbl_index >= 0) {

                *data = onvm_ft_get_data(table, tbl_index);

                return 0;

        }

        return tbl_index;

}





At 2016-03-14 10:16:48, "Dhana Eadala" <edreddy@gmail.com> wrote:
>We found a problem in dpdk-2.2 using under multi-process environment.
>Here is the brief description how we are using the dpdk:
>
>We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
>two different compiled binaries.
>proc1 is started as primary process and proc2 as secondary process.
>
>proc1:
>Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
>As part of this, this api initalized the rte_hash structure and set the
>srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
>
>proc2:
>calls srcHash =  rte_hash_find_existing("src_hash_name").
>This function call returns the rte_hash created by proc1.
>This srcHash->rte_hash_cmp_eq still points to the address of
>memcmp() from proc1 address space.
>Later proc2  calls
>rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
>rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
>which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
>This leads to a crash as h->rte_hash_cmp_eq is an address
>from proc1 address space and is invalid address in proc2 address space.
>
>We found, from dpdk documentation, that
>
>"
> The use of function pointers between multiple processes
> running based of different compiled
> binaries is not supported, since the location of a given function
> in one process may be different to
> its location in a second. This prevents the librte_hash library
> from behaving properly as in a  multi-
> threaded instance, since it uses a pointer to the hash function internally.
>
> To work around this issue, it is recommended that
> multi-process applications perform the hash
> calculations by directly calling the hashing function
> from the code and then using the
> rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> instead of the functions which do
> the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>"
>
>We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
>It was no issue up to and including dpdk-2.0.
>In later releases started crashing because rte_hash_cmp_eq is
>introduced in dpdk-2.1
>
>We fixed it with the following patch and would like to
>submit the patch to dpdk.org.
>Patch is created such that, if anyone wanted to use dpdk in
>multi-process environment with function pointers not shared, they need to
>define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
>Without defining this flag in Makefile, it works as it is now.
>
>Signed-off-by: Dhana Eadala <edreddy@gmail.com>
>---
> lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
>diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
>index 3e3167c..0946777 100644
>--- a/lib/librte_hash/rte_cuckoo_hash.c
>+++ b/lib/librte_hash/rte_cuckoo_hash.c
>@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				prim_bkt->signatures[i].alt == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					prim_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> 				sec_bkt->signatures[i].current == alt_hash) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					sec_bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				/* Enqueue index of free slot back in the ring. */
> 				enqueue_slot_back(h, cached_free_slots, slot_id);
> 				/* Update data */
>@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp (key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].alt == sig) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				if (data != NULL)
> 					*data = k->pdata;
> 				/*
>@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> 				bkt->signatures[i].sig != NULL_SIGNATURE) {
> 			k = (struct rte_hash_key *) ((char *)keys +
> 					bkt->key_idx[i] * h->key_entry_size);
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+			if (memcmp(key, k->key, h->key_len) == 0) {
>+#else
> 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
>+#endif
> 				remove_entry(h, bkt, i);
> 
> 				/*
>@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
> 	unsigned hit;
> 	unsigned key_idx;
> 
>+#ifdef RTE_LIB_MP_NO_FUNC_PTR
>+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
>+#else
> 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
>+#endif
> 	if (data != NULL)
> 		data[idx] = key_slot->pdata;
> 
>-- 
>2.5.0
>





 





 



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-14  2:16 [PATCH] hash: fix memcmp function pointer in multi-process environment Dhana Eadala
  2016-03-14  2:30 ` 张伟
  2016-03-14  4:38 ` 张伟
@ 2016-03-22 11:42 ` Thomas Monjalon
  2016-03-22 19:53   ` De Lara Guarch, Pablo
  2 siblings, 1 reply; 16+ messages in thread
From: Thomas Monjalon @ 2016-03-22 11:42 UTC (permalink / raw)
  To: pablo.de.lara.guarch, sergio.gonzalez.monroy
  Cc: dev, Dhana Eadala, bruce.richardson, michael.qiu

Hi,

Pablo, Sergio, please could you help with this issue?

2016-03-13 22:16, Dhana Eadala:
> We found a problem in dpdk-2.2 using under multi-process environment.
> Here is the brief description how we are using the dpdk:
> 
> We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
> two different compiled binaries.
> proc1 is started as primary process and proc2 as secondary process.
> 
> proc1:
> Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
> As part of this, this api initalized the rte_hash structure and set the
> srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
> 
> proc2:
> calls srcHash =  rte_hash_find_existing("src_hash_name").
> This function call returns the rte_hash created by proc1.
> This srcHash->rte_hash_cmp_eq still points to the address of
> memcmp() from proc1 address space.
> Later proc2  calls
> rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
> rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
> which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
> This leads to a crash as h->rte_hash_cmp_eq is an address
> from proc1 address space and is invalid address in proc2 address space.
> 
> We found, from dpdk documentation, that
> 
> "
>  The use of function pointers between multiple processes
>  running based of different compiled
>  binaries is not supported, since the location of a given function
>  in one process may be different to
>  its location in a second. This prevents the librte_hash library
>  from behaving properly as in a  multi-
>  threaded instance, since it uses a pointer to the hash function internally.
> 
>  To work around this issue, it is recommended that
>  multi-process applications perform the hash
>  calculations by directly calling the hashing function
>  from the code and then using the
>  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
>  instead of the functions which do
>  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> "
> 
> We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
> It was no issue up to and including dpdk-2.0.
> In later releases started crashing because rte_hash_cmp_eq is
> introduced in dpdk-2.1
> 
> We fixed it with the following patch and would like to
> submit the patch to dpdk.org.
> Patch is created such that, if anyone wanted to use dpdk in
> multi-process environment with function pointers not shared, they need to
> define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
> Without defining this flag in Makefile, it works as it is now.

Introducing #ifdef RTE_LIB_MP_NO_FUNC_PTR is not recommended.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-22 11:42 ` Thomas Monjalon
@ 2016-03-22 19:53   ` De Lara Guarch, Pablo
  2016-04-19  4:58     ` rte_hash_del_key crash " 张伟
  0 siblings, 1 reply; 16+ messages in thread
From: De Lara Guarch, Pablo @ 2016-03-22 19:53 UTC (permalink / raw)
  To: Thomas Monjalon, Gonzalez Monroy, Sergio
  Cc: dev, Dhana Eadala, Richardson, Bruce, Qiu, Michael

Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Tuesday, March 22, 2016 11:42 AM
> To: De Lara Guarch, Pablo; Gonzalez Monroy, Sergio
> Cc: dev@dpdk.org; Dhana Eadala; Richardson, Bruce; Qiu, Michael
> Subject: Re: [dpdk-dev] [PATCH] hash: fix memcmp function pointer in multi-
> process environment
> 
> Hi,
> 
> Pablo, Sergio, please could you help with this issue?

I agree this is not the best way to fix this. I will try to have a fix without having to use ifdefs.

Thanks,
Pablo
> 
> 2016-03-13 22:16, Dhana Eadala:
> > We found a problem in dpdk-2.2 using under multi-process environment.
> > Here is the brief description how we are using the dpdk:
> >
> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2
> are
> > two different compiled binaries.
> > proc1 is started as primary process and proc2 as secondary process.
> >
> > proc1:
> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
> structure.
> > As part of this, this api initalized the rte_hash structure and set the
> > srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1
> address space.
> >
> > proc2:
> > calls srcHash =  rte_hash_find_existing("src_hash_name").
> > This function call returns the rte_hash created by proc1.
> > This srcHash->rte_hash_cmp_eq still points to the address of
> > memcmp() from proc1 address space.
> > Later proc2  calls
> > rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
> > rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
> > which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
> > This leads to a crash as h->rte_hash_cmp_eq is an address
> > from proc1 address space and is invalid address in proc2 address space.
> >
> > We found, from dpdk documentation, that
> >
> > "
> >  The use of function pointers between multiple processes
> >  running based of different compiled
> >  binaries is not supported, since the location of a given function
> >  in one process may be different to
> >  its location in a second. This prevents the librte_hash library
> >  from behaving properly as in a  multi-
> >  threaded instance, since it uses a pointer to the hash function internally.
> >
> >  To work around this issue, it is recommended that
> >  multi-process applications perform the hash
> >  calculations by directly calling the hashing function
> >  from the code and then using the
> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> >  instead of the functions which do
> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> > "
> >
> > We did follow the recommended steps by invoking
> rte_hash_lookup_with_hash().
> > It was no issue up to and including dpdk-2.0.
> > In later releases started crashing because rte_hash_cmp_eq is
> > introduced in dpdk-2.1
> >
> > We fixed it with the following patch and would like to
> > submit the patch to dpdk.org.
> > Patch is created such that, if anyone wanted to use dpdk in
> > multi-process environment with function pointers not shared, they need to
> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
> > Without defining this flag in Makefile, it works as it is now.
> 
> Introducing #ifdef RTE_LIB_MP_NO_FUNC_PTR is not recommended.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-15  1:02     ` 张伟
@ 2016-03-24 14:00       ` De Lara Guarch, Pablo
  2016-04-01 13:14         ` Thomas Monjalon
  0 siblings, 1 reply; 16+ messages in thread
From: De Lara Guarch, Pablo @ 2016-03-24 14:00 UTC (permalink / raw)
  To: zhangwqh, Dhananjaya Eadala; +Cc: Richardson, Bruce, Qiu, Michael, dev

Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of ??
> Sent: Tuesday, March 15, 2016 1:03 AM
> To: Dhananjaya Eadala
> Cc: Richardson, Bruce; De Lara Guarch, Pablo; Qiu, Michael; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] hash: fix memcmp function pointer in multi-
> process environment
> 
> Thanks so much for your patch! Your patch exactly solves my bug. :)

I just sent an alternative patch to solve the issue, without requiring #ifdefs.
Could you take a look at it and check that works for you?

Thanks,
Pablo



^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-24 14:00       ` De Lara Guarch, Pablo
@ 2016-04-01 13:14         ` Thomas Monjalon
  0 siblings, 0 replies; 16+ messages in thread
From: Thomas Monjalon @ 2016-04-01 13:14 UTC (permalink / raw)
  To: Dhananjaya Eadala
  Cc: dev, De Lara Guarch, Pablo, zhangwqh, Richardson, Bruce, Qiu, Michael

2016-03-24 14:00, De Lara Guarch, Pablo:
> Hi,
> 
> I just sent an alternative patch to solve the issue, without requiring #ifdefs.
> Could you take a look at it and check that works for you?

Please Dhananjaya Eadala,
Could you confirm the bug is fixed with this patch:
	http://dpdk.org/patch/11822

^ permalink raw reply	[flat|nested] 16+ messages in thread

* rte_hash_del_key crash in multi-process environment
  2016-03-22 19:53   ` De Lara Guarch, Pablo
@ 2016-04-19  4:58     ` 张伟
  2016-04-19  7:39       ` De Lara Guarch, Pablo
  0 siblings, 1 reply; 16+ messages in thread
From: 张伟 @ 2016-04-19  4:58 UTC (permalink / raw)
  To: De Lara Guarch, Pablo
  Cc: Thomas Monjalon, Gonzalez Monroy, Sergio, dev, Dhana Eadala,
	Richardson, Bruce, Qiu, Michael

Hi all, 


In the multi-process environment, I previously hit a bug when calling rte_hash_lookup_with_hash; using Dhana's patch fixed that problem. Now I need to remove a flow in the multi-process environment, and the system crashes when calling the rte_hash_del_key function. The following is the gdb trace. Has anybody met this problem, or does anyone know how to fix it?




Program received signal SIGILL, Illegal instruction.

0x000000000048a0dd in rte_port_ring_reader_frag_free (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_port/rte_port_frag.c:266

266            return -1;

(gdb) bt

#0  0x000000000048a0dd in rte_port_ring_reader_frag_free (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_port/rte_port_frag.c:266

#1  0x000000000049c537 in rte_hash_del_key (h=0x7ffe113d4100, key=0x7ffe092e1000)

   at /home/zhangwei1984/timopenNetVM/dpdk-2.2.0/lib/librte_hash/rte_cuckoo_hash.c:917

#2  0x000000000043716a in onvm_ft_remove_key (table=0x7ffe113c3e80, key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-cpu/onvm/shared/onvm_flow_table.c:160

#3  0x000000000043767e in onvm_flow_dir_del_and_free_key (key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-cpu/onvm/shared/onvm_flow_dir.c:144

#4  0x0000000000437619 in onvm_flow_dir_del_key (key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-cpu/onvm/shared/onvm_flow_dir.c:128

#5  0x0000000000423ded in remove_flow_rule (idx=3) at /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:130

#6  0x0000000000423e44 in clear_stat_remove_flow_rule (nf_info=0x7fff3e652100) at /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:145

#7  0x00000000004247e3 in alloc_nfs_install_flow_rule (services=0xd66e90 <services>, pkt=0x7ffe13f56400)

   at /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:186

#8  0x0000000000424bdb in packet_handler (pkt=0x7ffe13f56400, meta=0x7ffe13f56440) at /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:294

#9  0x000000000043001d in onvm_nf_run (info=0x7fff3e651b00, handler=0x424b21 <packet_handler>) at /home/zhangwei1984/onvm-shared-cpu/onvm/onvm_nf/onvm_nflib.c:462

#10 0x0000000000424cc2 in main (argc=3, argv=0x7fffffffe660) at /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:323








At 2016-03-23 03:53:43, "De Lara Guarch, Pablo" <pablo.de.lara.guarch@intel.com> wrote:
>Hi Thomas,
>
>> -----Original Message-----
>> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
>> Sent: Tuesday, March 22, 2016 11:42 AM
>> To: De Lara Guarch, Pablo; Gonzalez Monroy, Sergio
>> Cc: dev@dpdk.org; Dhana Eadala; Richardson, Bruce; Qiu, Michael
>> Subject: Re: [dpdk-dev] [PATCH] hash: fix memcmp function pointer in multi-
>> process environment
>> 
>> Hi,
>> 
>> Pablo, Sergio, please could you help with this issue?
>
>I agree this is not the best way to fix this. I will try to have a fix without having to use ifdefs.
>
>Thanks,
>Pablo
>> 
>> 2016-03-13 22:16, Dhana Eadala:
>> > We found a problem in dpdk-2.2 using under multi-process environment.
>> > Here is the brief description how we are using the dpdk:
>> >
>> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2
>> are
>> > two different compiled binaries.
>> > proc1 is started as primary process and proc2 as secondary process.
>> >
>> > proc1:
>> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
>> structure.
>> > As part of this, this api initalized the rte_hash structure and set the
>> > srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1
>> address space.
>> >
>> > proc2:
>> > calls srcHash =  rte_hash_find_existing("src_hash_name").
>> > This function call returns the rte_hash created by proc1.
>> > This srcHash->rte_hash_cmp_eq still points to the address of
>> > memcmp() from proc1 address space.
>> > Later proc2  calls
>> > rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
>> > rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
>> > which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
>> > This leads to a crash as h->rte_hash_cmp_eq is an address
>> > from proc1 address space and is invalid address in proc2 address space.
>> >
>> > We found, from dpdk documentation, that
>> >
>> > "
>> >  The use of function pointers between multiple processes
>> >  running based of different compiled
>> >  binaries is not supported, since the location of a given function
>> >  in one process may be different to
>> >  its location in a second. This prevents the librte_hash library
>> >  from behaving properly as in a  multi-
>> >  threaded instance, since it uses a pointer to the hash function internally.
>> >
>> >  To work around this issue, it is recommended that
>> >  multi-process applications perform the hash
>> >  calculations by directly calling the hashing function
>> >  from the code and then using the
>> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
>> >  instead of the functions which do
>> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>> > "
>> >
>> > We did follow the recommended steps by invoking
>> rte_hash_lookup_with_hash().
>> > It was no issue up to and including dpdk-2.0.
>> > In later releases started crashing because rte_hash_cmp_eq is
>> > introduced in dpdk-2.1
>> >
>> > We fixed it with the following patch and would like to
>> > submit the patch to dpdk.org.
>> > Patch is created such that, if anyone wanted to use dpdk in
>> > multi-process environment with function pointers not shared, they need to
>> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
>> > Without defining this flag in Makefile, it works as it is now.
>> 
>> Introducing #ifdef RTE_LIB_MP_NO_FUNC_PTR is not recommended.
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: rte_hash_del_key crash in multi-process environment
  2016-04-19  4:58     ` rte_hash_del_key crash " 张伟
@ 2016-04-19  7:39       ` De Lara Guarch, Pablo
  2016-04-19 21:12         ` 张伟
  0 siblings, 1 reply; 16+ messages in thread
From: De Lara Guarch, Pablo @ 2016-04-19  7:39 UTC (permalink / raw)
  To: zhangwqh
  Cc: Thomas Monjalon, Gonzalez Monroy, Sergio, dev, Dhana Eadala,
	Richardson, Bruce, Qiu, Michael

Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of ??
> Sent: Tuesday, April 19, 2016 5:58 AM
> To: De Lara Guarch, Pablo
> Cc: Thomas Monjalon; Gonzalez Monroy, Sergio; dev@dpdk.org; Dhana
> Eadala; Richardson, Bruce; Qiu, Michael
> Subject: [dpdk-dev] rte_hash_del_key crash in multi-process environment
> 
> Hi all,
> 
> 
> In the multi-process environment, before I met a bug when calling
> rte_hash_lookup_with_hash. Using Dhana's patch fixed my problem. Now I
> need to remove the flow in the multi-process environment, the system gets
> crashed when calling rte_hash_del_key function. The following is the gdb
> trace. Does anybody meet this problem or know how to fix it?

First of all, another fix for the multi-process support was implemented and merged for 16.04 release,
so take a look at it, if you can.
Regarding the rte_hash_del_key() function, you should use rte_hash_del_key_with_hash,
if you want to use it in a multi-process environment (as you did for the lookup function).

Thanks,
Pablo

> 
> 
> 
> 
> Program received signal SIGILL, Illegal instruction.
> 
> 0x000000000048a0dd in rte_port_ring_reader_frag_free
> (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-
> 2.2.0/lib/librte_port/rte_port_frag.c:266
> 
> 266            return -1;
> 
> (gdb) bt
> 
> #0  0x000000000048a0dd in rte_port_ring_reader_frag_free
> (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-
> 2.2.0/lib/librte_port/rte_port_frag.c:266
> 
> #1  0x000000000049c537 in rte_hash_del_key (h=0x7ffe113d4100,
> key=0x7ffe092e1000)
> 
>    at /home/zhangwei1984/timopenNetVM/dpdk-
> 2.2.0/lib/librte_hash/rte_cuckoo_hash.c:917
> 
> #2  0x000000000043716a in onvm_ft_remove_key (table=0x7ffe113c3e80,
> key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-
> cpu/onvm/shared/onvm_flow_table.c:160
> 
> #3  0x000000000043767e in onvm_flow_dir_del_and_free_key
> (key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-
> cpu/onvm/shared/onvm_flow_dir.c:144
> 
> #4  0x0000000000437619 in onvm_flow_dir_del_key (key=0x7ffe092e1000) at
> /home/zhangwei1984/onvm-shared-
> cpu/onvm/shared/onvm_flow_dir.c:128
> 
> #5  0x0000000000423ded in remove_flow_rule (idx=3) at
> /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:130
> 
> #6  0x0000000000423e44 in clear_stat_remove_flow_rule
> (nf_info=0x7fff3e652100) at /home/zhangwei1984/onvm-shared-
> cpu/examples/flow_dir/flow_dir.c:145
> 
> #7  0x00000000004247e3 in alloc_nfs_install_flow_rule (services=0xd66e90
> <services>, pkt=0x7ffe13f56400)
> 
>    at /home/zhangwei1984/onvm-shared-
> cpu/examples/flow_dir/flow_dir.c:186
> 
> #8  0x0000000000424bdb in packet_handler (pkt=0x7ffe13f56400,
> meta=0x7ffe13f56440) at /home/zhangwei1984/onvm-shared-
> cpu/examples/flow_dir/flow_dir.c:294
> 
> #9  0x000000000043001d in onvm_nf_run (info=0x7fff3e651b00,
> handler=0x424b21 <packet_handler>) at /home/zhangwei1984/onvm-
> shared-cpu/onvm/onvm_nf/onvm_nflib.c:462
> 
> #10 0x0000000000424cc2 in main (argc=3, argv=0x7fffffffe660) at
> /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:323
> 
> 
> 
> 
> 
> 
> 
> 
> At 2016-03-23 03:53:43, "De Lara Guarch, Pablo"
> <pablo.de.lara.guarch@intel.com> wrote:
> >Hi Thomas,
> >
> >> -----Original Message-----
> >> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> >> Sent: Tuesday, March 22, 2016 11:42 AM
> >> To: De Lara Guarch, Pablo; Gonzalez Monroy, Sergio
> >> Cc: dev@dpdk.org; Dhana Eadala; Richardson, Bruce; Qiu, Michael
> >> Subject: Re: [dpdk-dev] [PATCH] hash: fix memcmp function pointer in
> multi-
> >> process environment
> >>
> >> Hi,
> >>
> >> Pablo, Sergio, please could you help with this issue?
> >
> >I agree this is not the best way to fix this. I will try to have a fix without
> having to use ifdefs.
> >
> >Thanks,
> >Pablo
> >>
> >> 2016-03-13 22:16, Dhana Eadala:
> >> > We found a problem in dpdk-2.2 using under multi-process
> environment.
> >> > Here is the brief description how we are using the dpdk:
> >> >
> >> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2
> >> are
> >> > two different compiled binaries.
> >> > proc1 is started as primary process and proc2 as secondary process.
> >> >
> >> > proc1:
> >> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
> >> structure.
> >> > As part of this, this api initalized the rte_hash structure and set the
> >> > srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1
> >> address space.
> >> >
> >> > proc2:
> >> > calls srcHash =  rte_hash_find_existing("src_hash_name").
> >> > This function call returns the rte_hash created by proc1.
> >> > This srcHash->rte_hash_cmp_eq still points to the address of
> >> > memcmp() from proc1 address space.
> >> > Later proc2  calls
> >> > rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
> >> > rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
> >> > which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
> >> > This leads to a crash as h->rte_hash_cmp_eq is an address
> >> > from proc1 address space and is invalid address in proc2 address space.
> >> >
> >> > We found, from dpdk documentation, that
> >> >
> >> > "
> >> >  The use of function pointers between multiple processes
> >> >  running based of different compiled
> >> >  binaries is not supported, since the location of a given function
> >> >  in one process may be different to
> >> >  its location in a second. This prevents the librte_hash library
> >> >  from behaving properly as in a  multi-
> >> >  threaded instance, since it uses a pointer to the hash function
> internally.
> >> >
> >> >  To work around this issue, it is recommended that
> >> >  multi-process applications perform the hash
> >> >  calculations by directly calling the hashing function
> >> >  from the code and then using the
> >> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
> >> >  instead of the functions which do
> >> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> >> > "
> >> >
> >> > We did follow the recommended steps by invoking
> >> rte_hash_lookup_with_hash().
> >> > It was no issue up to and including dpdk-2.0.
> >> > In later releases started crashing because rte_hash_cmp_eq is
> >> > introduced in dpdk-2.1
> >> >
> >> > We fixed it with the following patch and would like to
> >> > submit the patch to dpdk.org.
> >> > Patch is created such that, if anyone wanted to use dpdk in
> >> > multi-process environment with function pointers not shared, they need
> to
> >> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
> >> > Without defining this flag in Makefile, it works as it is now.
> >>
> >> Introducing #ifdef RTE_LIB_MP_NO_FUNC_PTR is not recommended.
> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: rte_hash_del_key crash in multi-process environment
  2016-04-19  7:39       ` De Lara Guarch, Pablo
@ 2016-04-19 21:12         ` 张伟
  0 siblings, 0 replies; 16+ messages in thread
From: 张伟 @ 2016-04-19 21:12 UTC (permalink / raw)
  To: De Lara Guarch, Pablo
  Cc: Thomas Monjalon, Gonzalez Monroy, Sergio, dev, Dhana Eadala,
	Richardson, Bruce, Qiu, Michael

Thanks so much! That fixes my problem. 








At 2016-04-19 15:39:16, "De Lara Guarch, Pablo" <pablo.de.lara.guarch@intel.com> wrote:
>Hi,
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of ??
>> Sent: Tuesday, April 19, 2016 5:58 AM
>> To: De Lara Guarch, Pablo
>> Cc: Thomas Monjalon; Gonzalez Monroy, Sergio; dev@dpdk.org; Dhana
>> Eadala; Richardson, Bruce; Qiu, Michael
>> Subject: [dpdk-dev] rte_hash_del_key crash in multi-process environment
>> 
>> Hi all,
>> 
>> 
>> In the multi-process environment, before I met a bug when calling
>> rte_hash_lookup_with_hash. Using Dhana's patch fixed my problem. Now I
>> need to remove the flow in the multi-process environment, the system gets
>> crashed when calling rte_hash_del_key function. The following is the gdb
>> trace. Does anybody meet this problem or know how to fix it?
>
>First of all, another fix for the multi-process support was implemented and merged for 16.04 release,
>so take a look at it, if you can.
>Regarding the rte_hash_del_key() function, you should use rte_hash_del_key_with_hash,
>if you want to use it in a multi-process environment (as you did for the lookup function).
>
>Thanks,
>Pablo
>
>> 
>> 
>> 
>> 
>> Program received signal SIGILL, Illegal instruction.
>> 
>> 0x000000000048a0dd in rte_port_ring_reader_frag_free
>> (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-
>> 2.2.0/lib/librte_port/rte_port_frag.c:266
>> 
>> 266            return -1;
>> 
>> (gdb) bt
>> 
>> #0  0x000000000048a0dd in rte_port_ring_reader_frag_free
>> (port=0x7ffe113d4100) at /home/zhangwei1984/timopenNetVM/dpdk-
>> 2.2.0/lib/librte_port/rte_port_frag.c:266
>> 
>> #1  0x000000000049c537 in rte_hash_del_key (h=0x7ffe113d4100,
>> key=0x7ffe092e1000)
>> 
>>    at /home/zhangwei1984/timopenNetVM/dpdk-
>> 2.2.0/lib/librte_hash/rte_cuckoo_hash.c:917
>> 
>> #2  0x000000000043716a in onvm_ft_remove_key (table=0x7ffe113c3e80,
>> key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-
>> cpu/onvm/shared/onvm_flow_table.c:160
>> 
>> #3  0x000000000043767e in onvm_flow_dir_del_and_free_key
>> (key=0x7ffe092e1000) at /home/zhangwei1984/onvm-shared-
>> cpu/onvm/shared/onvm_flow_dir.c:144
>> 
>> #4  0x0000000000437619 in onvm_flow_dir_del_key (key=0x7ffe092e1000) at
>> /home/zhangwei1984/onvm-shared-
>> cpu/onvm/shared/onvm_flow_dir.c:128
>> 
>> #5  0x0000000000423ded in remove_flow_rule (idx=3) at
>> /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:130
>> 
>> #6  0x0000000000423e44 in clear_stat_remove_flow_rule
>> (nf_info=0x7fff3e652100) at /home/zhangwei1984/onvm-shared-
>> cpu/examples/flow_dir/flow_dir.c:145
>> 
>> #7  0x00000000004247e3 in alloc_nfs_install_flow_rule (services=0xd66e90
>> <services>, pkt=0x7ffe13f56400)
>> 
>>    at /home/zhangwei1984/onvm-shared-
>> cpu/examples/flow_dir/flow_dir.c:186
>> 
>> #8  0x0000000000424bdb in packet_handler (pkt=0x7ffe13f56400,
>> meta=0x7ffe13f56440) at /home/zhangwei1984/onvm-shared-
>> cpu/examples/flow_dir/flow_dir.c:294
>> 
>> #9  0x000000000043001d in onvm_nf_run (info=0x7fff3e651b00,
>> handler=0x424b21 <packet_handler>) at /home/zhangwei1984/onvm-
>> shared-cpu/onvm/onvm_nf/onvm_nflib.c:462
>> 
>> #10 0x0000000000424cc2 in main (argc=3, argv=0x7fffffffe660) at
>> /home/zhangwei1984/onvm-shared-cpu/examples/flow_dir/flow_dir.c:323
>> 
>> 
>> 
>> 
>> 
>> 
>> 
>> 
>> At 2016-03-23 03:53:43, "De Lara Guarch, Pablo"
>> <pablo.de.lara.guarch@intel.com> wrote:
>> >Hi Thomas,
>> >
>> >> -----Original Message-----
>> >> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
>> >> Sent: Tuesday, March 22, 2016 11:42 AM
>> >> To: De Lara Guarch, Pablo; Gonzalez Monroy, Sergio
>> >> Cc: dev@dpdk.org; Dhana Eadala; Richardson, Bruce; Qiu, Michael
>> >> Subject: Re: [dpdk-dev] [PATCH] hash: fix memcmp function pointer in
>> multi-
>> >> process environment
>> >>
>> >> Hi,
>> >>
>> >> Pablo, Sergio, please could you help with this issue?
>> >
>> >I agree this is not the best way to fix this. I will try to have a fix without
>> having to use ifdefs.
>> >
>> >Thanks,
>> >Pablo
>> >>
>> >> 2016-03-13 22:16, Dhana Eadala:
>> >> > We found a problem in dpdk-2.2 using under multi-process
>> environment.
>> >> > Here is the brief description how we are using the dpdk:
>> >> >
>> >> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2
>> >> are
>> >> > two different compiled binaries.
>> >> > proc1 is started as primary process and proc2 as secondary process.
>> >> >
>> >> > proc1:
>> >> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
>> >> structure.
>> >> > As part of this, this api initalized the rte_hash structure and set the
>> >> > srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1
>> >> address space.
>> >> >
>> >> > proc2:
>> >> > calls srcHash =  rte_hash_find_existing("src_hash_name").
>> >> > This function call returns the rte_hash created by proc1.
>> >> > This srcHash->rte_hash_cmp_eq still points to the address of
>> >> > memcmp() from proc1 address space.
>> >> > Later proc2  calls
>> >> > rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
>> >> > rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(),
>> >> > which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
>> >> > This leads to a crash as h->rte_hash_cmp_eq is an address
>> >> > from proc1 address space and is invalid address in proc2 address space.
>> >> >
>> >> > We found, from dpdk documentation, that
>> >> >
>> >> > "
>> >> >  The use of function pointers between multiple processes
>> >> >  running based of different compiled
>> >> >  binaries is not supported, since the location of a given function
>> >> >  in one process may be different to
>> >> >  its location in a second. This prevents the librte_hash library
>> >> >  from behaving properly as in a  multi-
>> >> >  threaded instance, since it uses a pointer to the hash function
>> internally.
>> >> >
>> >> >  To work around this issue, it is recommended that
>> >> >  multi-process applications perform the hash
>> >> >  calculations by directly calling the hashing function
>> >> >  from the code and then using the
>> >> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions
>> >> >  instead of the functions which do
>> >> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>> >> > "
>> >> >
>> >> > We did follow the recommended steps by invoking
>> >> rte_hash_lookup_with_hash().
>> >> > It was no issue up to and including dpdk-2.0.
>> >> > In later releases started crashing because rte_hash_cmp_eq is
>> >> > introduced in dpdk-2.1
>> >> >
>> >> > We fixed it with the following patch and would like to
>> >> > submit the patch to dpdk.org.
>> >> > Patch is created such that, if anyone wanted to use dpdk in
>> >> > multi-process environment with function pointers not shared, they need
>> to
>> >> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile.
>> >> > Without defining this flag in Makefile, it works as it is now.
>> >>
>> >> Introducing #ifdef RTE_LIB_MP_NO_FUNC_PTR is not recommended.
>> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-04  1:00   ` Dhananjaya Reddy Eadala
@ 2016-03-09 21:36     ` Dhananjaya Reddy Eadala
  0 siblings, 0 replies; 16+ messages in thread
From: Dhananjaya Reddy Eadala @ 2016-03-09 21:36 UTC (permalink / raw)
  To: Qiu, Michael; +Cc: dev

Hi Michael

If you agree on the #ifdef protection I explained in my previous mail, I
will re-submit the patch with the commit log refactored to less than
80 characters per line.

Thanks
Dhana


On Thu, Mar 3, 2016 at 8:00 PM, Dhananjaya Reddy Eadala <edreddy@gmail.com>
wrote:

> Hi Michael
>
> Please see my answers to your comments here.
>
> 1. Sure, I will refactor the commit log to restrict not more than 80
> characters.
>
> 2. Not sure how we can ifdef at the location you mentioned. Can you please
> elaborate more on this.
>     We already have similar ifdef protection to what you suggested and
> with that protection memcmp is assigned.
>     Problem is in using the function pointer to call the compare function.
>     So we need protection for invoking compare function, under
> multi-process environment.
>
> 3. I couldn't come up with any other idea to protect this function pointer
> invocation.
>
> Thanks
> Dhana
>
>
>
>
> On Thu, Mar 3, 2016 at 12:44 AM, Qiu, Michael <michael.qiu@intel.com>
> wrote:
>
>> On 3/3/2016 11:36 AM, Dhana Eadala wrote:
>> > We found a problem in dpdk-2.2 using under multi-process environment.
>> > Here is the brief description how we are using the dpdk:
>> >
>> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2
>> are two different compiled binaries.
>> > proc1 is started as primary process and proc2 as secondary process.
>> >
>> > proc1:
>> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
>> structure.
>> > As part of this, this api initalized the rte_hash structure and set the
>> srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address
>> space.
>> >
>> > proc2:
>> > calls srcHash =  rte_hash_find_existing("src_hash_name"). This returns
>> the rte_hash created by proc1.
>> > This srcHash->rte_hash_cmp_eq still points to the address of memcmp()
>> from proc1 address space.
>> > Later proc2  calls rte_hash_lookup_with_hash(srcHash, (const void*)
>> &key, key.sig);
>> > Under the hood, rte_hash_lookup_with_hash() invokes
>> __rte_hash_lookup_with_hash(), which in turn calls h->rte_hash_cmp_eq(key,
>> k->key, h->key_len).
>> > This leads to a crash as h->rte_hash_cmp_eq is an address from proc1
>> address space and is invalid address in proc2 address space.
>> >
>> > We found, from dpdk documentation, that
>> >
>> > "
>> >  The use of function pointers between multiple processes running based
>> of different compiled
>> >  binaries is not supported, since the location of a given function in
>> one process may be different to
>> >  its location in a second. This prevents the librte_hash library from
>> behaving properly as in a  multi-
>> >  threaded instance, since it uses a pointer to the hash function
>> internally.
>> >
>> >  To work around this issue, it is recommended that multi-process
>> applications perform the hash
>> >  calculations by directly calling the hashing function from the code
>> and then using the
>> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions instead
>> of the functions which do
>> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
>> > "
>> >
>> > We did follow the recommended steps by invoking
>> rte_hash_lookup_with_hash().
>> > It was no issue up to and including dpdk-2.0. In later releases started
>> crashing because rte_hash_cmp_eq is introduced in dpdk-2.1
>> >
>> > We fixed it with the following patch and would like to submit the patch
>> to dpdk.org.
>> > Patch is created such that, if anyone wanted to use dpdk in
>> multi-process environment with function pointers not shared, they need to
>> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile. Without defining this
>> flag in Makefile, it works as it is now.
>> >
>> > Signed-off-by: Dhana Eadala <edreddy@gmail.com>
>> > ---
>> >
>>
>> Some comments:
>>
>> 1.  your commit log need to refactor, better to limit every line less
>> than 80 character.
>>
>> 2. I think you could add the ifdef here in
>> lib/librte_hash/rte_cuckoo_hash.c :
>> /*
>>  * If x86 architecture is used, select appropriate compare function,
>>  * which may use x86 instrinsics, otherwise use memcmp
>>  */
>> #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\
>>      defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64)
>>     /* Select function to compare keys */
>>     switch (params->key_len) {
>>     case 16:
>>         h->rte_hash_cmp_eq = rte_hash_k16_cmp_eq;
>>         break;
>> [...]
>>         break;
>>     default:
>>         /* If key is not multiple of 16, use generic memcmp */
>>         h->rte_hash_cmp_eq = memcmp;
>>     }
>> #else
>>     h->rte_hash_cmp_eq = memcmp;
>> #endif
>>
>> So that could remove other #ifdef in those lines.
>>
>> 3. I don't think ask others to write RTE_LIB_MP_NO_FUNC_PTR in makefile
>> is a good idea, if you really want to do that, please add a doc so that
>> others could know it.
>>
>> Thanks,
>> Michael
>>
>
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-03  5:44 ` Qiu, Michael
@ 2016-03-04  1:00   ` Dhananjaya Reddy Eadala
  2016-03-09 21:36     ` Dhananjaya Reddy Eadala
  0 siblings, 1 reply; 16+ messages in thread
From: Dhananjaya Reddy Eadala @ 2016-03-04  1:00 UTC (permalink / raw)
  To: Qiu, Michael; +Cc: dev

Hi Michael

Please see my answers to your comments here.

1. Sure, I will refactor the commit log to restrict not more than 80
characters.

2. Not sure how we can ifdef at the location you mentioned. Can you please
elaborate more on this.
    We already have similar ifdef protection to what you suggested and with
that protection memcmp is assigned.
    Problem is in using the function pointer to call the compare function.
    So we need protection for invoking compare function, under
multi-process environment.

3. I couldn't come up with any other idea to protect this function pointer
invocation.

Thanks
Dhana




On Thu, Mar 3, 2016 at 12:44 AM, Qiu, Michael <michael.qiu@intel.com> wrote:

> On 3/3/2016 11:36 AM, Dhana Eadala wrote:
> > We found a problem in dpdk-2.2 using under multi-process environment.
> > Here is the brief description how we are using the dpdk:
> >
> > We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are
> two different compiled binaries.
> > proc1 is started as primary process and proc2 as secondary process.
> >
> > proc1:
> > Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash
> structure.
> > As part of this, this api initalized the rte_hash structure and set the
> srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address
> space.
> >
> > proc2:
> > calls srcHash =  rte_hash_find_existing("src_hash_name"). This returns
> the rte_hash created by proc1.
> > This srcHash->rte_hash_cmp_eq still points to the address of memcmp()
> from proc1 address space.
> > Later proc2  calls rte_hash_lookup_with_hash(srcHash, (const void*)
> &key, key.sig);
> > Under the hood, rte_hash_lookup_with_hash() invokes
> __rte_hash_lookup_with_hash(), which in turn calls h->rte_hash_cmp_eq(key,
> k->key, h->key_len).
> > This leads to a crash as h->rte_hash_cmp_eq is an address from proc1
> address space and is invalid address in proc2 address space.
> >
> > We found, from dpdk documentation, that
> >
> > "
> >  The use of function pointers between multiple processes running based
> of different compiled
> >  binaries is not supported, since the location of a given function in
> one process may be different to
> >  its location in a second. This prevents the librte_hash library from
> behaving properly as in a  multi-
> >  threaded instance, since it uses a pointer to the hash function
> internally.
> >
> >  To work around this issue, it is recommended that multi-process
> applications perform the hash
> >  calculations by directly calling the hashing function from the code and
> then using the
> >  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions instead
> of the functions which do
> >  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> > "
> >
> > We did follow the recommended steps by invoking
> rte_hash_lookup_with_hash().
> > It was no issue up to and including dpdk-2.0. In later releases started
> crashing because rte_hash_cmp_eq is introduced in dpdk-2.1
> >
> > We fixed it with the following patch and would like to submit the patch
> to dpdk.org.
> > Patch is created such that, if anyone wanted to use dpdk in
> multi-process environment with function pointers not shared, they need to
> > define RTE_LIB_MP_NO_FUNC_PTR in their Makefile. Without defining this
> flag in Makefile, it works as it is now.
> >
> > Signed-off-by: Dhana Eadala <edreddy@gmail.com>
> > ---
> >
>
> Some comments:
>
> 1.  your commit log need to refactor, better to limit every line less
> than 80 character.
>
> 2. I think you could add the ifdef here in
> lib/librte_hash/rte_cuckoo_hash.c :
> /*
>  * If x86 architecture is used, select appropriate compare function,
>  * which may use x86 instrinsics, otherwise use memcmp
>  */
> #if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\
>      defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64)
>     /* Select function to compare keys */
>     switch (params->key_len) {
>     case 16:
>         h->rte_hash_cmp_eq = rte_hash_k16_cmp_eq;
>         break;
> [...]
>         break;
>     default:
>         /* If key is not multiple of 16, use generic memcmp */
>         h->rte_hash_cmp_eq = memcmp;
>     }
> #else
>     h->rte_hash_cmp_eq = memcmp;
> #endif
>
> So that could remove other #ifdef in those lines.
>
> 3. I don't think ask others to write RTE_LIB_MP_NO_FUNC_PTR in makefile
> is a good idea, if you really want to do that, please add a doc so that
> others could know it.
>
> Thanks,
> Michael
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] hash: fix memcmp function pointer in multi-process environment
  2016-03-03  3:35 [PATCH] hash: fix memcmp function pointer " Dhana Eadala
@ 2016-03-03  5:44 ` Qiu, Michael
  2016-03-04  1:00   ` Dhananjaya Reddy Eadala
  0 siblings, 1 reply; 16+ messages in thread
From: Qiu, Michael @ 2016-03-03  5:44 UTC (permalink / raw)
  To: Dhana Eadala, Richardson, Bruce, De Lara Guarch, Pablo; +Cc: dev

On 3/3/2016 11:36 AM, Dhana Eadala wrote:
> We found a problem in dpdk-2.2 using under multi-process environment.
> Here is the brief description how we are using the dpdk:
>
> We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are two different compiled binaries.
> proc1 is started as primary process and proc2 as secondary process.
>
> proc1:
> Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
> As part of this, this api initalized the rte_hash structure and set the srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.
>
> proc2:
> calls srcHash =  rte_hash_find_existing("src_hash_name"). This returns the rte_hash created by proc1.
> This srcHash->rte_hash_cmp_eq still points to the address of memcmp() from proc1 address space.
> Later proc2  calls rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
> Under the hood, rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(), which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
> This leads to a crash as h->rte_hash_cmp_eq is an address from proc1 address space and is invalid address in proc2 address space.
>
> We found, from dpdk documentation, that
>
> "
>  The use of function pointers between multiple processes running based of different compiled
>  binaries is not supported, since the location of a given function in one process may be different to
>  its location in a second. This prevents the librte_hash library from behaving properly as in a  multi-
>  threaded instance, since it uses a pointer to the hash function internally.
>
>  To work around this issue, it is recommended that multi-process applications perform the hash
>  calculations by directly calling the hashing function from the code and then using the
>  rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions instead of the functions which do
>  the hashing internally, such as rte_hash_add()/rte_hash_lookup().
> "
>
> We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
> It was no issue up to and including dpdk-2.0. In later releases started crashing because rte_hash_cmp_eq is introduced in dpdk-2.1
>
> We fixed it with the following patch and would like to submit the patch to dpdk.org.
> Patch is created such that, if anyone wanted to use dpdk in multi-process environment with function pointers not shared, they need to
> define RTE_LIB_MP_NO_FUNC_PTR in their Makefile. Without defining this flag in Makefile, it works as it is now.
>
> Signed-off-by: Dhana Eadala <edreddy@gmail.com>
> ---
>

Some comments:

1.  your commit log needs to be refactored; it is better to limit every line
to less than 80 characters.

2. I think you could add the ifdef here in
lib/librte_hash/rte_cuckoo_hash.c :
/*
 * If x86 architecture is used, select appropriate compare function,
 * which may use x86 instrinsics, otherwise use memcmp
 */
#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) ||\
     defined(RTE_ARCH_X86_X32) || defined(RTE_ARCH_ARM64)
    /* Select function to compare keys */
    switch (params->key_len) {
    case 16:
        h->rte_hash_cmp_eq = rte_hash_k16_cmp_eq;
        break;
[...]
        break;
    default:
        /* If key is not multiple of 16, use generic memcmp */
        h->rte_hash_cmp_eq = memcmp;
    }
#else
    h->rte_hash_cmp_eq = memcmp;
#endif

So that could remove other #ifdef in those lines.

3. I don't think ask others to write RTE_LIB_MP_NO_FUNC_PTR in makefile
is a good idea, if you really want to do that, please add a doc so that
others could know it.

Thanks,
Michael

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] hash: fix memcmp function pointer in multi-process environment
@ 2016-03-03  3:35 Dhana Eadala
  2016-03-03  5:44 ` Qiu, Michael
  0 siblings, 1 reply; 16+ messages in thread
From: Dhana Eadala @ 2016-03-03  3:35 UTC (permalink / raw)
  To: bruce.richardson, pablo.de.lara.guarch, michael.qiu; +Cc: dev

We found a problem in dpdk-2.2 when using it in a multi-process environment.
Here is the brief description how we are using the dpdk:

We have two processes proc1, proc2 using dpdk. These proc1 and proc2 are two different compiled binaries.
proc1 is started as primary process and proc2 as secondary process.

proc1:
Calls srcHash = rte_hash_create("src_hash_name") to create rte_hash structure.
As part of this, this api initialized the rte_hash structure and set the srcHash->rte_hash_cmp_eq to the address of memcmp() from proc1 address space.

proc2:
calls srcHash =  rte_hash_find_existing("src_hash_name"). This returns the rte_hash created by proc1.
This srcHash->rte_hash_cmp_eq still points to the address of memcmp() from proc1 address space.
Later proc2  calls rte_hash_lookup_with_hash(srcHash, (const void*) &key, key.sig);
Under the hood, rte_hash_lookup_with_hash() invokes __rte_hash_lookup_with_hash(), which in turn calls h->rte_hash_cmp_eq(key, k->key, h->key_len).
This leads to a crash as h->rte_hash_cmp_eq is an address from proc1 address space and is invalid address in proc2 address space.

We found, from dpdk documentation, that

"
 The use of function pointers between multiple processes running based of different compiled
 binaries is not supported, since the location of a given function in one process may be different to
 its location in a second. This prevents the librte_hash library from behaving properly as in a  multi-
 threaded instance, since it uses a pointer to the hash function internally.

 To work around this issue, it is recommended that multi-process applications perform the hash
 calculations by directly calling the hashing function from the code and then using the
 rte_hash_add_with_hash()/rte_hash_lookup_with_hash() functions instead of the functions which do
 the hashing internally, such as rte_hash_add()/rte_hash_lookup().
"

We did follow the recommended steps by invoking rte_hash_lookup_with_hash().
It was no issue up to and including dpdk-2.0. In later releases started crashing because rte_hash_cmp_eq is introduced in dpdk-2.1

We fixed it with the following patch and would like to submit the patch to dpdk.org.
Patch is created such that, if anyone wanted to use dpdk in multi-process environment with function pointers not shared, they need to
define RTE_LIB_MP_NO_FUNC_PTR in their Makefile. Without defining this flag in Makefile, it works as it is now.

Signed-off-by: Dhana Eadala <edreddy@gmail.com>
---
 lib/librte_hash/rte_cuckoo_hash.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 3e3167c..0946777 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -594,7 +594,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 				prim_bkt->signatures[i].alt == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					prim_bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				/* Enqueue index of free slot back in the ring. */
 				enqueue_slot_back(h, cached_free_slots, slot_id);
 				/* Update data */
@@ -614,7 +618,11 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 				sec_bkt->signatures[i].current == alt_hash) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					sec_bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				/* Enqueue index of free slot back in the ring. */
 				enqueue_slot_back(h, cached_free_slots, slot_id);
 				/* Update data */
@@ -725,7 +733,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp (key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				if (data != NULL)
 					*data = k->pdata;
 				/*
@@ -748,7 +760,11 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].alt == sig) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				if (data != NULL)
 					*data = k->pdata;
 				/*
@@ -840,7 +856,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				remove_entry(h, bkt, i);
 
 				/*
@@ -863,7 +883,11 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
 				bkt->signatures[i].sig != NULL_SIGNATURE) {
 			k = (struct rte_hash_key *) ((char *)keys +
 					bkt->key_idx[i] * h->key_entry_size);
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+			if (memcmp(key, k->key, h->key_len) == 0) {
+#else
 			if (h->rte_hash_cmp_eq(key, k->key, h->key_len) == 0) {
+#endif
 				remove_entry(h, bkt, i);
 
 				/*
@@ -980,7 +1004,11 @@ lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * co
 	unsigned hit;
 	unsigned key_idx;
 
+#ifdef RTE_LIB_MP_NO_FUNC_PTR
+	hit = !memcmp(key_slot->key, keys[idx], h->key_len);
+#else
 	hit = !h->rte_hash_cmp_eq(key_slot->key, keys[idx], h->key_len);
+#endif
 	if (data != NULL)
 		data[idx] = key_slot->pdata;
 
-- 
2.5.0

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2016-04-19 21:13 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-14  2:16 [PATCH] hash: fix memcmp function pointer in multi-process environment Dhana Eadala
2016-03-14  2:30 ` 张伟
2016-03-14  4:38 ` 张伟
2016-03-15  0:57   ` Dhananjaya Eadala
2016-03-15  1:02     ` 张伟
2016-03-24 14:00       ` De Lara Guarch, Pablo
2016-04-01 13:14         ` Thomas Monjalon
2016-03-22 11:42 ` Thomas Monjalon
2016-03-22 19:53   ` De Lara Guarch, Pablo
2016-04-19  4:58     ` rte_hash_del_key crash " 张伟
2016-04-19  7:39       ` De Lara Guarch, Pablo
2016-04-19 21:12         ` 张伟
  -- strict thread matches above, loose matches on Subject: below --
2016-03-03  3:35 [PATCH] hash: fix memcmp function pointer " Dhana Eadala
2016-03-03  5:44 ` Qiu, Michael
2016-03-04  1:00   ` Dhananjaya Reddy Eadala
2016-03-09 21:36     ` Dhananjaya Reddy Eadala

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.