All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure
       [not found] <1403572832-9066-1-git-send-email-normalperson@yhbt.net>
@ 2014-06-24  1:20 ` Eric Wong
  2014-06-24  1:20 ` [PATCH 2/3] rculfhash: handle pthread_create failures Eric Wong
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-06-24  1:20 UTC (permalink / raw)
  To: lttng-dev; +Cc: e

Having a calloc fail on my server should not be fatal.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 rculfhash.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/rculfhash.c b/rculfhash.c
index 7d39388..2a45045 100644
--- a/rculfhash.c
+++ b/rculfhash.c
@@ -1189,7 +1189,10 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 	}
 	partition_len = len >> cds_lfht_get_count_order_ulong(nr_threads);
 	work = calloc(nr_threads, sizeof(*work));
-	assert(work);
+	if (!work) {
+		dbg_printf("error allocating for resize, single-threading\n");
+		goto fallback;
+	}
 	for (thread = 0; thread < nr_threads; thread++) {
 		work[thread].ht = ht;
 		work[thread].i = i;
@@ -1205,6 +1208,11 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		assert(!ret);
 	}
 	free(work);
+	return;
+fallback:
+	ht->flavor->thread_online();
+	fct(ht, i, 0, len);
+	ht->flavor->thread_offline();
 }
 
 /*
-- 
2.0.0.259.gbf1bc9c

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/3] rculfhash: handle pthread_create failures
       [not found] <1403572832-9066-1-git-send-email-normalperson@yhbt.net>
  2014-06-24  1:20 ` [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure Eric Wong
@ 2014-06-24  1:20 ` Eric Wong
  2014-06-24  1:20 ` [PATCH 3/3] rculfhash: remove duplicated code Eric Wong
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-06-24  1:20 UTC (permalink / raw)
  To: lttng-dev; +Cc: e

Like calloc, pthread_create may fail with EAGAIN due to a lack
of resources.  Account for that and gracefully continue.

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 rculfhash.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/rculfhash.c b/rculfhash.c
index 2a45045..d534be2 100644
--- a/rculfhash.c
+++ b/rculfhash.c
@@ -1171,6 +1171,7 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		void (*fct)(struct cds_lfht *ht, unsigned long i,
 			unsigned long start, unsigned long len))
 {
+	unsigned long start = 0;
 	unsigned long partition_len;
 	struct partition_resize_work *work;
 	int thread, ret;
@@ -1201,6 +1202,18 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		work[thread].fct = fct;
 		ret = pthread_create(&(work[thread].thread_id), ht->resize_attr,
 			partition_resize_thread, &work[thread]);
+		if (ret == EAGAIN) {
+			/*
+			 * out of resources: wait and join the threads
+			 * we've created, then handle leftovers
+			 */
+			dbg_printf(
+			      "error spawning for resize, single-threading\n");
+			start = work[thread].start;
+			len -= start;
+			nr_threads = thread;
+			break;
+		}
 		assert(!ret);
 	}
 	for (thread = 0; thread < nr_threads; thread++) {
@@ -1208,10 +1221,17 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 		assert(!ret);
 	}
 	free(work);
-	return;
+
+	/*
+	 * a pthread_create failure above will either lead in us having
+	 * no threads to join or starting at a non-zero offset,
+	 * fallback to single thread processing of leftovers
+	 */
+	if (start == 0 && nr_threads > 0)
+		return;
 fallback:
 	ht->flavor->thread_online();
-	fct(ht, i, 0, len);
+	fct(ht, i, start, len);
 	ht->flavor->thread_offline();
 }
 
-- 
2.0.0.259.gbf1bc9c

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 3/3] rculfhash: remove duplicated code
       [not found] <1403572832-9066-1-git-send-email-normalperson@yhbt.net>
  2014-06-24  1:20 ` [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure Eric Wong
  2014-06-24  1:20 ` [PATCH 2/3] rculfhash: handle pthread_create failures Eric Wong
@ 2014-06-24  1:20 ` Eric Wong
       [not found] ` <1403572832-9066-2-git-send-email-normalperson@yhbt.net>
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-06-24  1:20 UTC (permalink / raw)
  To: lttng-dev; +Cc: e

Signed-off-by: Eric Wong <normalperson@yhbt.net>
---
 rculfhash.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/rculfhash.c b/rculfhash.c
index d534be2..61da37a 100644
--- a/rculfhash.c
+++ b/rculfhash.c
@@ -1177,6 +1177,10 @@ void partition_resize_helper(struct cds_lfht *ht, unsigned long i,
 	int thread, ret;
 	unsigned long nr_threads;
 
+	assert(nr_cpus_mask != -1);
+	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD)
+		goto fallback;
+
 	/*
 	 * Note: nr_cpus_mask + 1 is always power of 2.
 	 * We spawn just the number of threads we need to satisfy the minimum
@@ -1270,13 +1274,6 @@ static
 void init_table_populate(struct cds_lfht *ht, unsigned long i,
 			 unsigned long len)
 {
-	assert(nr_cpus_mask != -1);
-	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
-		ht->flavor->thread_online();
-		init_table_populate_partition(ht, i, 0, len);
-		ht->flavor->thread_offline();
-		return;
-	}
 	partition_resize_helper(ht, i, len, init_table_populate_partition);
 }
 
@@ -1369,14 +1366,6 @@ void remove_table_partition(struct cds_lfht *ht, unsigned long i,
 static
 void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len)
 {
-
-	assert(nr_cpus_mask != -1);
-	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
-		ht->flavor->thread_online();
-		remove_table_partition(ht, i, 0, len);
-		ht->flavor->thread_offline();
-		return;
-	}
 	partition_resize_helper(ht, i, len, remove_table_partition);
 }
 
-- 
2.0.0.259.gbf1bc9c

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure
       [not found] ` <1403572832-9066-2-git-send-email-normalperson@yhbt.net>
@ 2014-07-31 20:45   ` Mathieu Desnoyers
  0 siblings, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-07-31 20:45 UTC (permalink / raw)
  To: Eric Wong; +Cc: e, lttng-dev

Merged into master, stable-0.7 and stable-0.8, thanks!

Mathieu

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: lttng-dev@lists.lttng.org
> Cc: e@80x24.org
> Sent: Monday, June 23, 2014 9:20:30 PM
> Subject: [lttng-dev] [PATCH 1/3] rculfhash: fall back to single-threaded	resize on calloc failure
> 
> Having a calloc fail on my server should not be fatal.
> 
> Signed-off-by: Eric Wong <normalperson@yhbt.net>
> ---
>  rculfhash.c | 10 +++++++++-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/rculfhash.c b/rculfhash.c
> index 7d39388..2a45045 100644
> --- a/rculfhash.c
> +++ b/rculfhash.c
> @@ -1189,7 +1189,10 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  	}
>  	partition_len = len >> cds_lfht_get_count_order_ulong(nr_threads);
>  	work = calloc(nr_threads, sizeof(*work));
> -	assert(work);
> +	if (!work) {
> +		dbg_printf("error allocating for resize, single-threading\n");
> +		goto fallback;
> +	}
>  	for (thread = 0; thread < nr_threads; thread++) {
>  		work[thread].ht = ht;
>  		work[thread].i = i;
> @@ -1205,6 +1208,11 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  		assert(!ret);
>  	}
>  	free(work);
> +	return;
> +fallback:
> +	ht->flavor->thread_online();
> +	fct(ht, i, 0, len);
> +	ht->flavor->thread_offline();
>  }
>  
>  /*
> --
> 2.0.0.259.gbf1bc9c
> 
> 
> _______________________________________________
> lttng-dev mailing list
> lttng-dev@lists.lttng.org
> http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] rculfhash: handle pthread_create failures
       [not found] ` <1403572832-9066-3-git-send-email-normalperson@yhbt.net>
@ 2014-07-31 20:57   ` Mathieu Desnoyers
  0 siblings, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-07-31 20:57 UTC (permalink / raw)
  To: Eric Wong; +Cc: e, lttng-dev

Merged with minor style edits into master, stable-0.8 and stable-0.7,
thanks!

Mathieu

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: lttng-dev@lists.lttng.org
> Cc: e@80x24.org
> Sent: Monday, June 23, 2014 9:20:31 PM
> Subject: [lttng-dev] [PATCH 2/3] rculfhash: handle pthread_create failures
> 
> Like calloc, pthread_create may fail with EAGAIN due to a lack
> of resources.  Account for that and gracefully continue.
> 
> Signed-off-by: Eric Wong <normalperson@yhbt.net>
> ---
>  rculfhash.c | 24 ++++++++++++++++++++++--
>  1 file changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/rculfhash.c b/rculfhash.c
> index 2a45045..d534be2 100644
> --- a/rculfhash.c
> +++ b/rculfhash.c
> @@ -1171,6 +1171,7 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  		void (*fct)(struct cds_lfht *ht, unsigned long i,
>  			unsigned long start, unsigned long len))
>  {
> +	unsigned long start = 0;
>  	unsigned long partition_len;
>  	struct partition_resize_work *work;
>  	int thread, ret;
> @@ -1201,6 +1202,18 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  		work[thread].fct = fct;
>  		ret = pthread_create(&(work[thread].thread_id), ht->resize_attr,
>  			partition_resize_thread, &work[thread]);
> +		if (ret == EAGAIN) {
> +			/*
> +			 * out of resources: wait and join the threads
> +			 * we've created, then handle leftovers
> +			 */
> +			dbg_printf(
> +			      "error spawning for resize, single-threading\n");
> +			start = work[thread].start;
> +			len -= start;
> +			nr_threads = thread;
> +			break;
> +		}
>  		assert(!ret);
>  	}
>  	for (thread = 0; thread < nr_threads; thread++) {
> @@ -1208,10 +1221,17 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  		assert(!ret);
>  	}
>  	free(work);
> -	return;
> +
> +	/*
> +	 * a pthread_create failure above will either lead in us having
> +	 * no threads to join or starting at a non-zero offset,
> +	 * fallback to single thread processing of leftovers
> +	 */
> +	if (start == 0 && nr_threads > 0)
> +		return;
>  fallback:
>  	ht->flavor->thread_online();
> -	fct(ht, i, 0, len);
> +	fct(ht, i, start, len);
>  	ht->flavor->thread_offline();
>  }
>  
> --
> 2.0.0.259.gbf1bc9c
> 
> 
> _______________________________________________
> lttng-dev mailing list
> lttng-dev@lists.lttng.org
> http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] rculfhash: remove duplicated code
       [not found] ` <1403572832-9066-4-git-send-email-normalperson@yhbt.net>
@ 2014-07-31 21:00   ` Mathieu Desnoyers
  0 siblings, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-07-31 21:00 UTC (permalink / raw)
  To: Eric Wong; +Cc: e, lttng-dev

Merged into master, stable-0.8, stable-0.7, thanks!

Mathieu

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: lttng-dev@lists.lttng.org
> Cc: e@80x24.org
> Sent: Monday, June 23, 2014 9:20:32 PM
> Subject: [lttng-dev] [PATCH 3/3] rculfhash: remove duplicated code
> 
> Signed-off-by: Eric Wong <normalperson@yhbt.net>
> ---
>  rculfhash.c | 19 ++++---------------
>  1 file changed, 4 insertions(+), 15 deletions(-)
> 
> diff --git a/rculfhash.c b/rculfhash.c
> index d534be2..61da37a 100644
> --- a/rculfhash.c
> +++ b/rculfhash.c
> @@ -1177,6 +1177,10 @@ void partition_resize_helper(struct cds_lfht *ht,
> unsigned long i,
>  	int thread, ret;
>  	unsigned long nr_threads;
>  
> +	assert(nr_cpus_mask != -1);
> +	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD)
> +		goto fallback;
> +
>  	/*
>  	 * Note: nr_cpus_mask + 1 is always power of 2.
>  	 * We spawn just the number of threads we need to satisfy the minimum
> @@ -1270,13 +1274,6 @@ static
>  void init_table_populate(struct cds_lfht *ht, unsigned long i,
>  			 unsigned long len)
>  {
> -	assert(nr_cpus_mask != -1);
> -	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
> -		ht->flavor->thread_online();
> -		init_table_populate_partition(ht, i, 0, len);
> -		ht->flavor->thread_offline();
> -		return;
> -	}
>  	partition_resize_helper(ht, i, len, init_table_populate_partition);
>  }
>  
> @@ -1369,14 +1366,6 @@ void remove_table_partition(struct cds_lfht *ht,
> unsigned long i,
>  static
>  void remove_table(struct cds_lfht *ht, unsigned long i, unsigned long len)
>  {
> -
> -	assert(nr_cpus_mask != -1);
> -	if (nr_cpus_mask < 0 || len < 2 * MIN_PARTITION_PER_THREAD) {
> -		ht->flavor->thread_online();
> -		remove_table_partition(ht, i, 0, len);
> -		ht->flavor->thread_offline();
> -		return;
> -	}
>  	partition_resize_helper(ht, i, len, remove_table_partition);
>  }
>  
> --
> 2.0.0.259.gbf1bc9c
> 
> 
> _______________________________________________
> lttng-dev mailing list
> lttng-dev@lists.lttng.org
> http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found] <1403572832-9066-1-git-send-email-normalperson@yhbt.net>
                   ` (5 preceding siblings ...)
       [not found] ` <1403572832-9066-4-git-send-email-normalperson@yhbt.net>
@ 2014-07-31 21:08 ` Mathieu Desnoyers
       [not found] ` <1681102864.24131.1406840905204.JavaMail.zimbra@efficios.com>
  7 siblings, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-07-31 21:08 UTC (permalink / raw)
  To: Eric Wong; +Cc: e, lttng-dev

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: lttng-dev@lists.lttng.org
> Cc: e@80x24.org
> Sent: Monday, June 23, 2014 9:20:29 PM
> Subject: [lttng-dev] [PATCH 0/3] rculfhash: error checking fixes
> 
> Hi Mathieu, I noticed the pthread_create handling is a bit dangerous in
> rculfhash since it may kill the process long after the process is up and
> running.  I also fixed the calloc failure handling since I was in the
> area.

All merged, thanks! Sorry for the delay, the past month has been quite busy.

> 
> In the future, it would also be nice if the alloc_bucket_table behavior
> could fail gracefully on ENOMEM, but that would break ABI...

We could pull this into master for an eventual 0.9 release. We already
have a few ABI breaking changes that require us to bump the ABI in
master.

Thoughts ?

Thanks,

Mathieu

> 
> The following changes since commit d863e15371666dff14e7f3191de5ff91d9b24d7f:
> 
>   Cleanup: tests: cast console write return value as void (2014-05-13
>   09:27:36 -0400)
> 
> are available in the git repository at:
> 
>   git://80x24.org/urcu rculfhash-errcheck
> 
> for you to fetch changes up to d54dd3e6ada2edf4599fc0f26e647a6942a73a39:
> 
>   rculfhash: remove duplicated code (2014-06-24 00:39:26 +0000)
> 
> ----------------------------------------------------------------
> Eric Wong (3):
>       rculfhash: fall back to single-threaded resize on calloc failure
>       rculfhash: handle pthread_create failures
>       rculfhash: remove duplicated code
> 
>  rculfhash.c | 49 +++++++++++++++++++++++++++++++++----------------
>  1 file changed, 33 insertions(+), 16 deletions(-)
> 
> P.S. I still hope to resume the wfcqueue-related epoll improvements
>      we started on last year in the kernel.
> 
> _______________________________________________
> lttng-dev mailing list
> lttng-dev@lists.lttng.org
> http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
> 

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found] ` <1681102864.24131.1406840905204.JavaMail.zimbra@efficios.com>
@ 2014-07-31 21:35   ` Eric Wong
       [not found]   ` <20140731213500.GA662@dcvr.yhbt.net>
  1 sibling, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-07-31 21:35 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: e, lttng-dev

Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> All merged, thanks! Sorry for the delay, the past month has been quite busy.

No worries.  I've been sidetracked into other projects, too.

> Eric Wong <normalperson@yhbt.net> wrote:
> > In the future, it would also be nice if the alloc_bucket_table behavior
> > could fail gracefully on ENOMEM, but that would break ABI...
> 
> We could pull this into master for an eventual 0.9 release. We already
> have a few ABI breaking changes that require us to bump the ABI in
> master.

OK.  I'll try to work on that (unless you have more time now, I've
been sidetracked too :x)

While we're on ABI/API changes, there's also several other changes
I would like:

* Remove the pthread_mutex_t requirement in the wfcqueue and lfstack
  structures.

  I suppose we could implement limited new classes and layer the
  complete old ones on top to preserve ABI/API compatibility.

* cmpxchg_double (cmpxchg16b on x86-64) so lfstack can use
  a lock-free stack for single pop operations.  I'm currently using
  ck_stack from ConcurrencyKit, but generally prefer using the
  URCU APIs and it would be great if lfstack could support this
  on some arches.

Do you have any timeline for a 0.9 release?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found]   ` <20140731213500.GA662@dcvr.yhbt.net>
@ 2014-07-31 22:05     ` Mathieu Desnoyers
       [not found]     ` <1996010706.24303.1406844354939.JavaMail.zimbra@efficios.com>
  1 sibling, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-07-31 22:05 UTC (permalink / raw)
  To: Eric Wong; +Cc: e, lttng-dev

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: "Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>
> Cc: lttng-dev@lists.lttng.org, e@80x24.org
> Sent: Thursday, July 31, 2014 5:35:00 PM
> Subject: Re: [lttng-dev] [PATCH 0/3] rculfhash: error checking fixes
> 
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> > All merged, thanks! Sorry for the delay, the past month has been quite
> > busy.
> 
> No worries.  I've been sidetracked into other projects, too.
> 
> > Eric Wong <normalperson@yhbt.net> wrote:
> > > In the future, it would also be nice if the alloc_bucket_table behavior
> > > could fail gracefully on ENOMEM, but that would break ABI...
> > 
> > We could pull this into master for an eventual 0.9 release. We already
> > have a few ABI breaking changes that require us to bump the ABI in
> > master.
> 
> OK.  I'll try to work on that (unless you have more time now, I've
> been sidetracked too :x)

I do have to focus on other things at the moment, so you are welcome
to contribute those changes!

> 
> While we're on ABI/API changes, there's also several other changes
> I would like:
> 
> * Remove the pthread_mutex_t requirement in the wfcqueue and lfstack
>   structures.

Indeed, the mutex is not needed for all users. However, it allows us
to implement APIs that can be used by end users who want all
synchronization to be taken care of for them. In opposition, we
have the __ prefixed API which require to read and understand the
comments about synchronization requirements.

I wonder: what is the issue with having this mutex field in there ?
Is it space ? Having a dependency on pthread ? other ?

> 
>   I suppose we could implement limited new classes and layer the
>   complete old ones on top to preserve ABI/API compatibility.

If we decide to remove the pthread_mutex_t field, this would indeed
be the way to do it.

> 
> * cmpxchg_double (cmpxchg16b on x86-64) so lfstack can use
>   a lock-free stack for single pop operations.  I'm currently using
>   ck_stack from ConcurrencyKit, but generally prefer using the
>   URCU APIs and it would be great if lfstack could support this
>   on some arches.

Is there a way to implement a fallback for architectures that don't
have the double cmpxchg ? My intent is that if we start doing
optimizations for some architectures, the APIs can still be used
as is by applications ported to other architectures (modulo a
performance penality cost if unavoidable).

> 
> Do you have any timeline for a 0.9 release?
> 

I would have liked to bring a major new feature like my RCU judy
array in 0.9, but unfortunately I keep getting side tracked on
other things :-/ I still need to rework the API and add more
tests. So no ETA for now.

Thanks!

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found]     ` <1996010706.24303.1406844354939.JavaMail.zimbra@efficios.com>
@ 2014-07-31 22:25       ` Eric Wong
       [not found]       ` <20140731222545.GA3947@dcvr.yhbt.net>
  1 sibling, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-07-31 22:25 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: e, lttng-dev

Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> I do have to focus on other things at the moment, so you are welcome
> to contribute those changes!

OK, I'll try to get back to the rculfhash-using project in the next few
weeks (but no promises)

> > While we're on ABI/API changes, there's also several other changes
> > I would like:
> > 
> > * Remove the pthread_mutex_t requirement in the wfcqueue and lfstack
> >   structures.
> 
> Indeed, the mutex is not needed for all users. However, it allows us
> to implement APIs that can be used by end users who want all
> synchronization to be taken care of for them. In opposition, we
> have the __ prefixed API which require to read and understand the
> comments about synchronization requirements.
> 
> I wonder: what is the issue with having this mutex field in there ?
> Is it space ? Having a dependency on pthread ? other ?

Space.  On my current project, I can use a union and put other stuff
where the mutex is.

> >   I suppose we could implement limited new classes and layer the
> >   complete old ones on top to preserve ABI/API compatibility.
> 
> If we decide to remove the pthread_mutex_t field, this would indeed
> be the way to do it.

OK.  Then the hardest part is probably naming the new API :)

> > * cmpxchg_double (cmpxchg16b on x86-64) so lfstack can use
> >   a lock-free stack for single pop operations.  I'm currently using
> >   ck_stack from ConcurrencyKit, but generally prefer using the
> >   URCU APIs and it would be great if lfstack could support this
> >   on some arches.
> 
> Is there a way to implement a fallback for architectures that don't
> have the double cmpxchg ?

Unfortunately not.  I have completely separate code paths, also cannot
support early AMD64 machines which lack cmpxchg16b.

> My intent is that if we start doing
> optimizations for some architectures, the APIs can still be used
> as is by applications ported to other architectures (modulo a
> performance penality cost if unavoidable).

Understandable.  I wonder if regular cmpxchg with pointer-packing
for the generation counter works.  I'll have to try that.

> > Do you have any timeline for a 0.9 release?
> 
> I would have liked to bring a major new feature like my RCU judy
> array in 0.9, but unfortunately I keep getting side tracked on
> other things :-/ I still need to rework the API and add more
> tests. So no ETA for now.

OK, no worries.  I also noticed some rbtree work from the old days.
Is that still on the horizon?

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found]       ` <20140731222545.GA3947@dcvr.yhbt.net>
@ 2014-08-01  0:12         ` Mathieu Desnoyers
       [not found]         ` <252105882.24338.1406851935525.JavaMail.zimbra@efficios.com>
  1 sibling, 0 replies; 13+ messages in thread
From: Mathieu Desnoyers @ 2014-08-01  0:12 UTC (permalink / raw)
  To: Eric Wong, Paul E. McKenney, Lai Jiangshan; +Cc: e, lttng-dev

----- Original Message -----
> From: "Eric Wong" <normalperson@yhbt.net>
> To: "Mathieu Desnoyers" <mathieu.desnoyers@efficios.com>
> Cc: lttng-dev@lists.lttng.org, e@80x24.org
> Sent: Thursday, July 31, 2014 6:25:45 PM
> Subject: Re: [lttng-dev] [PATCH 0/3] rculfhash: error checking fixes
> 
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> > I do have to focus on other things at the moment, so you are welcome
> > to contribute those changes!
> 
> OK, I'll try to get back to the rculfhash-using project in the next few
> weeks (but no promises)

Great!

> 
> > > While we're on ABI/API changes, there's also several other changes
> > > I would like:
> > > 
> > > * Remove the pthread_mutex_t requirement in the wfcqueue and lfstack
> > >   structures.
> > 
> > Indeed, the mutex is not needed for all users. However, it allows us
> > to implement APIs that can be used by end users who want all
> > synchronization to be taken care of for them. In opposition, we
> > have the __ prefixed API which require to read and understand the
> > comments about synchronization requirements.
> > 
> > I wonder: what is the issue with having this mutex field in there ?
> > Is it space ? Having a dependency on pthread ? other ?
> 
> Space.  On my current project, I can use a union and put other stuff
> where the mutex is.
> 
> > >   I suppose we could implement limited new classes and layer the
> > >   complete old ones on top to preserve ABI/API compatibility.
> > 
> > If we decide to remove the pthread_mutex_t field, this would indeed
> > be the way to do it.
> 
> OK.  Then the hardest part is probably naming the new API :)

I'm trying something with transparent union. See patch in separate
email.

> 
> > > * cmpxchg_double (cmpxchg16b on x86-64) so lfstack can use
> > >   a lock-free stack for single pop operations.  I'm currently using
> > >   ck_stack from ConcurrencyKit, but generally prefer using the
> > >   URCU APIs and it would be great if lfstack could support this
> > >   on some arches.
> > 
> > Is there a way to implement a fallback for architectures that don't
> > have the double cmpxchg ?
> 
> Unfortunately not.  I have completely separate code paths, also cannot
> support early AMD64 machines which lack cmpxchg16b.

This means we would have to dynamically detect if the CPU supports
the instruction, and fallback to a different way of doing things.
So we would have to plan space (e.g. a union) for both the cmpxchg16b
and the fallback, with possibly a compiler flag that would allow
compiling out the fallback if the user really care about compactness,
and not about portability.

> 
> > My intent is that if we start doing
> > optimizations for some architectures, the APIs can still be used
> > as is by applications ported to other architectures (modulo a
> > performance penality cost if unavoidable).
> 
> Understandable.  I wonder if regular cmpxchg with pointer-packing
> for the generation counter works.  I'll have to try that.

Not sure I understand your idea here.

I wonder if we could simply fallback on a mutex for the fallback ?

> 
> > > Do you have any timeline for a 0.9 release?
> > 
> > I would have liked to bring a major new feature like my RCU judy
> > array in 0.9, but unfortunately I keep getting side tracked on
> > other things :-/ I still need to rework the API and add more
> > tests. So no ETA for now.
> 
> OK, no worries.  I also noticed some rbtree work from the old days.
> Is that still on the horizon?

I don't think I want to push the rbtree code in production. It seems
to work, but I have a hard time convincing myself that I would be
able to debug it if an issue happens within this code. Moreover,
the RCU judy array seems more flexible in many ways than the rbtree,
so I prefer investing my time on Judy at the moment.

Thanks!

Mathieu


-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 0/3] rculfhash: error checking fixes
       [not found]         ` <252105882.24338.1406851935525.JavaMail.zimbra@efficios.com>
@ 2014-08-01  1:44           ` Eric Wong
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-08-01  1:44 UTC (permalink / raw)
  To: Mathieu Desnoyers; +Cc: e, lttng-dev, Paul E. McKenney

Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> I'm trying something with transparent union. See patch in separate
> email.

Thanks!  I didn't know about the transparent union feature in GCC,
looks much nicer than what I would've done :)

> > > > * cmpxchg_double (cmpxchg16b on x86-64) so lfstack can use
> > > >   a lock-free stack for single pop operations.  I'm currently using
> > > >   ck_stack from ConcurrencyKit, but generally prefer using the
> > > >   URCU APIs and it would be great if lfstack could support this
> > > >   on some arches.
> > > 
> > > Is there a way to implement a fallback for architectures that don't
> > > have the double cmpxchg ?
> > 
> > Unfortunately not.  I have completely separate code paths, also cannot
> > support early AMD64 machines which lack cmpxchg16b.
> 
> This means we would have to dynamically detect if the CPU supports
> the instruction, and fallback to a different way of doing things.
> So we would have to plan space (e.g. a union) for both the cmpxchg16b
> and the fallback, with possibly a compiler flag that would allow
> compiling out the fallback if the user really care about compactness,
> and not about portability.

Yeah, it's a bit of a pain :/

> > > My intent is that if we start doing
> > > optimizations for some architectures, the APIs can still be used
> > > as is by applications ported to other architectures (modulo a
> > > performance penality cost if unavoidable).
> > 
> > Understandable.  I wonder if regular cmpxchg with pointer-packing
> > for the generation counter works.  I'll have to try that.
> 
> Not sure I understand your idea here.

The lock-free pop in ck_stack relies on an extra generation counter
field to avoid ABA problem.  I could pack that counter into the normal
head pointer of the stack since current x86-64 only uses 48-bit address
space; so I could have 20 bits for the counter (including bits for
alignment).  I doubt it could be trusted on systems with many
cores/threads, though.

On the other hand, my choice of a lock-free stack already wastes cycles
spinning when the stack is empty; so I might choose something else
entirely.

Thanks for your responses!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 0/3] rculfhash: error checking fixes
@ 2014-06-24  1:20 Eric Wong
  0 siblings, 0 replies; 13+ messages in thread
From: Eric Wong @ 2014-06-24  1:20 UTC (permalink / raw)
  To: lttng-dev; +Cc: e

Hi Mathieu, I noticed the pthread_create handling is a bit dangerous in
rculfhash since it may kill the process long after the process is up and
running.  I also fixed the calloc failure handling since I was in the
area.

In the future, it would also be nice if the alloc_bucket_table behavior
could fail gracefully on ENOMEM, but that would break ABI...

The following changes since commit d863e15371666dff14e7f3191de5ff91d9b24d7f:

  Cleanup: tests: cast console write return value as void (2014-05-13 09:27:36 -0400)

are available in the git repository at:

  git://80x24.org/urcu rculfhash-errcheck

for you to fetch changes up to d54dd3e6ada2edf4599fc0f26e647a6942a73a39:

  rculfhash: remove duplicated code (2014-06-24 00:39:26 +0000)

----------------------------------------------------------------
Eric Wong (3):
      rculfhash: fall back to single-threaded resize on calloc failure
      rculfhash: handle pthread_create failures
      rculfhash: remove duplicated code

 rculfhash.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

P.S. I still hope to resume the wfcqueue-related epoll improvements
     we started on last year in the kernel.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-08-01  1:44 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1403572832-9066-1-git-send-email-normalperson@yhbt.net>
2014-06-24  1:20 ` [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure Eric Wong
2014-06-24  1:20 ` [PATCH 2/3] rculfhash: handle pthread_create failures Eric Wong
2014-06-24  1:20 ` [PATCH 3/3] rculfhash: remove duplicated code Eric Wong
     [not found] ` <1403572832-9066-2-git-send-email-normalperson@yhbt.net>
2014-07-31 20:45   ` [PATCH 1/3] rculfhash: fall back to single-threaded resize on calloc failure Mathieu Desnoyers
     [not found] ` <1403572832-9066-3-git-send-email-normalperson@yhbt.net>
2014-07-31 20:57   ` [PATCH 2/3] rculfhash: handle pthread_create failures Mathieu Desnoyers
     [not found] ` <1403572832-9066-4-git-send-email-normalperson@yhbt.net>
2014-07-31 21:00   ` [PATCH 3/3] rculfhash: remove duplicated code Mathieu Desnoyers
2014-07-31 21:08 ` [PATCH 0/3] rculfhash: error checking fixes Mathieu Desnoyers
     [not found] ` <1681102864.24131.1406840905204.JavaMail.zimbra@efficios.com>
2014-07-31 21:35   ` Eric Wong
     [not found]   ` <20140731213500.GA662@dcvr.yhbt.net>
2014-07-31 22:05     ` Mathieu Desnoyers
     [not found]     ` <1996010706.24303.1406844354939.JavaMail.zimbra@efficios.com>
2014-07-31 22:25       ` Eric Wong
     [not found]       ` <20140731222545.GA3947@dcvr.yhbt.net>
2014-08-01  0:12         ` Mathieu Desnoyers
     [not found]         ` <252105882.24338.1406851935525.JavaMail.zimbra@efficios.com>
2014-08-01  1:44           ` Eric Wong
2014-06-24  1:20 Eric Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.