All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
@ 2010-04-22 17:48 San Mehat
  2010-04-22 18:03 ` Milan Broz
  0 siblings, 1 reply; 18+ messages in thread
From: San Mehat @ 2010-04-22 17:48 UTC (permalink / raw)
  To: dm-devel
  Cc: San Mehat, Brian Swetland, Andrew Morton, Christophe Saout, Milan Broz

Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
work-queues. This patch adds an option which will create one set of
work-queues on init, and re-uses them for all dm-crypt target instances.

Cc: Milan Broz <mbroz@redhat.com>
Cc: Brian Swetland <swetland@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Christophe Saout <christophe@saout.de>
Signed-off-by: San Mehat <san@google.com>
---
 drivers/md/Kconfig    |   10 ++++++++++
 drivers/md/dm-crypt.c |   42 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2158377..8d82dfc 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -244,6 +244,16 @@ config DM_CRYPT
 
 	  If unsure, say N.
 
+config DM_CRYPT_GLOBAL_WORKQUEUES
+	boolean "Use global instead of per-device work-queues"
+	depends on DM_CRYPT
+	---help---
+	  Normally 2 kernel work-queue threads are created for every
+	  dm-crypt target. This option creates only 1 set of work-queues
+	  on init, and re-uses them.
+
+	  If unsure, say N.
+
 config DM_SNAPSHOT
        tristate "Snapshot target"
        depends on BLK_DEV_DM
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 959d6d1..875ad9a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -104,8 +104,10 @@ struct crypt_config {
 	mempool_t *page_pool;
 	struct bio_set *bs;
 
+#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
+#endif
 
 	/*
 	 * crypto related data
@@ -148,6 +150,10 @@ struct crypt_config {
 #define MIN_BIO_PAGES  8
 
 static struct kmem_cache *_crypt_io_pool;
+#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
+static struct workqueue_struct *_io_queue;
+static struct workqueue_struct *_crypt_queue;
+#endif
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -730,7 +736,11 @@ static void kcryptd_queue_io(struct dm_crypt_io *io)
 	struct crypt_config *cc = io->target->private;
 
 	INIT_WORK(&io->work, kcryptd_io);
+#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
+	queue_work(_io_queue, &io->work);
+#else
 	queue_work(cc->io_queue, &io->work);
+#endif
 }
 
 static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
@@ -914,7 +924,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
 	struct crypt_config *cc = io->target->private;
 
 	INIT_WORK(&io->work, kcryptd_crypt);
+#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
+	queue_work(_crypt_queue, &io->work);
+#else
 	queue_work(cc->crypt_queue, &io->work);
+#endif
 }
 
 /*
@@ -1165,6 +1179,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	} else
 		cc->iv_mode = NULL;
 
+#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
 	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
 	if (!cc->io_queue) {
 		ti->error = "Couldn't create kcryptd io queue";
@@ -1174,15 +1189,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
-		goto bad_crypt_queue;
+		destroy_workqueue(cc->io_queue);
+		goto bad_io_queue;
 	}
+#endif
 
 	ti->num_flush_requests = 1;
 	ti->private = cc;
 	return 0;
 
-bad_crypt_queue:
-	destroy_workqueue(cc->io_queue);
 bad_io_queue:
 	kfree(cc->iv_mode);
 bad_ivmode_string:
@@ -1210,8 +1225,10 @@ static void crypt_dtr(struct dm_target *ti)
 {
 	struct crypt_config *cc = (struct crypt_config *) ti->private;
 
+#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
 	destroy_workqueue(cc->io_queue);
 	destroy_workqueue(cc->crypt_queue);
+#endif
 
 	if (cc->req)
 		mempool_free(cc->req, cc->req_pool);
@@ -1399,6 +1416,21 @@ static int __init dm_crypt_init(void)
 {
 	int r;
 
+#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
+	_io_queue = create_singlethread_workqueue("kcryptd_io");
+	if (!_io_queue) {
+		DMERR("couldn't create kcryptd io queue");
+		return -ENOMEM;
+	}
+
+	_crypt_queue = create_singlethread_workqueue("kcryptd");
+	if (!_crypt_queue) {
+		DMERR("couldn't create kcryptd queue");
+		destroy_workqueue(_io_queue);
+		return -ENOMEM;
+	}
+#endif
+
 	_crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
 	if (!_crypt_io_pool)
 		return -ENOMEM;
@@ -1416,6 +1448,10 @@ static void __exit dm_crypt_exit(void)
 {
 	dm_unregister_target(&crypt_target);
 	kmem_cache_destroy(_crypt_io_pool);
+#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
+	destroy_workqueue(_io_queue);
+	destroy_workqueue(_crypt_queue);
+#endif
 }
 
 module_init(dm_crypt_init);
-- 
1.7.0.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 17:48 [PATCH] md: dm-crypt: Add option to re-use a new global work-queue San Mehat
@ 2010-04-22 18:03 ` Milan Broz
  2010-04-22 18:08   ` San Mehat
  0 siblings, 1 reply; 18+ messages in thread
From: Milan Broz @ 2010-04-22 18:03 UTC (permalink / raw)
  To: San Mehat
  Cc: Brian Swetland, dm-devel, Andrew Morton, Alasdair G Kergon,
	Christophe Saout

On 04/22/2010 07:48 PM, San Mehat wrote:
> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
> work-queues. This patch adds an option which will create one set of
> work-queues on init, and re-uses them for all dm-crypt target instances.

Hi,

I'll take a look, but you are basically re-introducing previous
logic and it was removed because of deadlock possibility.

(Imagine stacked dm-crypt - Truecrypt uses that - and low memory situation.
The mempool is exhausted and only possibility to free memory is finishing some
request in another queue - which is the same (blocked) queue with your patch!)

We must allocate bio clones there and using per-device mempools and queues
to avoid these problems.

And how this will work with asynchronous crypto processing when we
must wait if crypto queue is full? (In the same device stacked situation.)

Can you explain the real reason for this patch?

(cc: Alasdair - I think he will not accept the patch anyway.)

Milan


> 
> Cc: Milan Broz <mbroz@redhat.com>
> Cc: Brian Swetland <swetland@google.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Christophe Saout <christophe@saout.de>
> Signed-off-by: San Mehat <san@google.com>
> ---
>  drivers/md/Kconfig    |   10 ++++++++++
>  drivers/md/dm-crypt.c |   42 +++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 49 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> index 2158377..8d82dfc 100644
> --- a/drivers/md/Kconfig
> +++ b/drivers/md/Kconfig
> @@ -244,6 +244,16 @@ config DM_CRYPT
>  
>  	  If unsure, say N.
>  
> +config DM_CRYPT_GLOBAL_WORKQUEUES
> +	boolean "Use global instead of per-device work-queues"
> +	depends on DM_CRYPT
> +	---help---
> +	  Normally 2 kernel work-queue threads are created for every
> +	  dm-crypt target. This option creates only 1 set of work-queues
> +	  on init, and re-uses them.
> +
> +	  If unsure, say N.
> +
>  config DM_SNAPSHOT
>         tristate "Snapshot target"
>         depends on BLK_DEV_DM
> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
> index 959d6d1..875ad9a 100644
> --- a/drivers/md/dm-crypt.c
> +++ b/drivers/md/dm-crypt.c
> @@ -104,8 +104,10 @@ struct crypt_config {
>  	mempool_t *page_pool;
>  	struct bio_set *bs;
>  
> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>  	struct workqueue_struct *io_queue;
>  	struct workqueue_struct *crypt_queue;
> +#endif
>  
>  	/*
>  	 * crypto related data
> @@ -148,6 +150,10 @@ struct crypt_config {
>  #define MIN_BIO_PAGES  8
>  
>  static struct kmem_cache *_crypt_io_pool;
> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
> +static struct workqueue_struct *_io_queue;
> +static struct workqueue_struct *_crypt_queue;
> +#endif
>  
>  static void clone_init(struct dm_crypt_io *, struct bio *);
>  static void kcryptd_queue_crypt(struct dm_crypt_io *io);
> @@ -730,7 +736,11 @@ static void kcryptd_queue_io(struct dm_crypt_io *io)
>  	struct crypt_config *cc = io->target->private;
>  
>  	INIT_WORK(&io->work, kcryptd_io);
> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
> +	queue_work(_io_queue, &io->work);
> +#else
>  	queue_work(cc->io_queue, &io->work);
> +#endif
>  }
>  
>  static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
> @@ -914,7 +924,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
>  	struct crypt_config *cc = io->target->private;
>  
>  	INIT_WORK(&io->work, kcryptd_crypt);
> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
> +	queue_work(_crypt_queue, &io->work);
> +#else
>  	queue_work(cc->crypt_queue, &io->work);
> +#endif
>  }
>  
>  /*
> @@ -1165,6 +1179,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  	} else
>  		cc->iv_mode = NULL;
>  
> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>  	cc->io_queue = create_singlethread_workqueue("kcryptd_io");
>  	if (!cc->io_queue) {
>  		ti->error = "Couldn't create kcryptd io queue";
> @@ -1174,15 +1189,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  	cc->crypt_queue = create_singlethread_workqueue("kcryptd");
>  	if (!cc->crypt_queue) {
>  		ti->error = "Couldn't create kcryptd queue";
> -		goto bad_crypt_queue;
> +		destroy_workqueue(cc->io_queue);
> +		goto bad_io_queue;
>  	}
> +#endif
>  
>  	ti->num_flush_requests = 1;
>  	ti->private = cc;
>  	return 0;
>  
> -bad_crypt_queue:
> -	destroy_workqueue(cc->io_queue);
>  bad_io_queue:
>  	kfree(cc->iv_mode);
>  bad_ivmode_string:
> @@ -1210,8 +1225,10 @@ static void crypt_dtr(struct dm_target *ti)
>  {
>  	struct crypt_config *cc = (struct crypt_config *) ti->private;
>  
> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>  	destroy_workqueue(cc->io_queue);
>  	destroy_workqueue(cc->crypt_queue);
> +#endif
>  
>  	if (cc->req)
>  		mempool_free(cc->req, cc->req_pool);
> @@ -1399,6 +1416,21 @@ static int __init dm_crypt_init(void)
>  {
>  	int r;
>  
> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
> +	_io_queue = create_singlethread_workqueue("kcryptd_io");
> +	if (!_io_queue) {
> +		DMERR("couldn't create kcryptd io queue");
> +		return -ENOMEM;
> +	}
> +
> +	_crypt_queue = create_singlethread_workqueue("kcryptd");
> +	if (!_crypt_queue) {
> +		DMERR("couldn't create kcryptd queue");
> +		destroy_workqueue(_io_queue);
> +		return -ENOMEM;
> +	}
> +#endif
> +
>  	_crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
>  	if (!_crypt_io_pool)
>  		return -ENOMEM;
> @@ -1416,6 +1448,10 @@ static void __exit dm_crypt_exit(void)
>  {
>  	dm_unregister_target(&crypt_target);
>  	kmem_cache_destroy(_crypt_io_pool);
> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
> +	destroy_workqueue(_io_queue);
> +	destroy_workqueue(_crypt_queue);
> +#endif
>  }
>  
>  module_init(dm_crypt_init);

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 18:03 ` Milan Broz
@ 2010-04-22 18:08   ` San Mehat
  2010-04-22 18:47     ` Milan Broz
  0 siblings, 1 reply; 18+ messages in thread
From: San Mehat @ 2010-04-22 18:08 UTC (permalink / raw)
  To: Milan Broz
  Cc: Brian Swetland, dm-devel, Andrew Morton, Alasdair G Kergon,
	Christophe Saout

On Thu, Apr 22, 2010 at 11:03 AM, Milan Broz <mbroz@redhat.com> wrote:
> On 04/22/2010 07:48 PM, San Mehat wrote:
>> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
>> work-queues. This patch adds an option which will create one set of
>> work-queues on init, and re-uses them for all dm-crypt target instances.
>
> Hi,
>
> I'll take a look, but you are basically re-introducing previous
> logic and it was removed because of deadlock possibility.
>
> (Imagine stacked dm-crypt - Truecrypt uses that - and low memory situation.
> The mempool is exhausted and only possibility to free memory is finishing some
> request in another queue - which is the same (blocked) queue with your patch!
>
> We must allocate bio clones there and using per-device mempools and queues
> to avoid these problems.
>
> And how this will work with asynchronous crypto processing when weh
> must wait if crypto queue is full? (In the same device stacked situation.)
>
> Can you explain the real reason for this patch?
>

Sure, I'd be happy to explain.

  Upcoming versions of android are about to start using dm/dm-crypt
heavily, having
a large number of small dm-crypt instances running on the device (hard
to tell just
how many, but i've seen cases where up to 50 or 60 instances may be
running). This ends up creating 100 - 120 kernel threads, and I was
simply trying to cut that down.

I'd be more than happy to discuss alternatives; but do we *really*
need 2 work-queue threads per instance?

> (cc: Alasdair - I think he will not accept the patch anyway.)

Probably not, but at least we can get the discussion going :)

>
> Milan
>
>
>>
>> Cc: Milan Broz <mbroz@redhat.com>
>> Cc: Brian Swetland <swetland@google.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Christophe Saout <christophe@saout.de>
>> Signed-off-by: San Mehat <san@google.com>
>> ---
>>  drivers/md/Kconfig    |   10 ++++++++++
>>  drivers/md/dm-crypt.c |   42 +++++++++++++++++++++++++++++++++++++++---
>>  2 files changed, 49 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
>> index 2158377..8d82dfc 100644
>> --- a/drivers/md/Kconfig
>> +++ b/drivers/md/Kconfig
>> @@ -244,6 +244,16 @@ config DM_CRYPT
>>
>>         If unsure, say N.
>>
>> +config DM_CRYPT_GLOBAL_WORKQUEUES
>> +     boolean "Use global instead of per-device work-queues"
>> +     depends on DM_CRYPT
>> +     ---help---
>> +       Normally 2 kernel work-queue threads are created for every
>> +       dm-crypt target. This option creates only 1 set of work-queues
>> +       on init, and re-uses them.
>> +
>> +       If unsure, say N.
>> +
>>  config DM_SNAPSHOT
>>         tristate "Snapshot target"
>>         depends on BLK_DEV_DM
>> diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
>> index 959d6d1..875ad9a 100644
>> --- a/drivers/md/dm-crypt.c
>> +++ b/drivers/md/dm-crypt.c
>> @@ -104,8 +104,10 @@ struct crypt_config {
>>       mempool_t *page_pool;
>>       struct bio_set *bs;
>>
>> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>>       struct workqueue_struct *io_queue;
>>       struct workqueue_struct *crypt_queue;
>> +#endif
>>
>>       /*
>>        * crypto related data
>> @@ -148,6 +150,10 @@ struct crypt_config {
>>  #define MIN_BIO_PAGES  8
>>
>>  static struct kmem_cache *_crypt_io_pool;
>> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>> +static struct workqueue_struct *_io_queue;
>> +static struct workqueue_struct *_crypt_queue;
>> +#endif
>>
>>  static void clone_init(struct dm_crypt_io *, struct bio *);
>>  static void kcryptd_queue_crypt(struct dm_crypt_io *io);
>> @@ -730,7 +736,11 @@ static void kcryptd_queue_io(struct dm_crypt_io *io)
>>       struct crypt_config *cc = io->target->private;
>>
>>       INIT_WORK(&io->work, kcryptd_io);
>> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>> +     queue_work(_io_queue, &io->work);
>> +#else
>>       queue_work(cc->io_queue, &io->work);
>> +#endif
>>  }
>>
>>  static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io,
>> @@ -914,7 +924,11 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
>>       struct crypt_config *cc = io->target->private;
>>
>>       INIT_WORK(&io->work, kcryptd_crypt);
>> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>> +     queue_work(_crypt_queue, &io->work);
>> +#else
>>       queue_work(cc->crypt_queue, &io->work);
>> +#endif
>>  }
>>
>>  /*
>> @@ -1165,6 +1179,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>       } else
>>               cc->iv_mode = NULL;
>>
>> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>>       cc->io_queue = create_singlethread_workqueue("kcryptd_io");
>>       if (!cc->io_queue) {
>>               ti->error = "Couldn't create kcryptd io queue";
>> @@ -1174,15 +1189,15 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>>       cc->crypt_queue = create_singlethread_workqueue("kcryptd");
>>       if (!cc->crypt_queue) {
>>               ti->error = "Couldn't create kcryptd queue";
>> -             goto bad_crypt_queue;
>> +             destroy_workqueue(cc->io_queue);
>> +             goto bad_io_queue;
>>       }
>> +#endif
>>
>>       ti->num_flush_requests = 1;
>>       ti->private = cc;
>>       return 0;
>>
>> -bad_crypt_queue:
>> -     destroy_workqueue(cc->io_queue);
>>  bad_io_queue:
>>       kfree(cc->iv_mode);
>>  bad_ivmode_string:
>> @@ -1210,8 +1225,10 @@ static void crypt_dtr(struct dm_target *ti)
>>  {
>>       struct crypt_config *cc = (struct crypt_config *) ti->private;
>>
>> +#ifndef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>>       destroy_workqueue(cc->io_queue);
>>       destroy_workqueue(cc->crypt_queue);
>> +#endif
>>
>>       if (cc->req)
>>               mempool_free(cc->req, cc->req_pool);
>> @@ -1399,6 +1416,21 @@ static int __init dm_crypt_init(void)
>>  {
>>       int r;
>>
>> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>> +     _io_queue = create_singlethread_workqueue("kcryptd_io");
>> +     if (!_io_queue) {
>> +             DMERR("couldn't create kcryptd io queue");
>> +             return -ENOMEM;
>> +     }
>> +
>> +     _crypt_queue = create_singlethread_workqueue("kcryptd");
>> +     if (!_crypt_queue) {
>> +             DMERR("couldn't create kcryptd queue");
>> +             destroy_workqueue(_io_queue);
>> +             return -ENOMEM;
>> +     }
>> +#endif
>> +
>>       _crypt_io_pool = KMEM_CACHE(dm_crypt_io, 0);
>>       if (!_crypt_io_pool)
>>               return -ENOMEM;
>> @@ -1416,6 +1448,10 @@ static void __exit dm_crypt_exit(void)
>>  {
>>       dm_unregister_target(&crypt_target);
>>       kmem_cache_destroy(_crypt_io_pool);
>> +#ifdef CONFIG_DM_CRYPT_GLOBAL_WORKQUEUES
>> +     destroy_workqueue(_io_queue);
>> +     destroy_workqueue(_crypt_queue);
>> +#endif
>>  }
>>
>>  module_init(dm_crypt_init);
>



-- 
San Mehat  |  Staff Software Engineer  |  Android  |  Google Inc.
415.366.6172 (san@google.com)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 18:08   ` San Mehat
@ 2010-04-22 18:47     ` Milan Broz
  2010-04-22 19:42       ` San Mehat
  0 siblings, 1 reply; 18+ messages in thread
From: Milan Broz @ 2010-04-22 18:47 UTC (permalink / raw)
  To: San Mehat
  Cc: Brian Swetland, dm-devel, Andrew Morton, Alasdair G Kergon,
	Christophe Saout

On 04/22/2010 08:08 PM, San Mehat wrote:
> On Thu, Apr 22, 2010 at 11:03 AM, Milan Broz <mbroz@redhat.com> wrote:
>> On 04/22/2010 07:48 PM, San Mehat wrote:
>>> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
>>> work-queues. This patch adds an option which will create one set of
>>> work-queues on init, and re-uses them for all dm-crypt target instances.

>> Can you explain the real reason for this patch?
>>
> 
> Sure, I'd be happy to explain.

(Please add this always to patch header.)

> 
>   Upcoming versions of android are about to start using dm/dm-crypt
> heavily, having
> a large number of small dm-crypt instances running on the device (hard
> to tell just
> how many, but i've seen cases where up to 50 or 60 instances may be
> running). This ends up creating 100 - 120 kernel threads, and I was
> simply trying to cut that down.

Sorry, but I don't take this argument. "Too many notes!" :-)

So the problem is with memory allocation? Scheduler? Or where?
Kernel threads should be cheap.

If you need 60 crypt devices, you almost surely hit at least starvation
problem with one global queue!
(Just curious - what are these crypt devices doing?)

> I'd be more than happy to discuss alternatives; but do we *really*
> need 2 work-queue threads per instance?

yes. 

For separate io queue - see commit cabf08e4d3d1181d7c408edae97fb4d1c31518af

| Add post-processing queue (per crypt device) for read operations.

| Current implementation uses only one queue for all operations
| and this can lead to starvation caused by many requests waiting
| for memory allocation. But the needed memory-releasing operation
| is queued after these requests (in the same queue).


(and there were another problem with async crypt - callback is called
in interrupt context, bio must be submitted from separate workqueue IIRC)

>> (cc: Alasdair - I think he will not accept the patch anyway.)
> 
> Probably not, but at least we can get the discussion going :)

I am not saying that I do not want to discuss this - but we must know
the real problems many queues are causing first.
And then think about possible solutions.

Milan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 18:47     ` Milan Broz
@ 2010-04-22 19:42       ` San Mehat
  2010-04-23 14:01         ` Heinz Mauelshagen
  2010-04-27 20:58         ` San Mehat
  0 siblings, 2 replies; 18+ messages in thread
From: San Mehat @ 2010-04-22 19:42 UTC (permalink / raw)
  To: Milan Broz
  Cc: Brian Swetland, dm-devel, Andrew Morton, Alasdair G Kergon,
	Christophe Saout

On Thu, Apr 22, 2010 at 11:47 AM, Milan Broz <mbroz@redhat.com> wrote:
> On 04/22/2010 08:08 PM, San Mehat wrote:
>> On Thu, Apr 22, 2010 at 11:03 AM, Milan Broz <mbroz@redhat.com> wrote:
>>> On 04/22/2010 07:48 PM, San Mehat wrote:
>>>> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
>>>> work-queues. This patch adds an option which will create one set of
>>>> work-queues on init, and re-uses them for all dm-crypt target instances.
>
>>> Can you explain the real reason for this patch?
>>>
>>
>> Sure, I'd be happy to explain.
>
> (Please add this always to patch header.)
>

Will do - thanks.

>>
>>   Upcoming versions of android are about to start using dm/dm-crypt
>> heavily, having
>> a large number of small dm-crypt instances running on the device (hard
>> to tell just
>> how many, but i've seen cases where up to 50 or 60 instances may be
>> running). This ends up creating 100 - 120 kernel threads, and I was
>> simply trying to cut that down.
>
> Sorry, but I don't take this argument. "Too many notes!" :-)
>
> So the problem is with memory allocation? Scheduler? Or where?
> Kernel threads should be cheap.
>

Well the initial consideration was towards memory overhead with so
many threads that don't do much (in our use-case) on an embedded
device.

> If you need 60 crypt devices, you almost surely hit at least starvation
> problem with one global queue!
> (Just curious - what are these crypt devices doing?)

The crypt devices are providing small read-only encrypted file-systems
- whose backing files exist on an external FAT file-system, and are
created on-demand as needed. In this usage scenario, we'll only see
typically a few of these devices being simultaneously accessed, (and
the sd-card throughput is definitely the long-pole in the performance
profile, so even when I beat on 80 or 90 concurrent instances, we're
mainly waiting for mmcqd to complete transactions).

>
>> I'd be more than happy to discuss alternatives; but do we *really*
>> need 2 work-queue threads per instance?
>
> yes.

What if we made a note in the Kconfig advising against using the option in
stacked configurations? (Or even make it depend on CONFIG_EMBEDDED)

Thanks for your time,

-san

>
> For separate io queue - see commit cabf08e4d3d1181d7c408edae97fb4d1c31518af
>
> | Add post-processing queue (per crypt device) for read operations.
>
> | Current implementation uses only one queue for all operations
> | and this can lead to starvation caused by many requests waiting
> | for memory allocation. But the needed memory-releasing operation
> | is queued after these requests (in the same queue).
>
>
> (and there were another problem with async crypt - callback is called
> in interrupt context, bio must be submitted from separate workqueue IIRC)
>
>>> (cc: Alasdair - I think he will not accept the patch anyway.)
>>
>> Probably not, but at least we can get the discussion going :)
>
> I am not saying that I do not want to discuss this - but we must know
> the real problems many queues are causing first.
> And then think about possible solutions.
>
> Milan
>



-- 
San Mehat  |  Staff Software Engineer  |  Android  |  Google Inc.
415.366.6172 (san@google.com)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 19:42       ` San Mehat
@ 2010-04-23 14:01         ` Heinz Mauelshagen
  2010-04-27 20:58         ` San Mehat
  1 sibling, 0 replies; 18+ messages in thread
From: Heinz Mauelshagen @ 2010-04-23 14:01 UTC (permalink / raw)
  To: device-mapper development
  Cc: Brian Swetland, Andrew Morton, Christophe Saout,
	Alasdair G Kergon, Milan Broz

On Thu, 2010-04-22 at 12:42 -0700, San Mehat wrote:
> On Thu, Apr 22, 2010 at 11:47 AM, Milan Broz <mbroz@redhat.com> wrote:
> > On 04/22/2010 08:08 PM, San Mehat wrote:
> >> On Thu, Apr 22, 2010 at 11:03 AM, Milan Broz <mbroz@redhat.com> wrote:
> >>> On 04/22/2010 07:48 PM, San Mehat wrote:
> >>>> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
> >>>> work-queues. This patch adds an option which will create one set of
> >>>> work-queues on init, and re-uses them for all dm-crypt target instances.
> >
> >>> Can you explain the real reason for this patch?
> >>>
> >>
> >> Sure, I'd be happy to explain.
> >
> > (Please add this always to patch header.)
> >
> 
> Will do - thanks.
> 
> >>
> >>   Upcoming versions of android are about to start using dm/dm-crypt
> >> heavily, having
> >> a large number of small dm-crypt instances running on the device (hard
> >> to tell just
> >> how many, but i've seen cases where up to 50 or 60 instances may be
> >> running). This ends up creating 100 - 120 kernel threads, and I was
> >> simply trying to cut that down.
> >
> > Sorry, but I don't take this argument. "Too many notes!" :-)
> >
> > So the problem is with memory allocation? Scheduler? Or where?
> > Kernel threads should be cheap.

Please don't forget to think about the other end of the computer systems
scale: large multiprocessors/multicore systems. There you'd have way too
many threads with just some dm-crypt devices going with the multi-thread
workqueue design.

There ain't no "one for all" solution.

Heinz

> >
> 
> Well the initial consideration was towards memory overhead with so
> many threads that don't do much (in our use-case) on an embedded
> device.
> 
> > If you need 60 crypt devices, you almost surely hit at least starvation
> > problem with one global queue!
> > (Just curious - what are these crypt devices doing?)
> 
> The crypt devices are providing small read-only encrypted file-systems
> - whose backing files exist on an external FAT file-system, and are
> created on-demand as needed. In this usage scenario, we'll only see
> typically a few of these devices being simultaneously accessed, (and
> the sd-card throughput is definitely the long-pole in the performance
> profile, so even when I beat on 80 or 90 concurrent instances, we're
> mainly waiting for mmcqd to complete transactions).
> 
> >
> >> I'd be more than happy to discuss alternatives; but do we *really*
> >> need 2 work-queue threads per instance?
> >
> > yes.
> 
> What if we made a note in the Kconfig advising against using the option in
> stacked configurations? (Or even make it depend on CONFIG_EMBEDDED)
> 
> Thanks for your time,
> 
> -san
> 
> >
> > For separate io queue - see commit cabf08e4d3d1181d7c408edae97fb4d1c31518af
> >
> > | Add post-processing queue (per crypt device) for read operations.
> >
> > | Current implementation uses only one queue for all operations
> > | and this can lead to starvation caused by many requests waiting
> > | for memory allocation. But the needed memory-releasing operation
> > | is queued after these requests (in the same queue).
> >
> >
> > (and there were another problem with async crypt - callback is called
> > in interrupt context, bio must be submitted from separate workqueue IIRC)
> >
> >>> (cc: Alasdair - I think he will not accept the patch anyway.)
> >>
> >> Probably not, but at least we can get the discussion going :)
> >
> > I am not saying that I do not want to discuss this - but we must know
> > the real problems many queues are causing first.
> > And then think about possible solutions.
> >
> > Milan
> >
> 
> 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] md: dm-crypt: Add option to re-use a new global work-queue.
  2010-04-22 19:42       ` San Mehat
  2010-04-23 14:01         ` Heinz Mauelshagen
@ 2010-04-27 20:58         ` San Mehat
  2010-11-02 22:02           ` cmwq and dm-crypt devices? (was: Re: md: dm-crypt: Add option to re-use a new global work-queue.) Mike Snitzer
  1 sibling, 1 reply; 18+ messages in thread
From: San Mehat @ 2010-04-27 20:58 UTC (permalink / raw)
  To: Milan Broz
  Cc: Brian Swetland, dm-devel, Andrew Morton, Alasdair G Kergon,
	Christophe Saout

*ping* Any word on my previous counter-proposal? Shall I prepare
another patch for consideration?

-san


On Thu, Apr 22, 2010 at 12:42 PM, San Mehat <san@google.com> wrote:
> On Thu, Apr 22, 2010 at 11:47 AM, Milan Broz <mbroz@redhat.com> wrote:
>> On 04/22/2010 08:08 PM, San Mehat wrote:
>>> On Thu, Apr 22, 2010 at 11:03 AM, Milan Broz <mbroz@redhat.com> wrote:
>>>> On 04/22/2010 07:48 PM, San Mehat wrote:
>>>>> Typically, dm-crypt instances each have their own set of kcrypt/kcrypt_io
>>>>> work-queues. This patch adds an option which will create one set of
>>>>> work-queues on init, and re-uses them for all dm-crypt target instances.
>>
>>>> Can you explain the real reason for this patch?
>>>>
>>>
>>> Sure, I'd be happy to explain.
>>
>> (Please add this always to patch header.)
>>
>
> Will do - thanks.
>
>>>
>>>   Upcoming versions of android are about to start using dm/dm-crypt
>>> heavily, having
>>> a large number of small dm-crypt instances running on the device (hard
>>> to tell just
>>> how many, but i've seen cases where up to 50 or 60 instances may be
>>> running). This ends up creating 100 - 120 kernel threads, and I was
>>> simply trying to cut that down.
>>
>> Sorry, but I don't take this argument. "Too many notes!" :-)
>>
>> So the problem is with memory allocation? Scheduler? Or where?
>> Kernel threads should be cheap.
>>
>
> Well the initial consideration was towards memory overhead with so
> many threads that don't do much (in our use-case) on an embedded
> device.
>
>> If you need 60 crypt devices, you almost surely hit at least starvation
>> problem with one global queue!
>> (Just curious - what are these crypt devices doing?)
>
> The crypt devices are providing small read-only encrypted file-systems
> - whose backing files exist on an external FAT file-system, and are
> created on-demand as needed. In this usage scenario, we'll only see
> typically a few of these devices being simultaneously accessed, (and
> the sd-card throughput is definitely the long-pole in the performance
> profile, so even when I beat on 80 or 90 concurrent instances, we're
> mainly waiting for mmcqd to complete transactions).
>
>>
>>> I'd be more than happy to discuss alternatives; but do we *really*
>>> need 2 work-queue threads per instance?
>>
>> yes.
>
> What if we made a note in the Kconfig advising against using the option in
> stacked configurations? (Or even make it depend on CONFIG_EMBEDDED)
>
> Thanks for your time,
>
> -san
>
>>
>> For separate io queue - see commit cabf08e4d3d1181d7c408edae97fb4d1c31518af
>>
>> | Add post-processing queue (per crypt device) for read operations.
>>
>> | Current implementation uses only one queue for all operations
>> | and this can lead to starvation caused by many requests waiting
>> | for memory allocation. But the needed memory-releasing operation
>> | is queued after these requests (in the same queue).
>>
>>
>> (and there were another problem with async crypt - callback is called
>> in interrupt context, bio must be submitted from separate workqueue IIRC)
>>
>>>> (cc: Alasdair - I think he will not accept the patch anyway.)
>>>
>>> Probably not, but at least we can get the discussion going :)
>>
>> I am not saying that I do not want to discuss this - but we must know
>> the real problems many queues are causing first.
>> And then think about possible solutions.
>>
>> Milan
>>
>
>
>
> --
> San Mehat  |  Staff Software Engineer  |  Android  |  Google Inc.
> 415.366.6172 (san@google.com)
>



-- 
San Mehat  |  Staff Software Engineer  |  Android  |  Google Inc.
415.366.6172 (san@google.com)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* cmwq and dm-crypt devices? (was: Re: md: dm-crypt: Add option to re-use a new global work-queue.)
  2010-04-27 20:58         ` San Mehat
@ 2010-11-02 22:02           ` Mike Snitzer
  2010-11-03  9:46             ` cmwq and dm-crypt devices? Tejun Heo
  0 siblings, 1 reply; 18+ messages in thread
From: Mike Snitzer @ 2010-11-02 22:02 UTC (permalink / raw)
  To: San Mehat
  Cc: Christophe Saout, Brian Swetland, device-mapper development,
	andi, tj, Andrew Morton, Alasdair G Kergon, Milan Broz

Hi,

On Tue, Apr 27 2010 at  4:58pm -0400,
San Mehat <san@google.com> wrote:

> *ping* Any word on my previous counter-proposal? Shall I prepare
> another patch for consideration?

The new concurrency managed workqueues (cmwq) that went in to 2.6.36
_should_ hopefully obviate the need for the patch you proposed:
https://patchwork.kernel.org/patch/94179/

Some background on cmwq:
Documentation/workqueue.txt
http://lwn.net/Articles/403891/

We on the DM team haven't explored the impact cmwq has on dm-crypt
devices yet so more research and testing is needed here.  But it'd be
nice to have a hypothesis on how much cmwq will help us solve
our dm-crypt goals "for free".

[Cc'ing Tejun]

Tejun,

Your insight on how dm-crypt should be using cmwq to achieve the
following conflicting goals would be appreciated:

1) scale down the number of workqueue threads associated with N devices
   (w/ 2 workqueue threads per device) so that the number of threads is
   reasonable ("reasonable" is TBD but I'd imagine it doesn't buy us a
   lot to have 2 single thread workqueues dedicated to each dm-crypt
   device).

   [seems dm-crypt will already get this "for free" using
    create_singlethread_workqueue's WQ_UNBOUND?]

2) scale up the number of workqueue threads used for a single dm-crypt
   device so that a device can realize per-cpu concurrency (to address
   Andi's scalability concerns: https://patchwork.kernel.org/patch/244031/)

   [the desired locality is currently missing due to dm-crypt's current
    use of WQ_UNBOUND; so it is clear the way the workqueues are created
    will be important]

Regards,
Mike

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-02 22:02           ` cmwq and dm-crypt devices? (was: Re: md: dm-crypt: Add option to re-use a new global work-queue.) Mike Snitzer
@ 2010-11-03  9:46             ` Tejun Heo
  2010-11-03 11:51               ` Andi Kleen
  0 siblings, 1 reply; 18+ messages in thread
From: Tejun Heo @ 2010-11-03  9:46 UTC (permalink / raw)
  To: Mike Snitzer
  Cc: Christophe Saout, Brian Swetland, San Mehat,
	device-mapper development, andi, Andrew Morton,
	Alasdair G Kergon, Milan Broz

Hello, Mike.

On 11/02/2010 11:02 PM, Mike Snitzer wrote:
> 1) scale down the number of workqueue threads associated with N devices
>    (w/ 2 workqueue threads per device) so that the number of threads is
>    reasonable ("reasonable" is TBD but I'd imagine it doesn't buy us a
>    lot to have 2 single thread workqueues dedicated to each dm-crypt
>    device).
> 
>    [seems dm-crypt will already get this "for free" using
>     create_singlethread_workqueue's WQ_UNBOUND?]

Workqueues no longer need dedicated worker threads for usual cases.
Dedicated threads are only necessary to guarantee forward progress
under memory pressure.  So, unless dm crypts are stacked, there only
needs to be one rescuer.  If dm crypts are stacked, you would need
single workqueue w/ WQ_MEM_RECLAIM set at each level.  If figuring
that out is too cumbersome, it might make sense to use one per
instance tho.

Also, unless you need strict execution ordering of queued works,
there's no reason to use alloc_ordered_workqueue().  It probably is a
better idea to use non-reentrant workqueue.  I'm not quite sure
whether using unbound would be better or not tho.

> 2) scale up the number of workqueue threads used for a single dm-crypt
>    device so that a device can realize per-cpu concurrency (to address
>    Andi's scalability concerns: https://patchwork.kernel.org/patch/244031/)
> 
>    [the desired locality is currently missing due to dm-crypt's current
>     use of WQ_UNBOUND; so it is clear the way the workqueues are created
>     will be important]

I don't know enough about dm-crypt workload to tell whether per-cpu
affinity would be better or not, but it's really a simple matter of
adding or omitting WQ_UNBOUND, so playing with it is very easy.  I
think the flags to use would be WQ_MEM_RECLAIM | WQ_NON_REENTRANT |
WQ_CPU_INTENSIVE (| WQ_UNBOUND).

I wish I could be more helpful but I would need a bit more details.
If you have some design on mind, please let me know.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03  9:46             ` cmwq and dm-crypt devices? Tejun Heo
@ 2010-11-03 11:51               ` Andi Kleen
  2010-11-03 11:56                 ` Milan Broz
  0 siblings, 1 reply; 18+ messages in thread
From: Andi Kleen @ 2010-11-03 11:51 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, andi, Andrew Morton,
	Alasdair G Kergon, Milan Broz

> 
> > 2) scale up the number of workqueue threads used for a single dm-crypt
> >    device so that a device can realize per-cpu concurrency (to address
> >    Andi's scalability concerns: https://patchwork.kernel.org/patch/244031/)

They are already addressed in my patchkit and the patches seem to be used
by more and more users. It's just you guys who are behind.

> > 
> >    [the desired locality is currently missing due to dm-crypt's current
> >     use of WQ_UNBOUND; so it is clear the way the workqueues are created
> >     will be important]
> 
> I don't know enough about dm-crypt workload to tell whether per-cpu
> affinity would be better or not, but it's really a simple matter of

CPU affinity and an own thread makes sense for the crypto helper
because it uses up a lot of CPU time.

For the IO helper you probably still want CPU affinity, but it 
can be concurrency managed.

-andi

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 11:51               ` Andi Kleen
@ 2010-11-03 11:56                 ` Milan Broz
  2010-11-03 12:33                   ` Andi Kleen
  0 siblings, 1 reply; 18+ messages in thread
From: Milan Broz @ 2010-11-03 11:56 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Tejun Heo, Andrew Morton,
	Alasdair G Kergon

On 11/03/2010 12:51 PM, Andi Kleen wrote:
>>
>>> 2) scale up the number of workqueue threads used for a single dm-crypt
>>>    device so that a device can realize per-cpu concurrency (to address
>>>    Andi's scalability concerns: https://patchwork.kernel.org/patch/244031/)
> 
> They are already addressed in my patchkit and the patches seem to be used
> by more and more users. It's just you guys who are behind.

No, sorry, they are not addressed.

Did you read my mail with tests? http://lkml.org/lkml/2010/10/20/215

Async crypto is not solved at all by your patch.

Milan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 11:56                 ` Milan Broz
@ 2010-11-03 12:33                   ` Andi Kleen
  2010-11-03 13:02                     ` Milan Broz
  2010-11-03 13:18                     ` Alasdair G Kergon
  0 siblings, 2 replies; 18+ messages in thread
From: Andi Kleen @ 2010-11-03 12:33 UTC (permalink / raw)
  To: Milan Broz
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Andi Kleen, Tejun Heo, Andrew Morton,
	Alasdair G Kergon

> Async crypto is not solved at all by your patch.

You mean async crypto with 10 stacked devices? Find me a single
user who does that.

Traditionally stacking didn't work very 
well in the kernel due to the limited kernel stack and the resulting overflows. 
I don't think that's significantly different here.

Anyways stacking could be probably fixed, but it's also that 99.999999%
of all dm-crypt users don't stack or use async but simply need a 
scalable dm-crypt.  Extreme stacking is extremely low on the 
priority list.

Right now dm-crypt seems to have other problems anyways that need
to be addressed first.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 12:33                   ` Andi Kleen
@ 2010-11-03 13:02                     ` Milan Broz
  2010-11-03 13:18                     ` Alasdair G Kergon
  1 sibling, 0 replies; 18+ messages in thread
From: Milan Broz @ 2010-11-03 13:02 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Tejun Heo, Andrew Morton,
	Alasdair G Kergon

On 11/03/2010 01:33 PM, Andi Kleen wrote:
>> Async crypto is not solved at all by your patch.
> 
> You mean async crypto with 10 stacked devices? Find me a single
> user who does that.

It is just quick reproducer. It can happen with several
non-stacked devices too and just points to the
problem that there was reason for dedicated threads per device.

I would really prefer if we can fix the scalability problem without
dismantling existing stacking support.

There are users who plan to massively use many dm-crypt devices
in system and this will ensure that it will work properly
even in bizarre configurations.

> Traditionally stacking didn't work very 
> well in the kernel due to the limited kernel stack overflows. 
> I don't think that's significantly different here.

Sorry? device-mapper is designed to be stackable. And it works.

> Anyways stacking could be probably fixed, but it's also that 99.999999%
> of all dm-crypt users don't stack or use async but simply need a 
> scalable dm-crypt.  Extreme stacking is extremly low on the 
> priority list.
> 
> Right now dm-crypt seems to have other problems anyways that need
> to be addressed first.

I fixed many bugs in dm-crypt which were caused by reducing the problem
to the "common situation" (and which appeared later).

Please can you fix your patch instead of this argumentation?

I know that some fix is needed, I am using the patch myself and I am also
receiving mails requesting it.

The whole thread was reopened because we tried to find solution
to this - IOW fix your patch.

Milan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 12:33                   ` Andi Kleen
  2010-11-03 13:02                     ` Milan Broz
@ 2010-11-03 13:18                     ` Alasdair G Kergon
  2010-11-03 16:13                       ` Andi Kleen
  1 sibling, 1 reply; 18+ messages in thread
From: Alasdair G Kergon @ 2010-11-03 13:18 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Tejun Heo, Andrew Morton,
	Alasdair G Kergon, Milan Broz

On Wed, Nov 03, 2010 at 01:33:47PM +0100, Andi Kleen wrote:
> Traditionally stacking didn't work very 
> well in the kernel due to the limited kernel stack overflows. 
> I don't think that's significantly different here.
 
Stacking in dm works reasonably well today and many people rely upon
that.

> Anyways stacking could be probably fixed, but it's also that 99.999999%

Please fix this regression the patch causes then or give some
fundamental arguments why we can't satisfy the two requirements
simultaneously: I just don't understand why it is necessary to cause a
regression in stacking/low memory support in order to provide the
performance boost in the way you desire.  The two things ought to be
independent of each other.

(And I remain sceptical about the suitability of the patch even with a
stack of just two or three layers as I don't see anything in the failed
tests that shows why similar failures couldn't in principle affect
people with thinner stacks.  The compromise I suggested was to amend the
patch to introduce the change only for non-stacked devices.)

Alasdair

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 13:18                     ` Alasdair G Kergon
@ 2010-11-03 16:13                       ` Andi Kleen
  2010-11-03 16:17                         ` Tejun Heo
  0 siblings, 1 reply; 18+ messages in thread
From: Andi Kleen @ 2010-11-03 16:13 UTC (permalink / raw)
  To: Andi Kleen, Milan Broz, Christophe Saout, Mike Snitzer, Brian Swetland

> Please fix this regression the patch causes then or give some
> fundamental arguments why we can't satisfy the two requirements
> simultaneously: I just don't understand why it is necessary to cause a
> regression in stacking/low memory support in order to provide the
> performance boost in the way you desire.  The two things ought to be
> independent of each other.

The old stacking relies on per device threads. Per device threads
cannot be combined with per CPU threads, because that would lead
to a thread explosion for large device counts.

I thought I had handled stacking, but my implementation doesn't
seem to fully work. Most likely it needs a fallback thread
or using one of the available workqueues.
 
> (And I remain sceptical about the suitability of the patch even with a
> stack of just two or three layers as I don't see anything in the failed
> tests that shows why similar failures couldn't in principle affect

Yes most likely the problem happens with any stacking > 0.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 16:13                       ` Andi Kleen
@ 2010-11-03 16:17                         ` Tejun Heo
  2010-11-03 16:22                           ` Milan Broz
  0 siblings, 1 reply; 18+ messages in thread
From: Tejun Heo @ 2010-11-03 16:17 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Andrew Morton, Alasdair G Kergon,
	Milan Broz

Hello,

On 11/03/2010 05:13 PM, Andi Kleen wrote:
>> Please fix this regression the patch causes then or give some
>> fundamental arguments why we can't satisfy the two requirements
>> simultaneously: I just don't understand why it is necessary to cause a
>> regression in stacking/low memory support in order to provide the
>> performance boost in the way you desire.  The two things ought to be
>> independent of each other.
> 
> The old stacking relies on per device threads. Per device threads
> cannot be combined with per CPU threads, because that would lead
> to a thread explosion for large device counts.

Dunno the context here but creating WQ_NON_REENTRANT | WQ_MEM_RECLAIM
workqueue per device should achieve what you want.  It will be per-cpu
while reserving only one worker per workqueue, so there won't be
explosion of threads.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 16:17                         ` Tejun Heo
@ 2010-11-03 16:22                           ` Milan Broz
  2010-11-04  9:55                             ` Andi Kleen
  0 siblings, 1 reply; 18+ messages in thread
From: Milan Broz @ 2010-11-03 16:22 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Andi Kleen, Andrew Morton,
	Alasdair G Kergon

On 11/03/2010 05:17 PM, Tejun Heo wrote:
>> The old stacking relies on per device threads. Per device threads
>> cannot be combined with per CPU threads, because that would lead
>> to a thread explosion for large device counts.
> 
> Dunno the context here but creating WQ_NON_REENTRANT | WQ_MEM_RECLAIM
> workqueue per device should achieve what you want.  It will be per-cpu
> while reserving only one worker per workqueue, so there won't be
> explosion of threads.

Thanks for answer,

Yes, that is exactly what I was looking for but was not sure if possible.

So switch to normal per-device threads (not singlethreaded as now)
combined with the Andi's fixes and it should work for everyone?

Milan

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: cmwq and dm-crypt devices?
  2010-11-03 16:22                           ` Milan Broz
@ 2010-11-04  9:55                             ` Andi Kleen
  0 siblings, 0 replies; 18+ messages in thread
From: Andi Kleen @ 2010-11-04  9:55 UTC (permalink / raw)
  To: Milan Broz
  Cc: Christophe Saout, Mike Snitzer, Brian Swetland, San Mehat,
	device-mapper development, Andi Kleen, Tejun Heo, Andrew Morton,
	Alasdair G Kergon

> Yes, that is exactly what I was looking for but was not sure if possible.
> 
> So switch to normal per device device threads (not singlethreaded as now)
> combined with the Andi's fixes and it should work for everyone?

Sounds like it could handle this.

You'll need to revert some of the hunks in my patch, I ripped out some
of the per device work queue code. It should not be very hard.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2010-11-04  9:55 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-04-22 17:48 [PATCH] md: dm-crypt: Add option to re-use a new global work-queue San Mehat
2010-04-22 18:03 ` Milan Broz
2010-04-22 18:08   ` San Mehat
2010-04-22 18:47     ` Milan Broz
2010-04-22 19:42       ` San Mehat
2010-04-23 14:01         ` Heinz Mauelshagen
2010-04-27 20:58         ` San Mehat
2010-11-02 22:02           ` cmwq and dm-crypt devices? (was: Re: md: dm-crypt: Add option to re-use a new global work-queue.) Mike Snitzer
2010-11-03  9:46             ` cmwq and dm-crypt devices? Tejun Heo
2010-11-03 11:51               ` Andi Kleen
2010-11-03 11:56                 ` Milan Broz
2010-11-03 12:33                   ` Andi Kleen
2010-11-03 13:02                     ` Milan Broz
2010-11-03 13:18                     ` Alasdair G Kergon
2010-11-03 16:13                       ` Andi Kleen
2010-11-03 16:17                         ` Tejun Heo
2010-11-03 16:22                           ` Milan Broz
2010-11-04  9:55                             ` Andi Kleen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.