linux-kernel.vger.kernel.org archive mirror
* [PATCH] dma-debug: dynamic allocation of hash table
@ 2020-01-30 19:10 Eric Dumazet
  2020-01-30 23:46 ` Robin Murphy
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2020-01-30 19:10 UTC (permalink / raw)
  To: Christoph Hellwig, Joerg Roedel
  Cc: linux-kernel, iommu, Eric Dumazet, Eric Dumazet, Geert Uytterhoeven

Increasing the size of dma_entry_hash by 327680 bytes
has run into limitations of some bootloaders.

Simply use dynamic allocations instead, and take
this opportunity to increase the hash table to 65536
buckets. Finally my 40Gbit mlx4 NIC can sustain
line rate with CONFIG_DMA_API_DEBUG=y.

Fixes: 5e76f564572b ("dma-debug: increase HASH_SIZE")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/debug.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 2031ed1ad7fa109bb8a8c290bbbc5f825362baba..a310dbb1515e92c081f8f3f9a7290dd5e53fc889 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -27,7 +27,7 @@
 
 #include <asm/sections.h>
 
-#define HASH_SIZE       16384ULL
+#define HASH_SIZE       65536ULL
 #define HASH_FN_SHIFT   13
 #define HASH_FN_MASK    (HASH_SIZE - 1)
 
@@ -90,7 +90,8 @@ struct hash_bucket {
 };
 
 /* Hash list to save the allocated dma addresses */
-static struct hash_bucket dma_entry_hash[HASH_SIZE];
+static struct hash_bucket *dma_entry_hash __read_mostly;
+
 /* List of pre-allocated dma_debug_entry's */
 static LIST_HEAD(free_entries);
 /* Lock for the list above */
@@ -934,6 +935,10 @@ static int dma_debug_init(void)
 	if (global_disable)
 		return 0;
 
+	dma_entry_hash = vmalloc(HASH_SIZE * sizeof(*dma_entry_hash));
+	if (!dma_entry_hash)
+		goto err;
+
 	for (i = 0; i < HASH_SIZE; ++i) {
 		INIT_LIST_HEAD(&dma_entry_hash[i].list);
 		spin_lock_init(&dma_entry_hash[i].lock);
@@ -950,6 +955,7 @@ static int dma_debug_init(void)
 		pr_warn("%d debug entries requested but only %d allocated\n",
 			nr_prealloc_entries, nr_total_entries);
 	} else {
+err:
 		pr_err("debugging out of memory error - disabled\n");
 		global_disable = true;
 
-- 
2.25.0.341.g760bfbb309-goog



* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-30 19:10 [PATCH] dma-debug: dynamic allocation of hash table Eric Dumazet
@ 2020-01-30 23:46 ` Robin Murphy
  2020-01-31  0:17   ` Eric Dumazet
  2020-01-31  9:06   ` Geert Uytterhoeven
  0 siblings, 2 replies; 10+ messages in thread
From: Robin Murphy @ 2020-01-30 23:46 UTC (permalink / raw)
  To: Eric Dumazet, Christoph Hellwig, Joerg Roedel
  Cc: iommu, Eric Dumazet, Geert Uytterhoeven, linux-kernel

Hi Eric,

On 2020-01-30 7:10 pm, Eric Dumazet via iommu wrote:
> Increasing the size of dma_entry_hash by 327680 bytes
> has run into limitations of some bootloaders.

[ That might warrant some further explanation - I don't quite follow how 
this would relate to a bootloader specifically :/ ]

> Simply use dynamic allocations instead, and take
> this opportunity to increase the hash table to 65536
> buckets. Finally my 40Gbit mlx4 NIC can sustain
> line rate with CONFIG_DMA_API_DEBUG=y.

That's pretty cool, but I can't help but wonder if making the table 
bigger caused a problem in the first place, whether making it bigger yet 
again in the name of a fix is really the wisest move. How might this 
impact DMA debugging on 32-bit embedded systems with limited vmalloc 
space and even less RAM, for instance? More to the point, does vmalloc() 
even work for !CONFIG_MMU builds? Obviously we don't want things to be 
*needlessly* slow if avoidable, but is there a genuine justification for 
needing to optimise what is fundamentally an invasive heavyweight 
correctness check - e.g. has it helped expose race conditions that were 
otherwise masked?

That said, by moving to dynamic allocation maybe there's room to be 
cleverer and make HASH_SIZE scale with, say, system memory size? (I 
assume from the context it's not something we can expand on-demand like 
we did for the dma_debug_entry pool)
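
Something along these lines, perhaps - a completely untested sketch, with the
constants plucked from the air purely to illustrate the idea:

/* Untested sketch: scale the bucket count with total system memory,
 * clamped at both ends. The constants here are entirely arbitrary.
 */
static unsigned int dma_debug_hash_size(void)
{
	/* roughly one bucket per megabyte of RAM, assuming 4K pages */
	unsigned long buckets = totalram_pages() >> 8;

	return roundup_pow_of_two(clamp(buckets, 1024UL, 65536UL));
}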

Robin.

> Fixes: 5e76f564572b ("dma-debug: increase HASH_SIZE")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Cc: Christoph Hellwig <hch@lst.de>
> ---
>   kernel/dma/debug.c | 10 ++++++++--
>   1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
> index 2031ed1ad7fa109bb8a8c290bbbc5f825362baba..a310dbb1515e92c081f8f3f9a7290dd5e53fc889 100644
> --- a/kernel/dma/debug.c
> +++ b/kernel/dma/debug.c
> @@ -27,7 +27,7 @@
>   
>   #include <asm/sections.h>
>   
> -#define HASH_SIZE       16384ULL
> +#define HASH_SIZE       65536ULL
>   #define HASH_FN_SHIFT   13
>   #define HASH_FN_MASK    (HASH_SIZE - 1)
>   
> @@ -90,7 +90,8 @@ struct hash_bucket {
>   };
>   
>   /* Hash list to save the allocated dma addresses */
> -static struct hash_bucket dma_entry_hash[HASH_SIZE];
> +static struct hash_bucket *dma_entry_hash __read_mostly;
> +
>   /* List of pre-allocated dma_debug_entry's */
>   static LIST_HEAD(free_entries);
>   /* Lock for the list above */
> @@ -934,6 +935,10 @@ static int dma_debug_init(void)
>   	if (global_disable)
>   		return 0;
>   
> +	dma_entry_hash = vmalloc(HASH_SIZE * sizeof(*dma_entry_hash));
> +	if (!dma_entry_hash)
> +		goto err;
> +
>   	for (i = 0; i < HASH_SIZE; ++i) {
>   		INIT_LIST_HEAD(&dma_entry_hash[i].list);
>   		spin_lock_init(&dma_entry_hash[i].lock);
> @@ -950,6 +955,7 @@ static int dma_debug_init(void)
>   		pr_warn("%d debug entries requested but only %d allocated\n",
>   			nr_prealloc_entries, nr_total_entries);
>   	} else {
> +err:
>   		pr_err("debugging out of memory error - disabled\n");
>   		global_disable = true;
>   
> 


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-30 23:46 ` Robin Murphy
@ 2020-01-31  0:17   ` Eric Dumazet
  2020-01-31 12:30     ` Robin Murphy
  2020-01-31  9:06   ` Geert Uytterhoeven
  1 sibling, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2020-01-31  0:17 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On Thu, Jan 30, 2020 at 3:46 PM Robin Murphy <robin.murphy@arm.com> wrote:
>
> Hi Eric,
>
> On 2020-01-30 7:10 pm, Eric Dumazet via iommu wrote:
> > Increasing the size of dma_entry_hash by 327680 bytes
> > has run into limitations of some bootloaders.
>
> [ That might warrant some further explanation - I don't quite follow how
> this would relate to a bootloader specifically :/ ]

I had no details, please look at the prior thread where this has been discussed.

https://www.spinics.net/lists/linux-renesas-soc/msg46157.html


>
> > Simply use dynamic allocations instead, and take
> > this opportunity to increase the hash table to 65536
> > buckets. Finally my 40Gbit mlx4 NIC can sustain
> > line rate with CONFIG_DMA_API_DEBUG=y.
>
> That's pretty cool, but I can't help but wonder if making the table
> bigger caused a problem in the first place, whether making it bigger yet
> again in the name of a fix is really the wisest move. How might this
> impact DMA debugging on 32-bit embedded systems with limited vmalloc
> space and even less RAM, for instance? More to the point, does vmalloc()
> even work for !CONFIG_MMU builds? Obviously we don't want things to be
> *needlessly* slow if avoidable, but is there a genuine justification for
> needing to optimise what is fundamentally an invasive heavyweight
> correctness check - e.g. has it helped expose race conditions that were
> otherwise masked?

Not sure what you are saying.

vmalloc() _is_ supported by all arches, even !CONFIG_MMU
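
For !CONFIG_MMU the fallback is essentially a kmalloc() - a simplified sketch
of the idea, not the exact mm/nommu.c code:

/*
 * Simplified sketch, not the exact mm/nommu.c implementation: with no MMU
 * there is no separate vmalloc address space to remap pages into, so the
 * request is simply satisfied by the slab allocator (and therefore has to
 * be physically contiguous).
 */
void *vmalloc(unsigned long size)
{
	return kmalloc(size, GFP_KERNEL);
}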

I can not test all platforms, and this is a debugging
feature no one uses in production.

>
> That said, by moving to dynamic allocation maybe there's room to be
> cleverer and make HASH_SIZE scale with, say, system memory size? (I
> assume from the context it's not something we can expand on-demand like
> we did for the dma_debug_entry pool)
>

How can memory size serve as a proxy for the number of entries?
A current 10Gbit NIC needs about 256,000 entries in the table.
Needless to say, the prior hash size was unusable.

As I suggested one month ago, HASH_SIZE can be tuned by a developer
eager to use this facility.

65536 slots take 768 KB on 32-bit platforms.
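(Assuming sizeof(struct hash_bucket) is 12 bytes there - an 8-byte list_head
plus a 4-byte spinlock_t without debug options: 65536 * 12 = 786432 bytes.)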

> Robin.
>
> > Fixes: 5e76f564572b ("dma-debug: increase HASH_SIZE")
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
> > Cc: Christoph Hellwig <hch@lst.de>
> > ---
> >   kernel/dma/debug.c | 10 ++++++++--
> >   1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
> > index 2031ed1ad7fa109bb8a8c290bbbc5f825362baba..a310dbb1515e92c081f8f3f9a7290dd5e53fc889 100644
> > --- a/kernel/dma/debug.c
> > +++ b/kernel/dma/debug.c
> > @@ -27,7 +27,7 @@
> >
> >   #include <asm/sections.h>
> >
> > -#define HASH_SIZE       16384ULL
> > +#define HASH_SIZE       65536ULL
> >   #define HASH_FN_SHIFT   13
> >   #define HASH_FN_MASK    (HASH_SIZE - 1)
> >
> > @@ -90,7 +90,8 @@ struct hash_bucket {
> >   };
> >
> >   /* Hash list to save the allocated dma addresses */
> > -static struct hash_bucket dma_entry_hash[HASH_SIZE];
> > +static struct hash_bucket *dma_entry_hash __read_mostly;
> > +
> >   /* List of pre-allocated dma_debug_entry's */
> >   static LIST_HEAD(free_entries);
> >   /* Lock for the list above */
> > @@ -934,6 +935,10 @@ static int dma_debug_init(void)
> >       if (global_disable)
> >               return 0;
> >
> > +     dma_entry_hash = vmalloc(HASH_SIZE * sizeof(*dma_entry_hash));
> > +     if (!dma_entry_hash)
> > +             goto err;
> > +
> >       for (i = 0; i < HASH_SIZE; ++i) {
> >               INIT_LIST_HEAD(&dma_entry_hash[i].list);
> >               spin_lock_init(&dma_entry_hash[i].lock);
> > @@ -950,6 +955,7 @@ static int dma_debug_init(void)
> >               pr_warn("%d debug entries requested but only %d allocated\n",
> >                       nr_prealloc_entries, nr_total_entries);
> >       } else {
> > +err:
> >               pr_err("debugging out of memory error - disabled\n");
> >               global_disable = true;
> >
> >


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-30 23:46 ` Robin Murphy
  2020-01-31  0:17   ` Eric Dumazet
@ 2020-01-31  9:06   ` Geert Uytterhoeven
  2020-01-31 11:33     ` Robin Murphy
  1 sibling, 1 reply; 10+ messages in thread
From: Geert Uytterhoeven @ 2020-01-31  9:06 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Eric Dumazet, Christoph Hellwig, Joerg Roedel, Linux IOMMU,
	Eric Dumazet, linux-kernel

Hi Robin,

On Fri, Jan 31, 2020 at 12:46 AM Robin Murphy <robin.murphy@arm.com> wrote:
> On 2020-01-30 7:10 pm, Eric Dumazet via iommu wrote:
> > Increasing the size of dma_entry_hash by 327680 bytes
> > has run into limitations of some bootloaders.
>
> [ That might warrant some further explanation - I don't quite follow how
> this would relate to a bootloader specifically :/ ]

Increasing the size of a static array increases kernel size.
Some (all? ;-) bootloaders have limitations on the maximum size of a
kernel image they can boot (usually something critical gets overwritten
when handling a too large image).  While boot loaders can be fixed and
upgraded, this is usually much more cumbersome than updating the
kernel.

Besides, a static array always consumes valuable unswapable memory,
even when the feature would not be used (e.g. disabled by a command
line option).

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31  9:06   ` Geert Uytterhoeven
@ 2020-01-31 11:33     ` Robin Murphy
  0 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2020-01-31 11:33 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Eric Dumazet, Christoph Hellwig, Joerg Roedel, Linux IOMMU,
	Eric Dumazet, linux-kernel

On 2020-01-31 9:06 am, Geert Uytterhoeven wrote:
> Hi Robin,
> 
> On Fri, Jan 31, 2020 at 12:46 AM Robin Murphy <robin.murphy@arm.com> wrote:
>> On 2020-01-30 7:10 pm, Eric Dumazet via iommu wrote:
>>> Increasing the size of dma_entry_hash by 327680 bytes
>>> has run into limitations of some bootloaders.
>>
>> [ That might warrant some further explanation - I don't quite follow how
>> this would relate to a bootloader specifically :/ ]
> 
> Increasing the size of a static array increases kernel size.
> Some (all? ;-) bootloaders have limitations on the maximum size of a
> kernel image they can boot (usually something critical gets overwritten
> when handling a too large image).  While boot loaders can be fixed and
> upgraded, this is usually much more cumbersome than updating the
> kernel.

Ah, OK - I'm all too familiar with bootloaders having image size limits, 
but I'm also used to implicitly-initialised statics being collected into 
a runtime-initialised .bss section, so I hadn't realised that there 
might still be platforms where that space is actually allocated in the 
image at link-time.

> Besides, a static array always consumes valuable unswapable memory,
> even when the feature would not be used (e.g. disabled by a command
> line option).

Indeed, and that alone might have been a reasonable rationale for the 
patch - I was merely querying the wording of the commit message, not its 
intent :)

Thanks,
Robin.


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31  0:17   ` Eric Dumazet
@ 2020-01-31 12:30     ` Robin Murphy
  2020-01-31 14:42       ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Robin Murphy @ 2020-01-31 12:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On 2020-01-31 12:17 am, Eric Dumazet wrote:
> On Thu, Jan 30, 2020 at 3:46 PM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> Hi Eric,
>>
>> On 2020-01-30 7:10 pm, Eric Dumazet via iommu wrote:
>>> Increasing the size of dma_entry_hash by 327680 bytes
>>> has run into limitations of some bootloaders.
>>
>> [ That might warrant some further explanation - I don't quite follow how
>> this would relate to a bootloader specifically :/ ]
> 
> I had no details, please look at the prior thread where this has been discussed.
> 
> https://www.spinics.net/lists/linux-renesas-soc/msg46157.html

Thanks for the pointer - that had passed me by as it doesn't seem to 
have been CCed to the IOMMU list or me as a reviewer.

>>> Simply use dynamic allocations instead, and take
>>> this opportunity to increase the hash table to 65536
>>> buckets. Finally my 40Gbit mlx4 NIC can sustain
>>> line rate with CONFIG_DMA_API_DEBUG=y.
>>
>> That's pretty cool, but I can't help but wonder if making the table
>> bigger caused a problem in the first place, whether making it bigger yet
>> again in the name of a fix is really the wisest move. How might this
>> impact DMA debugging on 32-bit embedded systems with limited vmalloc
>> space and even less RAM, for instance? More to the point, does vmalloc()
>> even work for !CONFIG_MMU builds? Obviously we don't want things to be
>> *needlessly* slow if avoidable, but is there a genuine justification for
>> needing to optimise what is fundamentally an invasive heavyweight
>> correctness check - e.g. has it helped expose race conditions that were
>> otherwise masked?
> 
> Not sure what you are saying.
> 
> vmalloc() _is_ supported by all arches, even !CONFIG_MMU

OK, that's good - I wasn't sure off-hand, and I was on the wrong machine 
to go digging into the source.

> I can not test all platforms, and this is a debugging
> feature no one uses in production.

...which is exactly why I'd prefer to see a stronger justification for 
making a change which only benefits performance on large systems, while 
potentially impacting usability on small ones.

>> That said, by moving to dynamic allocation maybe there's room to be
>> cleverer and make HASH_SIZE scale with, say, system memory size? (I
>> assume from the context it's not something we can expand on-demand like
>> we did for the dma_debug_entry pool)
>>
> 
> How can memory size serve as a proxy for the number of entries?
> A current 10Gbit NIC needs about 256,000 entries in the table.
> Needless to say, the prior hash size was unusable.

Because it's actually a reasonable proxy for "system size" in this 
context - there aren't many good reasons for a driver to maintain 
hundreds of mappings of the *same* buffer, so in general the maximum 
number of live mappings is effectively going to scale with the total 
amount of memory, particularly at the smaller end. Consider it a pretty 
safe assumption that an embedded system with RAM measured in megabytes 
won't ever be running anything like an MLX4, let alone in a debug 
situation. If those 256,000 mappings each represent a typical network 
packet, that implies well over 300MB of data alone, not to mention the 
size of the queues themselves, the actual DMA debug entries, and the 
whole rest of the kernel beyond that one driver - I doubt it's 
physically possible to 'need' 64K hash buckets without at the very least 1GB 
of total RAM, quite likely much more.
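(Back-of-the-envelope, assuming roughly MTU-sized buffers: 256,000 mappings 
at ~1500 bytes each is already around 380 MB.)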

> As I suggested one month ago, HASH_SIZE can be tuned by a developer
> eager to use this facility.
> 
> 65536 slots take 768 KB on 32-bit platforms.

...and when that represents ~5% of the total system RAM it is a *lot* 
less reasonable than even 12KB. As I said, it's great to make a debug 
option more efficient such that what it observes is more representative 
of the non-debug behaviour, but it mustn't come at the cost of making 
the entire option unworkable for other users.

Robin.

>>> Fixes: 5e76f564572b ("dma-debug: increase HASH_SIZE")
>>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>>> Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
>>> Cc: Christoph Hellwig <hch@lst.de>
>>> ---
>>>    kernel/dma/debug.c | 10 ++++++++--
>>>    1 file changed, 8 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
>>> index 2031ed1ad7fa109bb8a8c290bbbc5f825362baba..a310dbb1515e92c081f8f3f9a7290dd5e53fc889 100644
>>> --- a/kernel/dma/debug.c
>>> +++ b/kernel/dma/debug.c
>>> @@ -27,7 +27,7 @@
>>>
>>>    #include <asm/sections.h>
>>>
>>> -#define HASH_SIZE       16384ULL
>>> +#define HASH_SIZE       65536ULL
>>>    #define HASH_FN_SHIFT   13
>>>    #define HASH_FN_MASK    (HASH_SIZE - 1)
>>>
>>> @@ -90,7 +90,8 @@ struct hash_bucket {
>>>    };
>>>
>>>    /* Hash list to save the allocated dma addresses */
>>> -static struct hash_bucket dma_entry_hash[HASH_SIZE];
>>> +static struct hash_bucket *dma_entry_hash __read_mostly;
>>> +
>>>    /* List of pre-allocated dma_debug_entry's */
>>>    static LIST_HEAD(free_entries);
>>>    /* Lock for the list above */
>>> @@ -934,6 +935,10 @@ static int dma_debug_init(void)
>>>        if (global_disable)
>>>                return 0;
>>>
>>> +     dma_entry_hash = vmalloc(HASH_SIZE * sizeof(*dma_entry_hash));
>>> +     if (!dma_entry_hash)
>>> +             goto err;
>>> +
>>>        for (i = 0; i < HASH_SIZE; ++i) {
>>>                INIT_LIST_HEAD(&dma_entry_hash[i].list);
>>>                spin_lock_init(&dma_entry_hash[i].lock);
>>> @@ -950,6 +955,7 @@ static int dma_debug_init(void)
>>>                pr_warn("%d debug entries requested but only %d allocated\n",
>>>                        nr_prealloc_entries, nr_total_entries);
>>>        } else {
>>> +err:
>>>                pr_err("debugging out of memory error - disabled\n");
>>>                global_disable = true;
>>>
>>>


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31 12:30     ` Robin Murphy
@ 2020-01-31 14:42       ` Eric Dumazet
  2020-01-31 17:43         ` Robin Murphy
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2020-01-31 14:42 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On Fri, Jan 31, 2020 at 4:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> ...and when that represents ~5% of the total system RAM it is a *lot*
> less reasonable than even 12KB. As I said, it's great to make a debug
> option more efficient such that what it observes is more representative
> of the non-debug behaviour, but it mustn't come at the cost of making
> the entire option unworkable for other users.
>

Then I suggest you send a patch to reduce PREALLOC_DMA_DEBUG_ENTRIES
because having 65536 preallocated entries consumes 4 MB of memory.

Actually this whole attempt to re-implement slab allocations in this
file is suspect.

Do not get me wrong, but if you really want to run linux on a 16MB host,
I guess you need to add CONFIG_BASE_SMALL all over the place,
not only in this kernel/dma/debug.c file.
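
The usual pattern elsewhere would be something along these lines (just a
sketch, the small value picked out of thin air):

	#define HASH_SIZE	(CONFIG_BASE_SMALL ? 1024ULL : 65536ULL)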


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31 14:42       ` Eric Dumazet
@ 2020-01-31 17:43         ` Robin Murphy
  2020-01-31 17:46           ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Robin Murphy @ 2020-01-31 17:43 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On 31/01/2020 2:42 pm, Eric Dumazet wrote:
> On Fri, Jan 31, 2020 at 4:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> ...and when that represents ~5% of the total system RAM it is a *lot*
>> less reasonable than even 12KB. As I said, it's great to make a debug
>> option more efficient such that what it observes is more representative
>> of the non-debug behaviour, but it mustn't come at the cost of making
>> the entire option unworkable for other users.
>>
> 
> Then I suggest you send a patch to reduce PREALLOC_DMA_DEBUG_ENTRIES
> > because having 65536 preallocated entries consumes 4 MB of memory.

...unless it's overridden on the command-line ;)

> Actually this whole attempt to re-implement slab allocations in this
> file is suspect.

Again that's a matter of expected usage patterns - typically the 
"reasonable default" or user-defined preallocation should still be 
enough (as it *had* to be before), and the kind of systems that can 
sustain so many live mappings as to regularly hit the dynamic expansion 
path are also likely to have enough memory not to care that later-unused 
entries never get 'properly' freed back to the kernel (plus as you've 
observed, many workloads tend to reach a steady state where entries are 
mostly only transiently free anyway). The reasoning for the exact 
implementation details is there in the commit logs.

> Do not get me wrong, but if you really want to run linux on a 16MB host,
> I guess you need to add CONFIG_BASE_SMALL all over the place,
> not only in this kernel/dma/debug.c file.

Right, nobody's suggesting that defconfig should "just work" on your 
router/watch/IoT shoelaces/whatever, I'm just saying that tuning any 
piece of common code for datacenter behemoths, for quality-of-life 
rather than functional necessity, and leaving no way to adjust it other 
than hacking the source, would represent an unnecessary degree of 
short-sightedness that we can and should avoid.

Taking a closer look at the code, it appears fairly straightforward to 
make the hash size variable, and in fact making it self-adjusting 
doesn't seem too big a jump from there. I'm happy to have a go at that 
myself if you like.

Robin.


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31 17:43         ` Robin Murphy
@ 2020-01-31 17:46           ` Eric Dumazet
  2020-02-01  0:06             ` Robin Murphy
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2020-01-31 17:46 UTC (permalink / raw)
  To: Robin Murphy
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On Fri, Jan 31, 2020 at 9:43 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 31/01/2020 2:42 pm, Eric Dumazet wrote:
> > On Fri, Jan 31, 2020 at 4:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
> >>
> >> ...and when that represents ~5% of the total system RAM it is a *lot*
> >> less reasonable than even 12KB. As I said, it's great to make a debug
> >> option more efficient such that what it observes is more representative
> >> of the non-debug behaviour, but it mustn't come at the cost of making
> >> the entire option unworkable for other users.
> >>
> >
> > Then I suggest you send a patch to reduce PREALLOC_DMA_DEBUG_ENTRIES
> > > because having 65536 preallocated entries consumes 4 MB of memory.
>
> ...unless it's overridden on the command-line ;)
>
> > Actually this whole attempt to re-implement slab allocations in this
> > file is suspect.
>
> Again that's a matter of expected usage patterns - typically the
> "reasonable default" or user-defined preallocation should still be
> enough (as it *had* to be before), and the kind of systems that can
> sustain so many live mappings as to regularly hit the dynamic expansion
> path are also likely to have enough memory not to care that later-unused
> entries never get 'properly' freed back to the kernel (plus as you've
> observed, many workloads tend to reach a steady state where entries are
> mostly only transiently free anyway). The reasoning for the exact
> implementation details is there in the commit logs.
>
> > Do not get me wrong, but if you really want to run linux on a 16MB host,
> > I guess you need to add CONFIG_BASE_SMALL all over the place,
> > not only in this kernel/dma/debug.c file.
>
> Right, nobody's suggesting that defconfig should "just work" on your
> router/watch/IoT shoelaces/whatever, I'm just saying that tuning any
> piece of common code for datacenter behemoths, for quality-of-life
> rather than functional necessity, and leaving no way to adjust it other
> than hacking the source, would represent an unnecessary degree of
> short-sightedness that we can and should avoid.
>
> Taking a closer look at the code, it appears fairly straightforward to
> make the hash size variable, and in fact making it self-adjusting
> doesn't seem too big a jump from there. I'm happy to have a go at that
> myself if you like.


Sure, go ahead, I have no plans to implement changes for 20-year-old platforms.

Thanks.


* Re: [PATCH] dma-debug: dynamic allocation of hash table
  2020-01-31 17:46           ` Eric Dumazet
@ 2020-02-01  0:06             ` Robin Murphy
  0 siblings, 0 replies; 10+ messages in thread
From: Robin Murphy @ 2020-02-01  0:06 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Christoph Hellwig, Joerg Roedel, iommu, Eric Dumazet,
	Geert Uytterhoeven, linux-kernel

On 2020-01-31 5:46 pm, Eric Dumazet wrote:
> On Fri, Jan 31, 2020 at 9:43 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 31/01/2020 2:42 pm, Eric Dumazet wrote:
>>> On Fri, Jan 31, 2020 at 4:30 AM Robin Murphy <robin.murphy@arm.com> wrote:
>>>>
>>>> ...and when that represents ~5% of the total system RAM it is a *lot*
>>>> less reasonable than even 12KB. As I said, it's great to make a debug
>>>> option more efficient such that what it observes is more representative
>>>> of the non-debug behaviour, but it mustn't come at the cost of making
>>>> the entire option unworkable for other users.
>>>>
>>>
>>> Then I suggest you send a patch to reduce PREALLOC_DMA_DEBUG_ENTRIES
>>> because having 65536 preallocated entries consumes 4 MB of memory.
>>
>> ...unless it's overridden on the command-line ;)
>>
>>> Actually this whole attempt to re-implement slab allocations in this
>>> file is suspect.
>>
>> Again that's a matter of expected usage patterns - typically the
>> "reasonable default" or user-defined preallocation should still be
>> enough (as it *had* to be before), and the kind of systems that can
>> sustain so many live mappings as to regularly hit the dynamic expansion
>> path are also likely to have enough memory not to care that later-unused
>> entries never get 'properly' freed back to the kernel (plus as you've
>> observed, many workloads tend to reach a steady state where entries are
>> mostly only transiently free anyway). The reasoning for the exact
>> implementation details is there in the commit logs.
>>
>>> Do not get me wrong, but if you really want to run linux on a 16MB host,
>>> I guess you need to add CONFIG_BASE_SMALL all over the place,
>>> not only in this kernel/dma/debug.c file.
>>
>> Right, nobody's suggesting that defconfig should "just work" on your
>> router/watch/IoT shoelaces/whatever, I'm just saying that tuning any
>> piece of common code for datacenter behemoths, for quality-of-life
>> rather than functional necessity, and leaving no way to adjust it other
>> than hacking the source, would represent an unnecessary degree of
>> short-sightedness that we can and should avoid.
>>
>> Taking a closer look at the code, it appears fairly straightforward to
>> make the hash size variable, and in fact making it self-adjusting
>> doesn't seem too big a jump from there. I'm happy to have a go at that
>> myself if you like.
> 
> 
> Sure, go ahead, I have no plans to implement changes for 20-year-old platforms.

Heh, I'm fairly confident the 20-year-old drivers are probably 
well-debugged by now anyway - it's the future low-end SoCs for 
Linux-powered IoT shoelaces that I see being a rich vein of new bugs :)

How does this look to start with? (only compile-tested so far, but I'll 
pick it up again properly next week)

Robin.

----->8-----
From: Robin Murphy <robin.murphy@arm.com>
Subject: [PATCH] dma/debug: Size hash table dynamically

Having a fixed-size hash table fails to scale nicely - making it large
enough to perform well for the mapping behaviour of big network adapters
tends towards unreasonable memory consumption for smaller systems. It's
logical to size the table for the number of expected entries, and doing
so conveniently puts the size/performance tradeoff in the hands of the
user via the "dma_debug_entries" option. The dynamic allocation also
sidesteps the issue of the static array bloating the kernel image on
certain platforms.
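
For example, with this patch booting with "dma_debug_entries=1048576" also
implies a 65536-bucket table (1048576 / 16), while the default preallocation
of 65536 entries gives a 4096-bucket table.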

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
  kernel/dma/debug.c | 31 ++++++++++++++++++++-----------
  1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 2031ed1ad7fa..86a4c68d6ac1 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -27,9 +27,9 @@

  #include <asm/sections.h>

-#define HASH_SIZE       16384ULL
+static int hash_size __read_mostly;
  #define HASH_FN_SHIFT   13
-#define HASH_FN_MASK    (HASH_SIZE - 1)
+#define HASH_FN_MASK    (hash_size - 1)

  #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
  /* If the pool runs out, add this many new entries at once */
@@ -90,7 +90,7 @@ struct hash_bucket {
  };

  /* Hash list to save the allocated dma addresses */
-static struct hash_bucket dma_entry_hash[HASH_SIZE];
+static struct hash_bucket *dma_entry_hash;
  /* List of pre-allocated dma_debug_entry's */
  static LIST_HEAD(free_entries);
  /* Lock for the list above */
@@ -398,7 +398,7 @@ void debug_dma_dump_mappings(struct device *dev)
  {
  	int idx;

-	for (idx = 0; idx < HASH_SIZE; idx++) {
+	for (idx = 0; idx < hash_size; idx++) {
  		struct hash_bucket *bucket = &dma_entry_hash[idx];
  		struct dma_debug_entry *entry;
  		unsigned long flags;
@@ -818,7 +818,7 @@ static int dump_show(struct seq_file *seq, void *v)
  {
  	int idx;

-	for (idx = 0; idx < HASH_SIZE; idx++) {
+	for (idx = 0; idx < hash_size; idx++) {
  		struct hash_bucket *bucket = &dma_entry_hash[idx];
  		struct dma_debug_entry *entry;
  		unsigned long flags;
@@ -862,7 +862,7 @@ static int device_dma_allocations(struct device *dev, struct dma_debug_entry **o
  	unsigned long flags;
  	int count = 0, i;

-	for (i = 0; i < HASH_SIZE; ++i) {
+	for (i = 0; i < hash_size; ++i) {
  		spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
  		list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
  			if (entry->dev == dev) {
@@ -934,7 +934,13 @@ static int dma_debug_init(void)
  	if (global_disable)
  		return 0;

-	for (i = 0; i < HASH_SIZE; ++i) {
+	hash_size = roundup_pow_of_two(nr_prealloc_entries) / 16;
+	dma_entry_hash = kvmalloc_array(max(hash_size, 16),
+					sizeof(*dma_entry_hash), GFP_KERNEL);
+	if (!dma_entry_hash)
+		goto out_err;
+
+	for (i = 0; i < hash_size; ++i) {
  		INIT_LIST_HEAD(&dma_entry_hash[i].list);
  		spin_lock_init(&dma_entry_hash[i].lock);
  	}
@@ -950,10 +956,7 @@ static int dma_debug_init(void)
  		pr_warn("%d debug entries requested but only %d allocated\n",
  			nr_prealloc_entries, nr_total_entries);
  	} else {
-		pr_err("debugging out of memory error - disabled\n");
-		global_disable = true;
-
-		return 0;
+		goto out_err;
  	}
  	min_free_entries = num_free_entries;

@@ -961,6 +964,12 @@ static int dma_debug_init(void)

  	pr_info("debugging enabled by kernel config\n");
  	return 0;
+
+out_err:
+	pr_err("debugging out of memory error - disabled\n");
+	global_disable = true;
+
+	return 0;
  }
  core_initcall(dma_debug_init);

-- 
2.17.1


