of: add early boot allocation of of_find_node_by_phandle() cache
diff mbox series

Message ID 1518655465-10759-1-git-send-email-frowand.list@gmail.com
State New, archived
Headers show
Series
  • of: add early boot allocation of of_find_node_by_phandle() cache
Related show

Commit Message

Frank Rowand Feb. 15, 2018, 12:44 a.m. UTC
From: Frank Rowand <frank.rowand@sony.com>

The initial implementation of the of_find_node_by_phandle() cache
allocates the cache using kcalloc().  Add an early boot allocation
of the cache so it will be usable during early boot.  Switch over
to the kcalloc() based cache once normal memory allocation
becomes available.

Signed-off-by: Frank Rowand <frank.rowand@sony.com>
---

This patch is optional, to be added at Rob's discretion.  The
extra complexity is not as much as I had feared, but the boot
speed up is also likely small.

 drivers/of/base.c       | 33 +++++++++++++++++++++++++++++++++
 drivers/of/fdt.c        |  2 ++
 drivers/of/of_private.h |  2 ++
 3 files changed, 37 insertions(+)

Comments

Frank Rowand Feb. 15, 2018, 12:55 a.m. UTC | #1
On 02/14/18 16:44, frowand.list@gmail.com wrote:
> From: Frank Rowand <frank.rowand@sony.com>
> 
> The initial implementation of the of_find_node_by_phandle() cache
> allocates the cache using kcalloc().  Add an early boot allocation
> of the cache so it will be usable during early boot.  Switch over
> to the kcalloc() based cache once normal memory allocation
> becomes available.
> 
> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
> ---
> 
> This patch is optional, to be added at Rob's discretion.  The
> extra complexity is not as much as I had feared, but the boot
> speed up is also likely small.

This patch applies on top of:

[PATCH v3] of: cache phandle nodes to reduce cost of of_find_node_by_phandle()
Chintan Pandya Feb. 16, 2018, 9:07 a.m. UTC | #2
On 2/15/2018 6:14 AM, frowand.list@gmail.com wrote:
> From: Frank Rowand <frank.rowand@sony.com>
> 
> The initial implementation of the of_find_node_by_phandle() cache
> allocates the cache using kcalloc().  Add an early boot allocation
> of the cache so it will be usable during early boot.  Switch over
> to the kcalloc() based cache once normal memory allocation
> becomes available.
> 
> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
> ---
> 
> This patch is optional, to be added at Rob's discretion.  The
> extra complexity is not as much as I had feared, but the boot
> speed up is also likely small.
> 
>   drivers/of/base.c       | 33 +++++++++++++++++++++++++++++++++
>   drivers/of/fdt.c        |  2 ++
>   drivers/of/of_private.h |  2 ++
>   3 files changed, 37 insertions(+)
> 
> diff --git a/drivers/of/base.c b/drivers/of/base.c
> index ab545dfa9173..d7b1ff1209e8 100644
> --- a/drivers/of/base.c
> +++ b/drivers/of/base.c
> @@ -16,9 +16,11 @@
>   
>   #define pr_fmt(fmt)	"OF: " fmt
>   
> +#include <linux/bootmem.h>
>   #include <linux/console.h>
>   #include <linux/ctype.h>
>   #include <linux/cpu.h>
> +#include <linux/memblock.h>
>   #include <linux/module.h>
>   #include <linux/of.h>
>   #include <linux/of_device.h>
> @@ -131,6 +133,29 @@ static void of_populate_phandle_cache(void)
>   	raw_spin_unlock_irqrestore(&devtree_lock, flags);
>   }
>   
> +void __init of_populate_phandle_cache_early(void)
> +{
> +	u32 cache_entries;
> +	struct device_node *np;
> +	u32 phandles = 0;
> +	size_t size;
> +
> +	for_each_of_allnodes(np)
> +		if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
> +			phandles++;
> +
> +	cache_entries = roundup_pow_of_two(phandles);
> +	phandle_cache_mask = cache_entries - 1;
> +
> +	size = cache_entries * sizeof(*phandle_cache);
> +	phandle_cache = memblock_virt_alloc(size, 4);
> +	memset(phandle_cache, 0, size);
> +
> +	for_each_of_allnodes(np)
> +		if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
> +			phandle_cache[np->phandle & phandle_cache_mask] = np;
> +}

This function duplicates a lot of code from
of_populate_phandle_cache(). Would you consider factoring out the
common code, or parameterizing the function with an extra bool
argument to indicate 'early' or 'not early'?

> +
>   #ifndef CONFIG_MODULES
>   static int __init of_free_phandle_cache(void)
>   {
> @@ -150,7 +175,15 @@ static int __init of_free_phandle_cache(void)
>   
>   void __init of_core_init(void)
>   {
> +	unsigned long flags;
>   	struct device_node *np;
> +	phys_addr_t size;
> +
> +	raw_spin_lock_irqsave(&devtree_lock, flags);
> +	size = (phandle_cache_mask + 1) * sizeof(*phandle_cache);
> +	memblock_free(__pa(phandle_cache), size);
> +	phandle_cache = NULL;
> +	raw_spin_unlock_irqrestore(&devtree_lock, flags);
>   
>   	of_populate_phandle_cache();
>   
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index 84aa9d676375..cb320df23f26 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -1264,6 +1264,8 @@ void __init unflatten_device_tree(void)
>   	of_alias_scan(early_init_dt_alloc_memory_arch);
>   
>   	unittest_unflatten_overlay_base();
> +
> +	of_populate_phandle_cache_early();
>   }
>   
>   /**
> diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
> index fa70650136b4..6720448c84cc 100644
> --- a/drivers/of/of_private.h
> +++ b/drivers/of/of_private.h
> @@ -134,6 +134,8 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np,
>   /* illegal phandle value (set when unresolved) */
>   #define OF_PHANDLE_ILLEGAL	0xdeadbeef
>   
> +extern void __init of_populate_phandle_cache_early(void);
> +
>   /* iterators for transactions, used for overlays */
>   /* forward iterator */
>   #define for_each_transaction_entry(_oft, _te) \
> 

Chintan
Frank Rowand Feb. 16, 2018, 10:32 p.m. UTC | #3
On 02/16/18 01:07, Chintan Pandya wrote:
> 
> 
> On 2/15/2018 6:14 AM, frowand.list@gmail.com wrote:
>> From: Frank Rowand <frank.rowand@sony.com>
>>
>> The initial implementation of the of_find_node_by_phandle() cache
>> allocates the cache using kcalloc().  Add an early boot allocation
>> of the cache so it will be usable during early boot.  Switch over
>> to the kcalloc() based cache once normal memory allocation
>> becomes available.
>>
>> Signed-off-by: Frank Rowand <frank.rowand@sony.com>
>> ---
>>
>> This patch is optional, to be added at Rob's discretion.  The
>> extra complexity is not as much as I had feared, but the boot
>> speed up is also likely small.
>>
>>   drivers/of/base.c       | 33 +++++++++++++++++++++++++++++++++
>>   drivers/of/fdt.c        |  2 ++
>>   drivers/of/of_private.h |  2 ++
>>   3 files changed, 37 insertions(+)
>>
>> diff --git a/drivers/of/base.c b/drivers/of/base.c
>> index ab545dfa9173..d7b1ff1209e8 100644
>> --- a/drivers/of/base.c
>> +++ b/drivers/of/base.c
>> @@ -16,9 +16,11 @@
>>     #define pr_fmt(fmt)    "OF: " fmt
>>   +#include <linux/bootmem.h>
>>   #include <linux/console.h>
>>   #include <linux/ctype.h>
>>   #include <linux/cpu.h>
>> +#include <linux/memblock.h>
>>   #include <linux/module.h>
>>   #include <linux/of.h>
>>   #include <linux/of_device.h>
>> @@ -131,6 +133,29 @@ static void of_populate_phandle_cache(void)
>>       raw_spin_unlock_irqrestore(&devtree_lock, flags);
>>   }
>>   +void __init of_populate_phandle_cache_early(void)
>> +{
>> +    u32 cache_entries;
>> +    struct device_node *np;
>> +    u32 phandles = 0;
>> +    size_t size;
>> +
>> +    for_each_of_allnodes(np)
>> +        if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
>> +            phandles++;
>> +
>> +    cache_entries = roundup_pow_of_two(phandles);
>> +    phandle_cache_mask = cache_entries - 1;
>> +
>> +    size = cache_entries * sizeof(*phandle_cache);
>> +    phandle_cache = memblock_virt_alloc(size, 4);
>> +    memset(phandle_cache, 0, size);
>> +
>> +    for_each_of_allnodes(np)
>> +        if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
>> +            phandle_cache[np->phandle & phandle_cache_mask] = np;
>> +}
> 
> This function duplicates a lot of code from
> of_populate_phandle_cache(). Would you consider factoring out the
> common code, or parameterizing the function with an extra bool
> argument to indicate 'early' or 'not early'?

Good observation, and normally yes.  My first implementation of this
feature actually did what you suggest.

It turns out to be a bit more complicated than one might expect
because some of the code is marked __init.  That results in
passing the memory allocation function as a parameter to
of_populate_phandle_cache().  See __unflatten_device_tree() for
an example of what this entails.  Then the parts of
of_populate_phandle_cache() that need to be encapsulated in an
'if (!early)' test are scattered throughout the function, so the
test becomes rather intrusive in terms of code readability.

In the end, the method I chose results in cleaner code for
of_populate_phandle_cache(), plus the memory used by
of_populate_phandle_cache_early() gets reclaimed after boot, since
it is marked __init.  Both functions are relatively small and
the code common to both is unlikely to be modified, so I do
not see this causing a maintenance burden.

-Frank

>> +
>>   #ifndef CONFIG_MODULES
>>   static int __init of_free_phandle_cache(void)
>>   {
>> @@ -150,7 +175,15 @@ static int __init of_free_phandle_cache(void)
>>     void __init of_core_init(void)
>>   {
>> +    unsigned long flags;
>>       struct device_node *np;
>> +    phys_addr_t size;
>> +
>> +    raw_spin_lock_irqsave(&devtree_lock, flags);
>> +    size = (phandle_cache_mask + 1) * sizeof(*phandle_cache);
>> +    memblock_free(__pa(phandle_cache), size);
>> +    phandle_cache = NULL;
>> +    raw_spin_unlock_irqrestore(&devtree_lock, flags);
>>         of_populate_phandle_cache();
>>   diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
>> index 84aa9d676375..cb320df23f26 100644
>> --- a/drivers/of/fdt.c
>> +++ b/drivers/of/fdt.c
>> @@ -1264,6 +1264,8 @@ void __init unflatten_device_tree(void)
>>       of_alias_scan(early_init_dt_alloc_memory_arch);
>>         unittest_unflatten_overlay_base();
>> +
>> +    of_populate_phandle_cache_early();
>>   }
>>     /**
>> diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
>> index fa70650136b4..6720448c84cc 100644
>> --- a/drivers/of/of_private.h
>> +++ b/drivers/of/of_private.h
>> @@ -134,6 +134,8 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np,
>>   /* illegal phandle value (set when unresolved) */
>>   #define OF_PHANDLE_ILLEGAL    0xdeadbeef
>>   +extern void __init of_populate_phandle_cache_early(void);
>> +
>>   /* iterators for transactions, used for overlays */
>>   /* forward iterator */
>>   #define for_each_transaction_entry(_oft, _te) \
>>
> 
> Chintan

Patch
diff mbox series

diff --git a/drivers/of/base.c b/drivers/of/base.c
index ab545dfa9173..d7b1ff1209e8 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -16,9 +16,11 @@ 
 
 #define pr_fmt(fmt)	"OF: " fmt
 
+#include <linux/bootmem.h>
 #include <linux/console.h>
 #include <linux/ctype.h>
 #include <linux/cpu.h>
+#include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -131,6 +133,29 @@  static void of_populate_phandle_cache(void)
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 }
 
+void __init of_populate_phandle_cache_early(void)
+{
+	u32 cache_entries;
+	struct device_node *np;
+	u32 phandles = 0;
+	size_t size;
+
+	for_each_of_allnodes(np)
+		if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
+			phandles++;
+
+	cache_entries = roundup_pow_of_two(phandles);
+	phandle_cache_mask = cache_entries - 1;
+
+	size = cache_entries * sizeof(*phandle_cache);
+	phandle_cache = memblock_virt_alloc(size, 4);
+	memset(phandle_cache, 0, size);
+
+	for_each_of_allnodes(np)
+		if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
+			phandle_cache[np->phandle & phandle_cache_mask] = np;
+}
+
 #ifndef CONFIG_MODULES
 static int __init of_free_phandle_cache(void)
 {
@@ -150,7 +175,15 @@  static int __init of_free_phandle_cache(void)
 
 void __init of_core_init(void)
 {
+	unsigned long flags;
 	struct device_node *np;
+	phys_addr_t size;
+
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	size = (phandle_cache_mask + 1) * sizeof(*phandle_cache);
+	memblock_free(__pa(phandle_cache), size);
+	phandle_cache = NULL;
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	of_populate_phandle_cache();
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 84aa9d676375..cb320df23f26 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1264,6 +1264,8 @@  void __init unflatten_device_tree(void)
 	of_alias_scan(early_init_dt_alloc_memory_arch);
 
 	unittest_unflatten_overlay_base();
+
+	of_populate_phandle_cache_early();
 }
 
 /**
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index fa70650136b4..6720448c84cc 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -134,6 +134,8 @@  extern void __of_sysfs_remove_bin_file(struct device_node *np,
 /* illegal phandle value (set when unresolved) */
 #define OF_PHANDLE_ILLEGAL	0xdeadbeef
 
+extern void __init of_populate_phandle_cache_early(void);
+
 /* iterators for transactions, used for overlays */
 /* forward iterator */
 #define for_each_transaction_entry(_oft, _te) \