All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
@ 2017-10-16  5:49 Alexey Kardashevskiy
  2017-10-16  6:11 ` David Gibson
  2017-10-16 11:59 ` Michael Ellerman
  0 siblings, 2 replies; 9+ messages in thread
From: Alexey Kardashevskiy @ 2017-10-16  5:49 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Alexey Kardashevskiy, David Gibson, Benjamin Herrenschmidt,
	Paul Mackerras, Greg Kurz, Balbir Singh, Thomas Huth,
	Nikunj A Dadhania, slof

At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
about 8.5sec to read the entire device tree. Some explanation can be
found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
because the kernel traverses the tree twice and it calls "getprop" for
each properly which is really SLOF as it searches from the linked list
beginning every time.

Since SLOF has just learned to build FDT and this takes less than 0.5sec
for such a big guest, this makes use of the proposed client interface
method - "fdt-fetch".

If "fdt-fetch" is not available, the old method is used.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 02190e90c7ae..daa50a153737 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
 		prom_panic("Can't allocate initial device-tree chunk\n");
 	mem_end = mem_start + room;
 
+	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
+			   room - sizeof(mem_reserve_map))) {
+		u32 size;
+
+		hdr = (void *) mem_start;
+
+		/* Fixup the boot cpuid */
+		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
+
+		/* Append the reserved map to the end of the blob */
+		hdr->off_mem_rsvmap = hdr->totalsize;
+		size = be32_to_cpu(hdr->totalsize);
+		rsvmap = (void *) hdr + size;
+		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
+		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
+
+		/* Store the DT address */
+		dt_header_start = mem_start;
+
+#ifdef DEBUG_PROM
+		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
+#endif
+		goto print_exit;
+	}
+
 	/* Get root of tree */
 	root = call_prom("peer", 1, 1, (phandle)0);
 	if (root == (phandle)0)
@@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
 	/* Copy the reserve map in */
 	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
 
+print_exit:
 #ifdef DEBUG_PROM
 	{
 		int i;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  5:49 [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware Alexey Kardashevskiy
@ 2017-10-16  6:11 ` David Gibson
  2017-10-16  6:22   ` Alexey Kardashevskiy
  2017-10-16 11:59 ` Michael Ellerman
  1 sibling, 1 reply; 9+ messages in thread
From: David Gibson @ 2017-10-16  6:11 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: linuxppc-dev, Benjamin Herrenschmidt, Paul Mackerras, Greg Kurz,
	Balbir Singh, Thomas Huth, Nikunj A Dadhania, slof

[-- Attachment #1: Type: text/plain, Size: 2894 bytes --]

On Mon, Oct 16, 2017 at 04:49:17PM +1100, Alexey Kardashevskiy wrote:
> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
> about 8.5sec to read the entire device tree. Some explanation can be
> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
> because the kernel traverses the tree twice and it calls "getprop" for
> each properly which is really SLOF as it searches from the linked list
> beginning every time.
> 
> Since SLOF has just learned to build FDT and this takes less than 0.5sec
> for such a big guest, this makes use of the proposed client interface
> method - "fdt-fetch".
> 
> If "fdt-fetch" is not available, the old method is used.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

I like the concept, few details though..

> ---
>  arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index 02190e90c7ae..daa50a153737 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
>  		prom_panic("Can't allocate initial device-tree chunk\n");
>  	mem_end = mem_start + room;
>  
> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
> +			   room - sizeof(mem_reserve_map))) {
> +		u32 size;
> +
> +		hdr = (void *) mem_start;
> +
> +		/* Fixup the boot cpuid */
> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);

If SLOF is generating a tree it really should get this header field
right as well.

> +		/* Append the reserved map to the end of the blob */
> +		hdr->off_mem_rsvmap = hdr->totalsize;
> +		size = be32_to_cpu(hdr->totalsize);
> +		rsvmap = (void *) hdr + size;
> +		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
> +		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));

.. and the reserve map for that matter.  I don't really understand
what you're doing here.  Note also that the reserve map is required to
be 8-byte aligned, which totalsize might not be.

> +		/* Store the DT address */
> +		dt_header_start = mem_start;
> +
> +#ifdef DEBUG_PROM
> +		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
> +#endif
> +		goto print_exit;
> +	}
> +
>  	/* Get root of tree */
>  	root = call_prom("peer", 1, 1, (phandle)0);
>  	if (root == (phandle)0)
> @@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
>  	/* Copy the reserve map in */
>  	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
>  
> +print_exit:
>  #ifdef DEBUG_PROM
>  	{
>  		int i;

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  6:11 ` David Gibson
@ 2017-10-16  6:22   ` Alexey Kardashevskiy
  2017-10-16  6:46     ` David Gibson
  0 siblings, 1 reply; 9+ messages in thread
From: Alexey Kardashevskiy @ 2017-10-16  6:22 UTC (permalink / raw)
  To: David Gibson
  Cc: linuxppc-dev, Benjamin Herrenschmidt, Paul Mackerras, Greg Kurz,
	Balbir Singh, Thomas Huth, Nikunj A Dadhania, slof


[-- Attachment #1.1: Type: text/plain, Size: 3313 bytes --]

On 16/10/17 17:11, David Gibson wrote:
> On Mon, Oct 16, 2017 at 04:49:17PM +1100, Alexey Kardashevskiy wrote:
>> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
>> about 8.5sec to read the entire device tree. Some explanation can be
>> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
>> because the kernel traverses the tree twice and it calls "getprop" for
>> each properly which is really SLOF as it searches from the linked list
>> beginning every time.
>>
>> Since SLOF has just learned to build FDT and this takes less than 0.5sec
>> for such a big guest, this makes use of the proposed client interface
>> method - "fdt-fetch".
>>
>> If "fdt-fetch" is not available, the old method is used.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> I like the concept, few details though..
> 
>> ---
>>  arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
>>  1 file changed, 26 insertions(+)
>>
>> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
>> index 02190e90c7ae..daa50a153737 100644
>> --- a/arch/powerpc/kernel/prom_init.c
>> +++ b/arch/powerpc/kernel/prom_init.c
>> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
>>  		prom_panic("Can't allocate initial device-tree chunk\n");
>>  	mem_end = mem_start + room;
>>  
>> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
>> +			   room - sizeof(mem_reserve_map))) {
>> +		u32 size;
>> +
>> +		hdr = (void *) mem_start;
>> +
>> +		/* Fixup the boot cpuid */
>> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
> 
> If SLOF is generating a tree it really should get this header field
> right as well.


Ah, I did not realize it is just a phandle from /chosen/cpu. Will fix.


> 
>> +		/* Append the reserved map to the end of the blob */
>> +		hdr->off_mem_rsvmap = hdr->totalsize;
>> +		size = be32_to_cpu(hdr->totalsize);
>> +		rsvmap = (void *) hdr + size;
>> +		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
>> +		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
> 
> .. and the reserve map for that matter.  I don't really understand
> what you're doing here. 

? Get the blob, increase the FDT size by sizeof(mem_reserve_map), fix up
totalsize and off_mem_rsvmap, copy mem_reserve_map to the end of the blob
(the actual order is slightly different, may be a bit confusing).

Asking SLOF to reserve the space seems to be unnecessary complication of
the interface - SLOF does not provide any reserved memory records.

> Note also that the reserve map is required to
> be 8-byte aligned, which totalsize might not be.

Ah, good point.


> 
>> +		/* Store the DT address */
>> +		dt_header_start = mem_start;
>> +
>> +#ifdef DEBUG_PROM
>> +		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
>> +#endif
>> +		goto print_exit;
>> +	}
>> +
>>  	/* Get root of tree */
>>  	root = call_prom("peer", 1, 1, (phandle)0);
>>  	if (root == (phandle)0)
>> @@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
>>  	/* Copy the reserve map in */
>>  	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
>>  
>> +print_exit:
>>  #ifdef DEBUG_PROM
>>  	{
>>  		int i;
> 


-- 
Alexey


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 839 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  6:22   ` Alexey Kardashevskiy
@ 2017-10-16  6:46     ` David Gibson
  2017-10-16  7:07       ` Alexey Kardashevskiy
  0 siblings, 1 reply; 9+ messages in thread
From: David Gibson @ 2017-10-16  6:46 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: linuxppc-dev, Benjamin Herrenschmidt, Paul Mackerras, Greg Kurz,
	Balbir Singh, Thomas Huth, Nikunj A Dadhania, slof

[-- Attachment #1: Type: text/plain, Size: 4162 bytes --]

On Mon, Oct 16, 2017 at 05:22:55PM +1100, Alexey Kardashevskiy wrote:
> On 16/10/17 17:11, David Gibson wrote:
> > On Mon, Oct 16, 2017 at 04:49:17PM +1100, Alexey Kardashevskiy wrote:
> >> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
> >> about 8.5sec to read the entire device tree. Some explanation can be
> >> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
> >> because the kernel traverses the tree twice and it calls "getprop" for
> >> each properly which is really SLOF as it searches from the linked list
> >> beginning every time.
> >>
> >> Since SLOF has just learned to build FDT and this takes less than 0.5sec
> >> for such a big guest, this makes use of the proposed client interface
> >> method - "fdt-fetch".
> >>
> >> If "fdt-fetch" is not available, the old method is used.
> >>
> >> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> > 
> > I like the concept, few details though..
> > 
> >> ---
> >>  arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
> >>  1 file changed, 26 insertions(+)
> >>
> >> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> >> index 02190e90c7ae..daa50a153737 100644
> >> --- a/arch/powerpc/kernel/prom_init.c
> >> +++ b/arch/powerpc/kernel/prom_init.c
> >> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
> >>  		prom_panic("Can't allocate initial device-tree chunk\n");
> >>  	mem_end = mem_start + room;
> >>  
> >> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
> >> +			   room - sizeof(mem_reserve_map))) {
> >> +		u32 size;
> >> +
> >> +		hdr = (void *) mem_start;
> >> +
> >> +		/* Fixup the boot cpuid */
> >> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
> > 
> > If SLOF is generating a tree it really should get this header field
> > right as well.
> 
> 
> Ah, I did not realize it is just a phandle from /chosen/cpu. Will
> fix.

It's not a phandle.  It's just the "address" (i.e. reg value) of the
boot cpu.

> >> +		/* Append the reserved map to the end of the blob */
> >> +		hdr->off_mem_rsvmap = hdr->totalsize;
> >> +		size = be32_to_cpu(hdr->totalsize);
> >> +		rsvmap = (void *) hdr + size;
> >> +		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
> >> +		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
> > 
> > .. and the reserve map for that matter.  I don't really understand
> > what you're doing here. 
> 
> ? Get the blob, increase the FDT size by sizeof(mem_reserve_map), fix up
> totalsize and off_mem_rsvmap, copy mem_reserve_map to the end of the blob
> (the actual order is slightly different, may be a bit confusing).

Right.. but where is mem_reserve_map coming from, if it hasn't come
from an FDT?

> Asking SLOF to reserve the space seems to be unnecessary complication of
> the interface - SLOF does not provide any reserved memory records.

Ah.. right, the reservations are coming from the pre-prom kernel, not
from the firmware itself.  Yeah, that makes sense.  Ok, this makes
sense then...

> > Note also that the reserve map is required to
> > be 8-byte aligned, which totalsize might not be.
> 
> Ah, good point.

..at least with that fixed and maybe some comments to make what's
gonig on clearer.

> 
> 
> > 
> >> +		/* Store the DT address */
> >> +		dt_header_start = mem_start;
> >> +
> >> +#ifdef DEBUG_PROM
> >> +		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
> >> +#endif
> >> +		goto print_exit;
> >> +	}
> >> +
> >>  	/* Get root of tree */
> >>  	root = call_prom("peer", 1, 1, (phandle)0);
> >>  	if (root == (phandle)0)
> >> @@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
> >>  	/* Copy the reserve map in */
> >>  	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
> >>  
> >> +print_exit:
> >>  #ifdef DEBUG_PROM
> >>  	{
> >>  		int i;
> > 
> 
> 




-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  6:46     ` David Gibson
@ 2017-10-16  7:07       ` Alexey Kardashevskiy
  2017-10-16  9:05         ` David Gibson
  2017-10-16 10:20         ` Segher Boessenkool
  0 siblings, 2 replies; 9+ messages in thread
From: Alexey Kardashevskiy @ 2017-10-16  7:07 UTC (permalink / raw)
  To: David Gibson
  Cc: linuxppc-dev, Benjamin Herrenschmidt, Paul Mackerras, Greg Kurz,
	Balbir Singh, Thomas Huth, Nikunj A Dadhania, slof


[-- Attachment #1.1: Type: text/plain, Size: 4389 bytes --]

On 16/10/17 17:46, David Gibson wrote:
> On Mon, Oct 16, 2017 at 05:22:55PM +1100, Alexey Kardashevskiy wrote:
>> On 16/10/17 17:11, David Gibson wrote:
>>> On Mon, Oct 16, 2017 at 04:49:17PM +1100, Alexey Kardashevskiy wrote:
>>>> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
>>>> about 8.5sec to read the entire device tree. Some explanation can be
>>>> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
>>>> because the kernel traverses the tree twice and it calls "getprop" for
>>>> each properly which is really SLOF as it searches from the linked list
>>>> beginning every time.
>>>>
>>>> Since SLOF has just learned to build FDT and this takes less than 0.5sec
>>>> for such a big guest, this makes use of the proposed client interface
>>>> method - "fdt-fetch".
>>>>
>>>> If "fdt-fetch" is not available, the old method is used.
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>
>>> I like the concept, few details though..
>>>
>>>> ---
>>>>  arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
>>>>  1 file changed, 26 insertions(+)
>>>>
>>>> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
>>>> index 02190e90c7ae..daa50a153737 100644
>>>> --- a/arch/powerpc/kernel/prom_init.c
>>>> +++ b/arch/powerpc/kernel/prom_init.c
>>>> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
>>>>  		prom_panic("Can't allocate initial device-tree chunk\n");
>>>>  	mem_end = mem_start + room;
>>>>  
>>>> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
>>>> +			   room - sizeof(mem_reserve_map))) {
>>>> +		u32 size;
>>>> +
>>>> +		hdr = (void *) mem_start;
>>>> +
>>>> +		/* Fixup the boot cpuid */
>>>> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
>>>
>>> If SLOF is generating a tree it really should get this header field
>>> right as well.
>>
>>
>> Ah, I did not realize it is just a phandle from /chosen/cpu. Will
>> fix.
> 
> It's not a phandle.  It's just the "address" (i.e. reg value) of the
> boot cpu.


Well, it is "reg" of a CPU with phandle==/chosen/cpu so my fdt code needs
to look there to pick the right "reg" rather than just plain 0. I'll fix
this but in general can it possibly be not a zero in QEMU/SLOF?


> 
>>>> +		/* Append the reserved map to the end of the blob */
>>>> +		hdr->off_mem_rsvmap = hdr->totalsize;
>>>> +		size = be32_to_cpu(hdr->totalsize);
>>>> +		rsvmap = (void *) hdr + size;
>>>> +		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
>>>> +		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
>>>
>>> .. and the reserve map for that matter.  I don't really understand
>>> what you're doing here. 
>>
>> ? Get the blob, increase the FDT size by sizeof(mem_reserve_map), fix up
>> totalsize and off_mem_rsvmap, copy mem_reserve_map to the end of the blob
>> (the actual order is slightly different, may be a bit confusing).
> 
> Right.. but where is mem_reserve_map coming from, if it hasn't come
> from an FDT?
> 
>> Asking SLOF to reserve the space seems to be unnecessary complication of
>> the interface - SLOF does not provide any reserved memory records.
> 
> Ah.. right, the reservations are coming from the pre-prom kernel, not
> from the firmware itself.  Yeah, that makes sense.  Ok, this makes
> sense then...


Right, the reservations are added via reserve_mem() in
arch/powerpc/kernel/prom_init.c

> 
>>> Note also that the reserve map is required to
>>> be 8-byte aligned, which totalsize might not be.
>>
>> Ah, good point.
> 
> ..at least with that fixed and maybe some comments to make what's
> gonig on clearer.

>>
>>
>>>
>>>> +		/* Store the DT address */
>>>> +		dt_header_start = mem_start;
>>>> +
>>>> +#ifdef DEBUG_PROM
>>>> +		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
>>>> +#endif
>>>> +		goto print_exit;
>>>> +	}
>>>> +
>>>>  	/* Get root of tree */
>>>>  	root = call_prom("peer", 1, 1, (phandle)0);
>>>>  	if (root == (phandle)0)
>>>> @@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
>>>>  	/* Copy the reserve map in */
>>>>  	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
>>>>  
>>>> +print_exit:
>>>>  #ifdef DEBUG_PROM
>>>>  	{
>>>>  		int i;
>>>
>>
>>
> 
> 
> 
> 


-- 
Alexey


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 839 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  7:07       ` Alexey Kardashevskiy
@ 2017-10-16  9:05         ` David Gibson
  2017-10-16 10:20         ` Segher Boessenkool
  1 sibling, 0 replies; 9+ messages in thread
From: David Gibson @ 2017-10-16  9:05 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: linuxppc-dev, Benjamin Herrenschmidt, Paul Mackerras, Greg Kurz,
	Balbir Singh, Thomas Huth, Nikunj A Dadhania, slof

[-- Attachment #1: Type: text/plain, Size: 3019 bytes --]

On Mon, Oct 16, 2017 at 06:07:06PM +1100, Alexey Kardashevskiy wrote:
> On 16/10/17 17:46, David Gibson wrote:
> > On Mon, Oct 16, 2017 at 05:22:55PM +1100, Alexey Kardashevskiy wrote:
> >> On 16/10/17 17:11, David Gibson wrote:
> >>> On Mon, Oct 16, 2017 at 04:49:17PM +1100, Alexey Kardashevskiy wrote:
> >>>> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
> >>>> about 8.5sec to read the entire device tree. Some explanation can be
> >>>> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
> >>>> because the kernel traverses the tree twice and it calls "getprop" for
> >>>> each properly which is really SLOF as it searches from the linked list
> >>>> beginning every time.
> >>>>
> >>>> Since SLOF has just learned to build FDT and this takes less than 0.5sec
> >>>> for such a big guest, this makes use of the proposed client interface
> >>>> method - "fdt-fetch".
> >>>>
> >>>> If "fdt-fetch" is not available, the old method is used.
> >>>>
> >>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>
> >>> I like the concept, few details though..
> >>>
> >>>> ---
> >>>>  arch/powerpc/kernel/prom_init.c | 26 ++++++++++++++++++++++++++
> >>>>  1 file changed, 26 insertions(+)
> >>>>
> >>>> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> >>>> index 02190e90c7ae..daa50a153737 100644
> >>>> --- a/arch/powerpc/kernel/prom_init.c
> >>>> +++ b/arch/powerpc/kernel/prom_init.c
> >>>> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
> >>>>  		prom_panic("Can't allocate initial device-tree chunk\n");
> >>>>  	mem_end = mem_start + room;
> >>>>  
> >>>> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
> >>>> +			   room - sizeof(mem_reserve_map))) {
> >>>> +		u32 size;
> >>>> +
> >>>> +		hdr = (void *) mem_start;
> >>>> +
> >>>> +		/* Fixup the boot cpuid */
> >>>> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
> >>>
> >>> If SLOF is generating a tree it really should get this header field
> >>> right as well.
> >>
> >>
> >> Ah, I did not realize it is just a phandle from /chosen/cpu. Will
> >> fix.
> > 
> > It's not a phandle.  It's just the "address" (i.e. reg value) of the
> > boot cpu.
> 
> 
> Well, it is "reg" of a CPU with phandle==/chosen/cpu so my fdt code needs
> to look there to pick the right "reg" rather than just plain 0.

Ah, right, I see what you mean.

> I'll fix
> this but in general can it possibly be not a zero in QEMU/SLOF?

Erm.. probably not, but I'm not totally certain what could happen if
you tried creating all your cpu cores explicitly with -device instead
of just using -smp.

I think it's safer to look it up in SLOF, so that it won't break if we
change how cpu addresses are assigned in qemu.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  7:07       ` Alexey Kardashevskiy
  2017-10-16  9:05         ` David Gibson
@ 2017-10-16 10:20         ` Segher Boessenkool
  2017-10-16 11:08           ` Alexey Kardashevskiy
  1 sibling, 1 reply; 9+ messages in thread
From: Segher Boessenkool @ 2017-10-16 10:20 UTC (permalink / raw)
  To: Alexey Kardashevskiy
  Cc: David Gibson, Thomas Huth, Nikunj A Dadhania, Greg Kurz, slof,
	Paul Mackerras, linuxppc-dev

On Mon, Oct 16, 2017 at 06:07:06PM +1100, Alexey Kardashevskiy wrote:
> On 16/10/17 17:46, David Gibson wrote:

> >>>> +		/* Fixup the boot cpuid */
> >>>> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
> >>>
> >>> If SLOF is generating a tree it really should get this header field
> >>> right as well.
> >>
> >>
> >> Ah, I did not realize it is just a phandle from /chosen/cpu. Will
> >> fix.
> > 
> > It's not a phandle.  It's just the "address" (i.e. reg value) of the
> > boot cpu.
> 
> Well, it is "reg" of a CPU with phandle==/chosen/cpu so my fdt code needs
> to look there to pick the right "reg" rather than just plain 0. I'll fix
> this but in general can it possibly be not a zero in QEMU/SLOF?

/chosen/cpu is an ihandle, not a phandle.  Most (if not all) references
in /chosen are.


Segher

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16 10:20         ` Segher Boessenkool
@ 2017-10-16 11:08           ` Alexey Kardashevskiy
  0 siblings, 0 replies; 9+ messages in thread
From: Alexey Kardashevskiy @ 2017-10-16 11:08 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: David Gibson, Thomas Huth, Nikunj A Dadhania, Greg Kurz, slof,
	Paul Mackerras, linuxppc-dev

On 16/10/17 21:20, Segher Boessenkool wrote:
> On Mon, Oct 16, 2017 at 06:07:06PM +1100, Alexey Kardashevskiy wrote:
>> On 16/10/17 17:46, David Gibson wrote:
> 
>>>>>> +		/* Fixup the boot cpuid */
>>>>>> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
>>>>>
>>>>> If SLOF is generating a tree it really should get this header field
>>>>> right as well.
>>>>
>>>>
>>>> Ah, I did not realize it is just a phandle from /chosen/cpu. Will
>>>> fix.
>>>
>>> It's not a phandle.  It's just the "address" (i.e. reg value) of the
>>> boot cpu.
>>
>> Well, it is "reg" of a CPU with phandle==/chosen/cpu so my fdt code needs
>> to look there to pick the right "reg" rather than just plain 0. I'll fix
>> this but in general can it possibly be not a zero in QEMU/SLOF?
> 
> /chosen/cpu is an ihandle, not a phandle. 

Sure, that is what my proposed fdt-boot-cpu  does already.

> Most (if not all) references
> in /chosen are.





-- 
Alexey

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware
  2017-10-16  5:49 [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware Alexey Kardashevskiy
  2017-10-16  6:11 ` David Gibson
@ 2017-10-16 11:59 ` Michael Ellerman
  1 sibling, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2017-10-16 11:59 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev
  Cc: Thomas Huth, Nikunj A Dadhania, Alexey Kardashevskiy, Greg Kurz,
	slof, Paul Mackerras, David Gibson

Alexey Kardashevskiy <aik@ozlabs.ru> writes:

> At the moment, on 256CPU + 256 PCI devices guest, it takes the guest
> about 8.5sec to read the entire device tree. Some explanation can be
> found here: https://patchwork.ozlabs.org/patch/826124/ but mostly it is
> because the kernel traverses the tree twice and it calls "getprop" for
> each properly which is really SLOF as it searches from the linked list
> beginning every time.
>
> Since SLOF has just learned to build FDT and this takes less than 0.5sec
> for such a big guest, this makes use of the proposed client interface
> method - "fdt-fetch".

It's a pity doing it the normal way is so slow, but this seems like a
reasonable idea anyway.

> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index 02190e90c7ae..daa50a153737 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -2498,6 +2498,31 @@ static void __init flatten_device_tree(void)
>  		prom_panic("Can't allocate initial device-tree chunk\n");
>  	mem_end = mem_start + room;
  
I'd prefer you didn't munge it inside flatten_device_tree(), rather
create a wrapper that does ~=:

void get_flat_devicetree(void)
{
	if (!fetch_flat_devicetree())
        	flatten_device_tree();

	printf(...)
}

> +	if (!call_prom_ret("fdt-fetch", 2, 1, NULL, mem_start,
> +			   room - sizeof(mem_reserve_map))) {
> +		u32 size;
> +
> +		hdr = (void *) mem_start;
> +
> +		/* Fixup the boot cpuid */
> +		hdr->boot_cpuid_phys = cpu_to_be32(prom.cpu);
> +
> +		/* Append the reserved map to the end of the blob */
> +		hdr->off_mem_rsvmap = hdr->totalsize;
> +		size = be32_to_cpu(hdr->totalsize);
> +		rsvmap = (void *) hdr + size;
> +		hdr->totalsize = cpu_to_be32(size + sizeof(mem_reserve_map));
> +		memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
> +
> +		/* Store the DT address */
> +		dt_header_start = mem_start;
> +
> +#ifdef DEBUG_PROM
> +		prom_printf("Fetched DTB: %d bytes to @%x\n", size, mem_start);
> +#endif

I think that should actually not be under DEBUG_PROM. The origin of the
FDT is fairly crucial information, so I think we can tolerate an extra
line of output to know that.

> +		goto print_exit;

This was the clue that it should be in a separate function :)

cheers

> +	}
> +
>  	/* Get root of tree */
>  	root = call_prom("peer", 1, 1, (phandle)0);
>  	if (root == (phandle)0)
> @@ -2548,6 +2573,7 @@ static void __init flatten_device_tree(void)
>  	/* Copy the reserve map in */
>  	memcpy(rsvmap, mem_reserve_map, sizeof(mem_reserve_map));
>  
> +print_exit:
>  #ifdef DEBUG_PROM
>  	{
>  		int i;
> -- 
> 2.11.0

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-10-16 11:59 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-16  5:49 [PATCH kernel] RFC: prom_init: Fetch flatten device tree from the system firmware Alexey Kardashevskiy
2017-10-16  6:11 ` David Gibson
2017-10-16  6:22   ` Alexey Kardashevskiy
2017-10-16  6:46     ` David Gibson
2017-10-16  7:07       ` Alexey Kardashevskiy
2017-10-16  9:05         ` David Gibson
2017-10-16 10:20         ` Segher Boessenkool
2017-10-16 11:08           ` Alexey Kardashevskiy
2017-10-16 11:59 ` Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.