All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libfdt: Remove special handling for unaligned reads
@ 2020-01-17 15:31 Tom Rini
       [not found] ` <20200117153106.29909-1-trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Tom Rini @ 2020-01-17 15:31 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA
  Cc: Patrice CHOTARD, Patrick DELAUNAY

6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
changes to support unaligned reads for ARM platforms and 11738cf01f15
"libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
performance of these helpers.

Ultimately however, these helpers should not exist.  Unaligned access
only occurs when images are forced out of alignment by the user.  This
unalignment is not supported and introduces problems later on as other
parts of the system image are unaligned and they too require alignment.

Revert both of these changes.

Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
---
By way of a little more explanation, looking at the archives it seems
that the initial bug reporter said that they had a platform that was
using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
What that does is to tell U-Boot to not do any of the sanity checks and
relocation to ensure alignment that it would normally do because the
user knows best.  This later came up on the U-Boot list as once the DTB
was loaded, Linux is unhappy because it demands correct alignment.

I only realized libfdt had introduced changes here when it was reported
that boot time had gotten much slower once we merged this change in.  It
would be best to just drop it.
---
 fdtget.c        |  2 +-
 libfdt/fdt_ro.c | 18 +++++++++---------
 libfdt/libfdt.h | 33 +--------------------------------
 3 files changed, 11 insertions(+), 42 deletions(-)

diff --git a/fdtget.c b/fdtget.c
index 777582e2d45f..7cee28718cbc 100644
--- a/fdtget.c
+++ b/fdtget.c
@@ -62,7 +62,7 @@ static int show_cell_list(struct display_info *disp, const char *data, int len,
 	for (i = 0; i < len; i += size, p += size) {
 		if (i)
 			printf(" ");
-		value = size == 4 ? fdt32_ld((const fdt32_t *)p) :
+		value = size == 4 ? fdt32_to_cpu(*(const fdt32_t *)p) :
 			size == 2 ? (*p << 8) | p[1] : *p;
 		printf(fmt, value);
 	}
diff --git a/libfdt/fdt_ro.c b/libfdt/fdt_ro.c
index a5c2797cde65..01b4b8579a00 100644
--- a/libfdt/fdt_ro.c
+++ b/libfdt/fdt_ro.c
@@ -167,8 +167,8 @@ int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
 	if (!re)
 		return -FDT_ERR_BADOFFSET;
 
-	*address = fdt64_ld(&re->address);
-	*size = fdt64_ld(&re->size);
+	*address = fdt64_to_cpu(re->address);
+	*size = fdt64_to_cpu(re->size);
 	return 0;
 }
 
@@ -178,7 +178,7 @@ int fdt_num_mem_rsv(const void *fdt)
 	const struct fdt_reserve_entry *re;
 
 	for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) {
-		if (fdt64_ld(&re->size) == 0)
+		if (fdt64_to_cpu(re->size) == 0)
 			return i;
 	}
 	return -FDT_ERR_TRUNCATED;
@@ -355,7 +355,7 @@ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt,
 	prop = fdt_offset_ptr_(fdt, offset);
 
 	if (lenp)
-		*lenp = fdt32_ld(&prop->len);
+		*lenp = fdt32_to_cpu(prop->len);
 
 	return prop;
 }
@@ -392,7 +392,7 @@ static const struct fdt_property *fdt_get_property_namelen_(const void *fdt,
 			offset = -FDT_ERR_INTERNAL;
 			break;
 		}
-		if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff),
+		if (fdt_string_eq_(fdt, fdt32_to_cpu(prop->nameoff),
 				   name, namelen)) {
 			if (poffset)
 				*poffset = offset;
@@ -445,7 +445,7 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
 
 	/* Handle realignment */
 	if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 &&
-	    fdt32_ld(&prop->len) >= 8)
+	    fdt32_to_cpu(prop->len) >= 8)
 		return prop->data + 4;
 	return prop->data;
 }
@@ -461,7 +461,7 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset,
 	if (namep) {
 		const char *name;
 		int namelen;
-		name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff),
+		name = fdt_get_string(fdt, fdt32_to_cpu(prop->nameoff),
 				      &namelen);
 		if (!name) {
 			if (lenp)
@@ -473,7 +473,7 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset,
 
 	/* Handle realignment */
 	if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 &&
-	    fdt32_ld(&prop->len) >= 8)
+	    fdt32_to_cpu(prop->len) >= 8)
 		return prop->data + 4;
 	return prop->data;
 }
@@ -498,7 +498,7 @@ uint32_t fdt_get_phandle(const void *fdt, int nodeoffset)
 			return 0;
 	}
 
-	return fdt32_ld(php);
+	return fdt32_to_cpu(*php);
 }
 
 const char *fdt_get_alias_namelen(const void *fdt,
diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
index fc4c4962a01c..d4ebe915cf46 100644
--- a/libfdt/libfdt.h
+++ b/libfdt/libfdt.h
@@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
 
 uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
 
-/*
- * Alignment helpers:
- *     These helpers access words from a device tree blob.  They're
- *     built to work even with unaligned pointers on platforms (ike
- *     ARM) that don't like unaligned loads and stores
- */
-
-static inline uint32_t fdt32_ld(const fdt32_t *p)
-{
-	const uint8_t *bp = (const uint8_t *)p;
-
-	return ((uint32_t)bp[0] << 24)
-		| ((uint32_t)bp[1] << 16)
-		| ((uint32_t)bp[2] << 8)
-		| bp[3];
-}
-
 static inline void fdt32_st(void *property, uint32_t value)
 {
 	uint8_t *bp = (uint8_t *)property;
@@ -144,20 +127,6 @@ static inline void fdt32_st(void *property, uint32_t value)
 	bp[3] = value & 0xff;
 }
 
-static inline uint64_t fdt64_ld(const fdt64_t *p)
-{
-	const uint8_t *bp = (const uint8_t *)p;
-
-	return ((uint64_t)bp[0] << 56)
-		| ((uint64_t)bp[1] << 48)
-		| ((uint64_t)bp[2] << 40)
-		| ((uint64_t)bp[3] << 32)
-		| ((uint64_t)bp[4] << 24)
-		| ((uint64_t)bp[5] << 16)
-		| ((uint64_t)bp[6] << 8)
-		| bp[7];
-}
-
 static inline void fdt64_st(void *property, uint64_t value)
 {
 	uint8_t *bp = (uint8_t *)property;
@@ -232,7 +201,7 @@ int fdt_next_subnode(const void *fdt, int offset);
 /* General functions                                                  */
 /**********************************************************************/
 #define fdt_get_header(fdt, field) \
-	(fdt32_ld(&((const struct fdt_header *)(fdt))->field))
+	(fdt32_to_cpu(((const struct fdt_header *)(fdt))->field))
 #define fdt_magic(fdt)			(fdt_get_header(fdt, magic))
 #define fdt_totalsize(fdt)		(fdt_get_header(fdt, totalsize))
 #define fdt_off_dt_struct(fdt)		(fdt_get_header(fdt, off_dt_struct))
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found] ` <20200117153106.29909-1-trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
@ 2020-01-18  0:32   ` Simon Glass
  2020-01-23  9:14   ` David Gibson
  1 sibling, 0 replies; 26+ messages in thread
From: Simon Glass @ 2020-01-18  0:32 UTC (permalink / raw)
  To: Tom Rini; +Cc: Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

On Sat, 18 Jan 2020 at 04:31, Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
>
> 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> changes to support unaligned reads for ARM platforms and 11738cf01f15
> "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> performance of these helpers.
>
> Ultimately however, these helpers should not exist.  Unaligned access
> only occurs when images are forced out of alignment by the user.  This
> unalignment is not supported and introduces problems later on as other
> parts of the system image are unaligned and they too require alignment.
>
> Revert both of these changes.
>
> Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> ---
> By way of a little more explanation, looking at the archives it seems
> that the initial bug reporter said that they had a platform that was
> using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> What that does is to tell U-Boot to not do any of the sanity checks and
> relocation to ensure alignment that it would normally do because the
> user knows best.  This later came up on the U-Boot list as once the DTB
> was loaded, Linux is unhappy because it demands correct alignment.
>
> I only realized libfdt had introduced changes here when it was reported
> that boot time had gotten much slower once we merged this change in.  It
> would be best to just drop it.
> ---
>  fdtget.c        |  2 +-
>  libfdt/fdt_ro.c | 18 +++++++++---------
>  libfdt/libfdt.h | 33 +--------------------------------
>  3 files changed, 11 insertions(+), 42 deletions(-)

Reviewed-by: Simon Glass <sjg-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>

(I note this commit had no test coverage anyway)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found] ` <20200117153106.29909-1-trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
  2020-01-18  0:32   ` Simon Glass
@ 2020-01-23  9:14   ` David Gibson
       [not found]     ` <20200123091440.GQ2347-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: David Gibson @ 2020-01-23  9:14 UTC (permalink / raw)
  To: Tom Rini
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 3340 bytes --]

On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> changes to support unaligned reads for ARM platforms and 11738cf01f15
> "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> performance of these helpers.
> 
> Ultimately however, these helpers should not exist.  Unaligned access
> only occurs when images are forced out of alignment by the user.  This
> unalignment is not supported and introduces problems later on as other
> parts of the system image are unaligned and they too require alignment.
> 
> Revert both of these changes.
> 
> Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> ---
> By way of a little more explanation, looking at the archives it seems
> that the initial bug reporter said that they had a platform that was
> using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> What that does is to tell U-Boot to not do any of the sanity checks and
> relocation to ensure alignment that it would normally do because the
> user knows best.  This later came up on the U-Boot list as once the DTB
> was loaded, Linux is unhappy because it demands correct alignment.
> 
> I only realized libfdt had introduced changes here when it was reported
> that boot time had gotten much slower once we merged this change in.  It
> would be best to just drop it.

Hmm.  I'm not sure about this.  The commit message makes a case for
why the unaligned handling isn't necessary, but not a case for why
it's bad.  Even if handling an unaligned tree isn't a common case,
isn't it better to be able to than not?

I gather from the previous discussion that there's a significant
performance impact, but that rationale needs to go into the commit
message for posterity.

[snip]
> diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> index fc4c4962a01c..d4ebe915cf46 100644
> --- a/libfdt/libfdt.h
> +++ b/libfdt/libfdt.h
> @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
>  
>  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
>  
> -/*
> - * Alignment helpers:
> - *     These helpers access words from a device tree blob.  They're
> - *     built to work even with unaligned pointers on platforms (ike
> - *     ARM) that don't like unaligned loads and stores
> - */
> -
> -static inline uint32_t fdt32_ld(const fdt32_t *p)
> -{
> -	const uint8_t *bp = (const uint8_t *)p;
> -
> -	return ((uint32_t)bp[0] << 24)
> -		| ((uint32_t)bp[1] << 16)
> -		| ((uint32_t)bp[2] << 8)
> -		| bp[3];
> -}

In particular, I definitely think removing the helpers entirely is a
no go.  They're now part of the published interface of the library.
Even if they're not used for reading the internal tags, they can be
used to load integers from within particular properties.  Those are
frequently unaligned, since properties generally have packed
representations.

How much of the performance loss would we get back if we put an actual
conditional on an aligned address in the helpers?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]     ` <20200123091440.GQ2347-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
@ 2020-01-23 12:16       ` Tom Rini
  2020-01-27  3:23         ` David Gibson
  0 siblings, 1 reply; 26+ messages in thread
From: Tom Rini @ 2020-01-23 12:16 UTC (permalink / raw)
  To: David Gibson
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 4234 bytes --]

On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > performance of these helpers.
> > 
> > Ultimately however, these helpers should not exist.  Unaligned access
> > only occurs when images are forced out of alignment by the user.  This
> > unalignment is not supported and introduces problems later on as other
> > parts of the system image are unaligned and they too require alignment.
> > 
> > Revert both of these changes.
> > 
> > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > ---
> > By way of a little more explanation, looking at the archives it seems
> > that the initial bug reporter said that they had a platform that was
> > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > What that does is to tell U-Boot to not do any of the sanity checks and
> > relocation to ensure alignment that it would normally do because the
> > user knows best.  This later came up on the U-Boot list as once the DTB
> > was loaded, Linux is unhappy because it demands correct alignment.
> > 
> > I only realized libfdt had introduced changes here when it was reported
> > that boot time had gotten much slower once we merged this change in.  It
> > would be best to just drop it.
> 
> Hmm.  I'm not sure about this.  The commit message makes a case for
> why the unaligned handling isn't necessary, but not a case for why
> it's bad.  Even if handling an unaligned tree isn't a common case,
> isn't it better to be able to than not?
> 
> I gather from the previous discussion that there's a significant
> performance impact, but that rationale needs to go into the commit
> message for posterity.

I wanted to emphasize that the code simply isn't ever needed, not that
it's a performance problem.  A performance problem implies that we would
keep this, if it was fast enough.  That's why people noticed it (it
slows things down to an unusable level).  But it's functionally wrong.

> [snip]
> > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > index fc4c4962a01c..d4ebe915cf46 100644
> > --- a/libfdt/libfdt.h
> > +++ b/libfdt/libfdt.h
> > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> >  
> >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> >  
> > -/*
> > - * Alignment helpers:
> > - *     These helpers access words from a device tree blob.  They're
> > - *     built to work even with unaligned pointers on platforms (ike
> > - *     ARM) that don't like unaligned loads and stores
> > - */
> > -
> > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > -{
> > -	const uint8_t *bp = (const uint8_t *)p;
> > -
> > -	return ((uint32_t)bp[0] << 24)
> > -		| ((uint32_t)bp[1] << 16)
> > -		| ((uint32_t)bp[2] << 8)
> > -		| bp[3];
> > -}
> 
> In particular, I definitely think removing the helpers entirely is a
> no go.  They're now part of the published interface of the library.

Perhaps "mistakes were made" ?  I don't think we need to worry about
removing an interface here as projects are avoiding upgrading libfdt
now (TF-A, formerly ATF) or reverting the change (U-Boot).

> Even if they're not used for reading the internal tags, they can be
> used to load integers from within particular properties.  Those are
> frequently unaligned, since properties generally have packed
> representations.

I don't see the relevance.  Go back to the initial problem report.  It's
not "I have a new unique platform I'm using libfdt on and I have
problems".  It's "I keep jabbing myself with a rusty nail and now I have
problems".

> How much of the performance loss would we get back if we put an actual
> conditional on an aligned address in the helpers?

I don't know but to be very clear, we don't ever need these on ARM.
Keep in mind we've been using device trees on ARM for over 10 years at
this point.

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-23 12:16       ` Tom Rini
@ 2020-01-27  3:23         ` David Gibson
       [not found]           ` <20200127032351.GA1818-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: David Gibson @ 2020-01-27  3:23 UTC (permalink / raw)
  To: Tom Rini
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 5926 bytes --]

On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > performance of these helpers.
> > > 
> > > Ultimately however, these helpers should not exist.  Unaligned access
> > > only occurs when images are forced out of alignment by the user.  This
> > > unalignment is not supported and introduces problems later on as other
> > > parts of the system image are unaligned and they too require alignment.
> > > 
> > > Revert both of these changes.
> > > 
> > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > ---
> > > By way of a little more explanation, looking at the archives it seems
> > > that the initial bug reporter said that they had a platform that was
> > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > relocation to ensure alignment that it would normally do because the
> > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > was loaded, Linux is unhappy because it demands correct alignment.
> > > 
> > > I only realized libfdt had introduced changes here when it was reported
> > > that boot time had gotten much slower once we merged this change in.  It
> > > would be best to just drop it.
> > 
> > Hmm.  I'm not sure about this.  The commit message makes a case for
> > why the unaligned handling isn't necessary, but not a case for why
> > it's bad.  Even if handling an unaligned tree isn't a common case,
> > isn't it better to be able to than not?
> > 
> > I gather from the previous discussion that there's a significant
> > performance impact, but that rationale needs to go into the commit
> > message for posterity.
> 
> I wanted to emphasize that the code simply isn't ever needed, not that
> it's a performance problem.  A performance problem implies that we would
> keep this, if it was fast enough.  That's why people noticed it (it
> slows things down to an unusable level).  But it's functionally wrong.
> 
> > [snip]
> > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > index fc4c4962a01c..d4ebe915cf46 100644
> > > --- a/libfdt/libfdt.h
> > > +++ b/libfdt/libfdt.h
> > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > >  
> > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > >  
> > > -/*
> > > - * Alignment helpers:
> > > - *     These helpers access words from a device tree blob.  They're
> > > - *     built to work even with unaligned pointers on platforms (ike
> > > - *     ARM) that don't like unaligned loads and stores
> > > - */
> > > -
> > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > -{
> > > -	const uint8_t *bp = (const uint8_t *)p;
> > > -
> > > -	return ((uint32_t)bp[0] << 24)
> > > -		| ((uint32_t)bp[1] << 16)
> > > -		| ((uint32_t)bp[2] << 8)
> > > -		| bp[3];
> > > -}
> > 
> > In particular, I definitely think removing the helpers entirely is a
> > no go.  They're now part of the published interface of the library.
> 
> Perhaps "mistakes were made" ?  I don't think we need to worry about
> removing an interface here as projects are avoiding upgrading libfdt
> now (TF-A, formerly ATF) or reverting the change (U-Boot).
> 
> > Even if they're not used for reading the internal tags, they can be
> > used to load integers from within particular properties.  Those are
> > frequently unaligned, since properties generally have packed
> > representations.
> 
> I don't see the relevance.  Go back to the initial problem report.  It's
> not "I have a new unique platform I'm using libfdt on and I have
> problems".  It's "I keep jabbing myself with a rusty nail and now I have
> problems".

The initial report isn't the only relevant thing here.  Although it
prompted the change, it wasn't the only consideration in making it.

There's also two separate questions here:
  1) Do we want byteswapping integer load helpers?
  2) Should those handle unaligned accesses?

In working out how to address the (as it turns out, non existent)
problem, I realized an abstraction for loading big-endian integers
from the blob would be a useful thing in its own right.  I also
realized that it's a useful thing not just inside the libfdt code, but
as an external interface.  Callers have always needed to interpret the
contents of individual dt properties, and loading integers from them
is often a part of that.

I know of people using those fdtXX_ld() helpers right now, so I'm not
going to take them away.

For the case of external users we absolutely do need to handle
unaligned accesses.  There are a number of existing bindings that mix
strings and integers in packed format, in a single encoded property.
So regardless of the alignment of the whole property, we can easily
get unaligned integers in there, and I don't want to expose multiple
different helpers for different cases.

Now, we don't *have* to use the same helpers for internal use.  We
could open code the internal loads, or use a special aligned-only
version inside.  But using the existing external helpers is the
obvious and simple choice.

So, we're back to: I need a case for changing this now, not just a
case for claiming it wasn't needed in the first place.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]           ` <20200127032351.GA1818-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
@ 2020-01-27 15:04             ` Tom Rini
  2020-01-27 19:18               ` Simon Glass
  2020-01-28  8:59               ` David Gibson
  0 siblings, 2 replies; 26+ messages in thread
From: Tom Rini @ 2020-01-27 15:04 UTC (permalink / raw)
  To: David Gibson
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 6693 bytes --]

On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > performance of these helpers.
> > > > 
> > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > only occurs when images are forced out of alignment by the user.  This
> > > > unalignment is not supported and introduces problems later on as other
> > > > parts of the system image are unaligned and they too require alignment.
> > > > 
> > > > Revert both of these changes.
> > > > 
> > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > ---
> > > > By way of a little more explanation, looking at the archives it seems
> > > > that the initial bug reporter said that they had a platform that was
> > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > relocation to ensure alignment that it would normally do because the
> > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > 
> > > > I only realized libfdt had introduced changes here when it was reported
> > > > that boot time had gotten much slower once we merged this change in.  It
> > > > would be best to just drop it.
> > > 
> > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > why the unaligned handling isn't necessary, but not a case for why
> > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > isn't it better to be able to than not?
> > > 
> > > I gather from the previous discussion that there's a significant
> > > performance impact, but that rationale needs to go into the commit
> > > message for posterity.
> > 
> > I wanted to emphasize that the code simply isn't ever needed, not that
> > it's a performance problem.  A performance problem implies that we would
> > keep this, if it was fast enough.  That's why people noticed it (it
> > slows things down to an unusable level).  But it's functionally wrong.
> > 
> > > [snip]
> > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > --- a/libfdt/libfdt.h
> > > > +++ b/libfdt/libfdt.h
> > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > >  
> > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > >  
> > > > -/*
> > > > - * Alignment helpers:
> > > > - *     These helpers access words from a device tree blob.  They're
> > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > - *     ARM) that don't like unaligned loads and stores
> > > > - */
> > > > -
> > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > -{
> > > > -	const uint8_t *bp = (const uint8_t *)p;
> > > > -
> > > > -	return ((uint32_t)bp[0] << 24)
> > > > -		| ((uint32_t)bp[1] << 16)
> > > > -		| ((uint32_t)bp[2] << 8)
> > > > -		| bp[3];
> > > > -}
> > > 
> > > In particular, I definitely think removing the helpers entirely is a
> > > no go.  They're now part of the published interface of the library.
> > 
> > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > removing an interface here as projects are avoiding upgrading libfdt
> > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > 
> > > Even if they're not used for reading the internal tags, they can be
> > > used to load integers from within particular properties.  Those are
> > > frequently unaligned, since properties generally have packed
> > > representations.
> > 
> > I don't see the relevance.  Go back to the initial problem report.  It's
> > not "I have a new unique platform I'm using libfdt on and I have
> > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > problems".
> 
> The initial report isn't the only relevant thing here.  Although it
> prompted the change, it wasn't the only consideration in making it.
> 
> There's also two separate questions here:
>   1) Do we want byteswapping integer load helpers?
>   2) Should those handle unaligned accesses?
> 
> In working out how to address the (as it turns out, non existent)
> problem, I realized an abstraction for loading big-endian integers
> from the blob would be a useful thing in its own right.  I also
> realized that it's a useful thing not just inside the libfdt code, but
> as an external interface.  Callers have always needed to interpret the
> contents of individual dt properties, and loading integers from them
> is often a part of that.
> 
> I know of people using those fdtXX_ld() helpers right now, so I'm not
> going to take them away.
> 
> For the case of external users we absolutely do need to handle
> unaligned accesses.  There are a number of existing bindings that mix
> strings and integers in packed format, in a single encoded property.
> So regardless of the alignment of the whole property, we can easily
> get unaligned integers in there, and I don't want to expose multiple
> different helpers for different cases.
> 
> Now, we don't *have* to use the same helpers for internal use.  We
> could open code the internal loads, or use a special aligned-only
> version inside.  But using the existing external helpers is the
> obvious and simple choice.
> 
> So, we're back to: I need a case for changing this now, not just a
> case for claiming it wasn't needed in the first place.

For U-Boot, I'm just going to revert this part of the changes.  I would
suggest that you look in to some way to fix the "fast path" here to go
back to the previous way of working so that other project can continue
to use libfdt as well and when callers need to access these helpers and
are not otherwise in the fast path can do so.

You're adding visible boot time delay with things the way they exist
today.  That's not OK.  I wouldn't have updated U-Boot with these
changes if we had automated timing tests like the TF-A folks do.

Please do let us know if you have any changes to try that might
alleviate the overall problem.  Thanks!

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-27 15:04             ` Tom Rini
@ 2020-01-27 19:18               ` Simon Glass
  2020-01-28  8:59               ` David Gibson
  1 sibling, 0 replies; 26+ messages in thread
From: Simon Glass @ 2020-01-27 19:18 UTC (permalink / raw)
  To: Tom Rini
  Cc: David Gibson, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

Hi Tom,

On Mon, 27 Jan 2020 at 08:05, Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
>
> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > performance of these helpers.
> > > > >
> > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > unalignment is not supported and introduces problems later on as other
> > > > > parts of the system image are unaligned and they too require alignment.
> > > > >
> > > > > Revert both of these changes.
> > > > >
> > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > ---
> > > > > By way of a little more explanation, looking at the archives it seems
> > > > > that the initial bug reporter said that they had a platform that was
> > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > relocation to ensure alignment that it would normally do because the
> > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > >
> > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > would be best to just drop it.
> > > >
> > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > why the unaligned handling isn't necessary, but not a case for why
> > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > isn't it better to be able to than not?
> > > >
> > > > I gather from the previous discussion that there's a significant
> > > > performance impact, but that rationale needs to go into the commit
> > > > message for posterity.
> > >
> > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > it's a performance problem.  A performance problem implies that we would
> > > keep this, if it was fast enough.  That's why people noticed it (it
> > > slows things down to an unusable level).  But it's functionally wrong.
> > >
> > > > [snip]
> > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > --- a/libfdt/libfdt.h
> > > > > +++ b/libfdt/libfdt.h
> > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > >
> > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > >
> > > > > -/*
> > > > > - * Alignment helpers:
> > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > - */
> > > > > -
> > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > -{
> > > > > -       const uint8_t *bp = (const uint8_t *)p;
> > > > > -
> > > > > -       return ((uint32_t)bp[0] << 24)
> > > > > -               | ((uint32_t)bp[1] << 16)
> > > > > -               | ((uint32_t)bp[2] << 8)
> > > > > -               | bp[3];
> > > > > -}
> > > >
> > > > In particular, I definitely think removing the helpers entirely is a
> > > > no go.  They're now part of the published interface of the library.
> > >
> > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > removing an interface here as projects are avoiding upgrading libfdt
> > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > >
> > > > Even if they're not used for reading the internal tags, they can be
> > > > used to load integers from within particular properties.  Those are
> > > > frequently unaligned, since properties generally have packed
> > > > representations.
> > >
> > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > not "I have a new unique platform I'm using libfdt on and I have
> > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > problems".
> >
> > The initial report isn't the only relevant thing here.  Although it
> > prompted the change, it wasn't the only consideration in making it.
> >
> > There's also two separate questions here:
> >   1) Do we want byteswapping integer load helpers?
> >   2) Should those handle unaligned accesses?
> >
> > In working out how to address the (as it turns out, non existent)
> > problem, I realized an abstraction for loading big-endian integers
> > from the blob would be a useful thing in its own right.  I also
> > realized that it's a useful thing not just inside the libfdt code, but
> > as an external interface.  Callers have always needed to interpret the
> > contents of individual dt properties, and loading integers from them
> > is often a part of that.
> >
> > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > going to take them away.
> >
> > For the case of external users we absolutely do need to handle
> > unaligned accesses.  There are a number of existing bindings that mix
> > strings and integers in packed format, in a single encoded property.
> > So regardless of the alignment of the whole property, we can easily
> > get unaligned integers in there, and I don't want to expose multiple
> > different helpers for different cases.
> >
> > Now, we don't *have* to use the same helpers for internal use.  We
> > could open code the internal loads, or use a special aligned-only
> > version inside.  But using the existing external helpers is the
> > obvious and simple choice.
> >
> > So, we're back to: I need a case for changing this now, not just a
> > case for claiming it wasn't needed in the first place.
>
> For U-Boot, I'm just going to revert this part of the changes.  I would
> suggest that you look in to some way to fix the "fast path" here to go
> back to the previous way of working so that other project can continue
> to use libfdt as well and when callers need to access these helpers and
> are not otherwise in the fast path can do so.
>
> You're adding visible boot time delay with things the way they exist
> today.  That's not OK.  I wouldn't have updated U-Boot with these
> changes if we had automated timing tests like the TF-A folks do.
>
> Please do let us know if you have any changes to try that might
> alleviate the overall problem.  Thanks!

For my part I think it is best to revert the change in U-Boot for now.
The code difference thus created is small. Hopefully it can be
reverted in libfdt, perhaps along the lines that David suggests.

Regards,
Simon

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-27 15:04             ` Tom Rini
  2020-01-27 19:18               ` Simon Glass
@ 2020-01-28  8:59               ` David Gibson
       [not found]                 ` <20200128085918.GO42099-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: David Gibson @ 2020-01-28  8:59 UTC (permalink / raw)
  To: Tom Rini
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 7759 bytes --]

On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > performance of these helpers.
> > > > > 
> > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > unalignment is not supported and introduces problems later on as other
> > > > > parts of the system image are unaligned and they too require alignment.
> > > > > 
> > > > > Revert both of these changes.
> > > > > 
> > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > ---
> > > > > By way of a little more explanation, looking at the archives it seems
> > > > > that the initial bug reporter said that they had a platform that was
> > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > relocation to ensure alignment that it would normally do because the
> > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > 
> > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > would be best to just drop it.
> > > > 
> > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > why the unaligned handling isn't necessary, but not a case for why
> > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > isn't it better to be able to than not?
> > > > 
> > > > I gather from the previous discussion that there's a significant
> > > > performance impact, but that rationale needs to go into the commit
> > > > message for posterity.
> > > 
> > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > it's a performance problem.  A performance problem implies that we would
> > > keep this, if it was fast enough.  That's why people noticed it (it
> > > slows things down to an unusable level).  But it's functionally wrong.
> > > 
> > > > [snip]
> > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > --- a/libfdt/libfdt.h
> > > > > +++ b/libfdt/libfdt.h
> > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > >  
> > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > >  
> > > > > -/*
> > > > > - * Alignment helpers:
> > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > - */
> > > > > -
> > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > -{
> > > > > -	const uint8_t *bp = (const uint8_t *)p;
> > > > > -
> > > > > -	return ((uint32_t)bp[0] << 24)
> > > > > -		| ((uint32_t)bp[1] << 16)
> > > > > -		| ((uint32_t)bp[2] << 8)
> > > > > -		| bp[3];
> > > > > -}
> > > > 
> > > > In particular, I definitely think removing the helpers entirely is a
> > > > no go.  They're now part of the published interface of the library.
> > > 
> > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > removing an interface here as projects are avoiding upgrading libfdt
> > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > 
> > > > Even if they're not used for reading the internal tags, they can be
> > > > used to load integers from within particular properties.  Those are
> > > > frequently unaligned, since properties generally have packed
> > > > representations.
> > > 
> > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > not "I have a new unique platform I'm using libfdt on and I have
> > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > problems".
> > 
> > The initial report isn't the only relevant thing here.  Although it
> > prompted the change, it wasn't the only consideration in making it.
> > 
> > There's also two separate questions here:
> >   1) Do we want byteswapping integer load helpers?
> >   2) Should those handle unaligned accesses?
> > 
> > In working out how to address the (as it turns out, non existent)
> > problem, I realized an abstraction for loading big-endian integers
> > from the blob would be a useful thing in its own right.  I also
> > realized that it's a useful thing not just inside the libfdt code, but
> > as an external interface.  Callers have always needed to interpret the
> > contents of individual dt properties, and loading integers from them
> > is often a part of that.
> > 
> > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > going to take them away.
> > 
> > For the case of external users we absolutely do need to handle
> > unaligned accesses.  There are a number of existing bindings that mix
> > strings and integers in packed format, in a single encoded property.
> > So regardless of the alignment of the whole property, we can easily
> > get unaligned integers in there, and I don't want to expose multiple
> > different helpers for different cases.
> > 
> > Now, we don't *have* to use the same helpers for internal use.  We
> > could open code the internal loads, or use a special aligned-only
> > version inside.  But using the existing external helpers is the
> > obvious and simple choice.
> > 
> > So, we're back to: I need a case for changing this now, not just a
> > case for claiming it wasn't needed in the first place.
> 
> For U-Boot, I'm just going to revert this part of the changes.  I would

That seems reasonable for the interim.

> suggest that you look in to some way to fix the "fast path" here to go
> back to the previous way of working so that other project can continue
> to use libfdt as well and when callers need to access these helpers and
> are not otherwise in the fast path can do so.
> 
> You're adding visible boot time delay with things the way they exist
> today.  That's not OK.

That's a fair point, but you need to make that case in the commit
message and merge request, not just expect me to find and collate the
arguments from other threads.

> I wouldn't have updated U-Boot with these
> changes if we had automated timing tests like the TF-A folks do.
> 
> Please do let us know if you have any changes to try that might
> alleviate the overall problem.  Thanks!

I finally got around to looking through Simon Glass's patches for
reducing libfdt code size with various "asusmption" flags.  I think
it's a good concept and close to being ready.

I don't think it's the only thing we want to do, but one thing we
could do is to add another ASSUME_ALIGNED flag that will simplify the
load helpers.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                 ` <20200128085918.GO42099-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
@ 2020-01-28 13:43                   ` Tom Rini
  2020-01-28 14:08                     ` Rob Herring
  2020-01-29  1:29                     ` David Gibson
  0 siblings, 2 replies; 26+ messages in thread
From: Tom Rini @ 2020-01-28 13:43 UTC (permalink / raw)
  To: David Gibson
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 8742 bytes --]

On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > performance of these helpers.
> > > > > > 
> > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > 
> > > > > > Revert both of these changes.
> > > > > > 
> > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > ---
> > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > 
> > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > would be best to just drop it.
> > > > > 
> > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > isn't it better to be able to than not?
> > > > > 
> > > > > I gather from the previous discussion that there's a significant
> > > > > performance impact, but that rationale needs to go into the commit
> > > > > message for posterity.
> > > > 
> > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > it's a performance problem.  A performance problem implies that we would
> > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > 
> > > > > [snip]
> > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > --- a/libfdt/libfdt.h
> > > > > > +++ b/libfdt/libfdt.h
> > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > >  
> > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > >  
> > > > > > -/*
> > > > > > - * Alignment helpers:
> > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > - */
> > > > > > -
> > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > -{
> > > > > > -	const uint8_t *bp = (const uint8_t *)p;
> > > > > > -
> > > > > > -	return ((uint32_t)bp[0] << 24)
> > > > > > -		| ((uint32_t)bp[1] << 16)
> > > > > > -		| ((uint32_t)bp[2] << 8)
> > > > > > -		| bp[3];
> > > > > > -}
> > > > > 
> > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > no go.  They're now part of the published interface of the library.
> > > > 
> > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > 
> > > > > Even if they're not used for reading the internal tags, they can be
> > > > > used to load integers from within particular properties.  Those are
> > > > > frequently unaligned, since properties generally have packed
> > > > > representations.
> > > > 
> > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > problems".
> > > 
> > > The initial report isn't the only relevant thing here.  Although it
> > > prompted the change, it wasn't the only consideration in making it.
> > > 
> > > There's also two separate questions here:
> > >   1) Do we want byteswapping integer load helpers?
> > >   2) Should those handle unaligned accesses?
> > > 
> > > In working out how to address the (as it turns out, non existent)
> > > problem, I realized an abstraction for loading big-endian integers
> > > from the blob would be a useful thing in its own right.  I also
> > > realized that it's a useful thing not just inside the libfdt code, but
> > > as an external interface.  Callers have always needed to interpret the
> > > contents of individual dt properties, and loading integers from them
> > > is often a part of that.
> > > 
> > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > going to take them away.
> > > 
> > > For the case of external users we absolutely do need to handle
> > > unaligned accesses.  There are a number of existing bindings that mix
> > > strings and integers in packed format, in a single encoded property.
> > > So regardless of the alignment of the whole property, we can easily
> > > get unaligned integers in there, and I don't want to expose multiple
> > > different helpers for different cases.
> > > 
> > > Now, we don't *have* to use the same helpers for internal use.  We
> > > could open code the internal loads, or use a special aligned-only
> > > version inside.  But using the existing external helpers is the
> > > obvious and simple choice.
> > > 
> > > So, we're back to: I need a case for changing this now, not just a
> > > case for claiming it wasn't needed in the first place.
> > 
> > For U-Boot, I'm just going to revert this part of the changes.  I would
> 
> That seems reasonable for the interim.
> 
> > suggest that you look in to some way to fix the "fast path" here to go
> > back to the previous way of working so that other project can continue
> > to use libfdt as well and when callers need to access these helpers and
> > are not otherwise in the fast path can do so.
> > 
> > You're adding visible boot time delay with things the way they exist
> > today.  That's not OK.
> 
> That's a fair point, but you need to make that case in the commit
> message and merge request, not just expect me to find and collate the
> arguments from other threads.

If you want me to leave the helpers alone but otherwise revert things, I
can do a v2 and expand the commit message.  And perhaps I'm too nice
sometimes then but I do pickup and tweak things that are close enough to
what I want and reword as needed for clarity.

I still believe you have things wrong.  There's not an unaligned access
problem that libfdt needs to care about.  ARM doesn't need help handling
unaligned accesses.  The only problem that's been reported is from when
a user got themselves so far off in the weeds that nothing else matters.

> > I wouldn't have updated U-Boot with these
> > changes if we had automated timing tests like the TF-A folks do.
> > 
> > Please do let us know if you have any changes to try that might
> > alleviate the overall problem.  Thanks!
> 
> I finally got around to looking through Simon Glass's patches for
> reducing libfdt code size with various "asusmption" flags.  I think
> it's a good concept and close to being ready.
> 
> I don't think it's the only thing we want to do, but one thing we
> could do is to add another ASSUME_ALIGNED flag that will simplify the
> load helpers.

You had mentioned other cases you now see as being possible problems.
Can you make these cases fail without these helpers?  If so then the
assumption flag stuff could be used with the default being the way
things were before.

Thanks!

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-28 13:43                   ` Tom Rini
@ 2020-01-28 14:08                     ` Rob Herring
       [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2020-01-29  1:29                     ` David Gibson
  1 sibling, 1 reply; 26+ messages in thread
From: Rob Herring @ 2020-01-28 14:08 UTC (permalink / raw)
  To: Tom Rini
  Cc: David Gibson, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
>
> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > > performance of these helpers.
> > > > > > >
> > > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > >
> > > > > > > Revert both of these changes.
> > > > > > >
> > > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > > ---
> > > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > >
> > > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > > would be best to just drop it.
> > > > > >
> > > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > > isn't it better to be able to than not?
> > > > > >
> > > > > > I gather from the previous discussion that there's a significant
> > > > > > performance impact, but that rationale needs to go into the commit
> > > > > > message for posterity.
> > > > >
> > > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > > it's a performance problem.  A performance problem implies that we would
> > > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > >
> > > > > > [snip]
> > > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > > --- a/libfdt/libfdt.h
> > > > > > > +++ b/libfdt/libfdt.h
> > > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > > >
> > > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > > >
> > > > > > > -/*
> > > > > > > - * Alignment helpers:
> > > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > > - */
> > > > > > > -
> > > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > > -{
> > > > > > > -   const uint8_t *bp = (const uint8_t *)p;
> > > > > > > -
> > > > > > > -   return ((uint32_t)bp[0] << 24)
> > > > > > > -           | ((uint32_t)bp[1] << 16)
> > > > > > > -           | ((uint32_t)bp[2] << 8)
> > > > > > > -           | bp[3];
> > > > > > > -}
> > > > > >
> > > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > > no go.  They're now part of the published interface of the library.
> > > > >
> > > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > >
> > > > > > Even if they're not used for reading the internal tags, they can be
> > > > > > used to load integers from within particular properties.  Those are
> > > > > > frequently unaligned, since properties generally have packed
> > > > > > representations.
> > > > >
> > > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > > problems".
> > > >
> > > > The initial report isn't the only relevant thing here.  Although it
> > > > prompted the change, it wasn't the only consideration in making it.
> > > >
> > > > There's also two separate questions here:
> > > >   1) Do we want byteswapping integer load helpers?
> > > >   2) Should those handle unaligned accesses?
> > > >
> > > > In working out how to address the (as it turns out, non existent)
> > > > problem, I realized an abstraction for loading big-endian integers
> > > > from the blob would be a useful thing in its own right.  I also
> > > > realized that it's a useful thing not just inside the libfdt code, but
> > > > as an external interface.  Callers have always needed to interpret the
> > > > contents of individual dt properties, and loading integers from them
> > > > is often a part of that.
> > > >
> > > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > > going to take them away.
> > > >
> > > > For the case of external users we absolutely do need to handle
> > > > unaligned accesses.  There are a number of existing bindings that mix
> > > > strings and integers in packed format, in a single encoded property.
> > > > So regardless of the alignment of the whole property, we can easily
> > > > get unaligned integers in there, and I don't want to expose multiple
> > > > different helpers for different cases.
> > > >
> > > > Now, we don't *have* to use the same helpers for internal use.  We
> > > > could open code the internal loads, or use a special aligned-only
> > > > version inside.  But using the existing external helpers is the
> > > > obvious and simple choice.
> > > >
> > > > So, we're back to: I need a case for changing this now, not just a
> > > > case for claiming it wasn't needed in the first place.
> > >
> > > For U-Boot, I'm just going to revert this part of the changes.  I would
> >
> > That seems reasonable for the interim.
> >
> > > suggest that you look in to some way to fix the "fast path" here to go
> > > back to the previous way of working so that other project can continue
> > > to use libfdt as well and when callers need to access these helpers and
> > > are not otherwise in the fast path can do so.
> > >
> > > You're adding visible boot time delay with things the way they exist
> > > today.  That's not OK.
> >
> > That's a fair point, but you need to make that case in the commit
> > message and merge request, not just expect me to find and collate the
> > arguments from other threads.
>
> If you want me to leave the helpers alone but otherwise revert things, I
> can do a v2 and expand the commit message.  And perhaps I'm too nice
> sometimes then but I do pickup and tweak things that are close enough to
> what I want and reword as needed for clarity.

Why not just fix the helpers for the aligned case and be done with it:

static inline uint32_t fdt32_ld(const fdt32_t *p)
{
       const uint8_t *bp = (const uint8_t *)p;

+       if (!((unsigned long)p & 0x3))
+               return fdt32_to_cpu(*p);
+
       return ((uint32_t)bp[0] << 24)
               | ((uint32_t)bp[1] << 16)
               | ((uint32_t)bp[2] << 8)
               | bp[3];
}


> I still believe you have things wrong.  There's not an unaligned access
> problem that libfdt needs to care about.  ARM doesn't need help handling
> unaligned accesses.  The only problem that's been reported is from when
> a user got themselves so far off in the weeds that nothing else matters.

I think while the vast majority of DTBs don't have anything that would
cause unaligned accesses, that's not guaranteed by the FDT format.
libfdt needs to handle the worst case.

What about ARMv5 and v4 which don't universally support unaligned
accesses or any other architecture. Do all mips, openrisc, riscv, arc,
microblaze, xtenza, etc. support unaligned accesses?

Rob

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2020-01-28 14:51                         ` Tom Rini
  2020-01-29  1:52                           ` David Gibson
  2020-01-29  1:49                         ` David Gibson
                                           ` (2 subsequent siblings)
  3 siblings, 1 reply; 26+ messages in thread
From: Tom Rini @ 2020-01-28 14:51 UTC (permalink / raw)
  To: Rob Herring
  Cc: David Gibson, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 10679 bytes --]

On Tue, Jan 28, 2020 at 08:08:53AM -0600, Rob Herring wrote:
> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> >
> > On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > > On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > > > performance of these helpers.
> > > > > > > >
> > > > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > > >
> > > > > > > > Revert both of these changes.
> > > > > > > >
> > > > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > > > ---
> > > > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > > >
> > > > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > > > would be best to just drop it.
> > > > > > >
> > > > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > > > isn't it better to be able to than not?
> > > > > > >
> > > > > > > I gather from the previous discussion that there's a significant
> > > > > > > performance impact, but that rationale needs to go into the commit
> > > > > > > message for posterity.
> > > > > >
> > > > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > > > it's a performance problem.  A performance problem implies that we would
> > > > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > > >
> > > > > > > [snip]
> > > > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > > > --- a/libfdt/libfdt.h
> > > > > > > > +++ b/libfdt/libfdt.h
> > > > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > > > >
> > > > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > > > >
> > > > > > > > -/*
> > > > > > > > - * Alignment helpers:
> > > > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > > > - */
> > > > > > > > -
> > > > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > > > -{
> > > > > > > > -   const uint8_t *bp = (const uint8_t *)p;
> > > > > > > > -
> > > > > > > > -   return ((uint32_t)bp[0] << 24)
> > > > > > > > -           | ((uint32_t)bp[1] << 16)
> > > > > > > > -           | ((uint32_t)bp[2] << 8)
> > > > > > > > -           | bp[3];
> > > > > > > > -}
> > > > > > >
> > > > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > > > no go.  They're now part of the published interface of the library.
> > > > > >
> > > > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > > >
> > > > > > > Even if they're not used for reading the internal tags, they can be
> > > > > > > used to load integers from within particular properties.  Those are
> > > > > > > frequently unaligned, since properties generally have packed
> > > > > > > representations.
> > > > > >
> > > > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > > > problems".
> > > > >
> > > > > The initial report isn't the only relevant thing here.  Although it
> > > > > prompted the change, it wasn't the only consideration in making it.
> > > > >
> > > > > There's also two separate questions here:
> > > > >   1) Do we want byteswapping integer load helpers?
> > > > >   2) Should those handle unaligned accesses?
> > > > >
> > > > > In working out how to address the (as it turns out, non existent)
> > > > > problem, I realized an abstraction for loading big-endian integers
> > > > > from the blob would be a useful thing in its own right.  I also
> > > > > realized that it's a useful thing not just inside the libfdt code, but
> > > > > as an external interface.  Callers have always needed to interpret the
> > > > > contents of individual dt properties, and loading integers from them
> > > > > is often a part of that.
> > > > >
> > > > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > > > going to take them away.
> > > > >
> > > > > For the case of external users we absolutely do need to handle
> > > > > unaligned accesses.  There are a number of existing bindings that mix
> > > > > strings and integers in packed format, in a single encoded property.
> > > > > So regardless of the alignment of the whole property, we can easily
> > > > > get unaligned integers in there, and I don't want to expose multiple
> > > > > different helpers for different cases.
> > > > >
> > > > > Now, we don't *have* to use the same helpers for internal use.  We
> > > > > could open code the internal loads, or use a special aligned-only
> > > > > version inside.  But using the existing external helpers is the
> > > > > obvious and simple choice.
> > > > >
> > > > > So, we're back to: I need a case for changing this now, not just a
> > > > > case for claiming it wasn't needed in the first place.
> > > >
> > > > For U-Boot, I'm just going to revert this part of the changes.  I would
> > >
> > > That seems reasonable for the interim.
> > >
> > > > suggest that you look in to some way to fix the "fast path" here to go
> > > > back to the previous way of working so that other project can continue
> > > > to use libfdt as well and when callers need to access these helpers and
> > > > are not otherwise in the fast path can do so.
> > > >
> > > > You're adding visible boot time delay with things the way they exist
> > > > today.  That's not OK.
> > >
> > > That's a fair point, but you need to make that case in the commit
> > > message and merge request, not just expect me to find and collate the
> > > arguments from other threads.
> >
> > If you want me to leave the helpers alone but otherwise revert things, I
> > can do a v2 and expand the commit message.  And perhaps I'm too nice
> > sometimes then but I do pickup and tweak things that are close enough to
> > what I want and reword as needed for clarity.
> 
> Why not just fix the helpers for the aligned case and be done with it:
> 
> static inline uint32_t fdt32_ld(const fdt32_t *p)
> {
>        const uint8_t *bp = (const uint8_t *)p;
> 
> +       if (!((unsigned long)p & 0x3))
> +               return fdt32_to_cpu(*p);
> +
>        return ((uint32_t)bp[0] << 24)
>                | ((uint32_t)bp[1] << 16)
>                | ((uint32_t)bp[2] << 8)
>                | bp[3];
> }

I don't know if that was one of the ideas David tried when the problem
was reported initially.  It would be good to know what the
size/performance impact of that is.  But still, fast path needs to be
fast.

> > I still believe you have things wrong.  There's not an unaligned access
> > problem that libfdt needs to care about.  ARM doesn't need help handling
> > unaligned accesses.  The only problem that's been reported is from when
> > a user got themselves so far off in the weeds that nothing else matters.
> 
> I think while the vast majority of DTBs don't have anything that would
> cause unaligned accesses, that's not guaranteed by the FDT format.
> libfdt needs to handle the worst case.

It also needs to be useful.  Please note that two of the projects using
libfdt are holding back upgrading (TF-A,
https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/log/lib/libfdt/fdt.c)
or reverting this change (U-Boot) because this is increasing boot time
in visible ways.  It's not "you added 100ms".  It's "you added 1 second
or more".

> What about ARMv5 and v4 which don't universally support unaligned
> accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> microblaze, xtenza, etc. support unaligned accesses?

It's been long enough since the last time I was in a what-about
discussion over alignment issues that no, I don't recall just how much
special casing you need to put in the common paths to handle the
uncommon case.  My general recollection is that no, we don't need to go
all out on this as the cases where unaligned access is fatal (rather
than just bad performance in that rare case) is small.  It's so small
that the problem wasn't found here because someone did that, it's
because someone (inadvertently) turned off all the safety and sanity
checks and then saw not safe and not sane results.

Thanks!

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-28 13:43                   ` Tom Rini
  2020-01-28 14:08                     ` Rob Herring
@ 2020-01-29  1:29                     ` David Gibson
  1 sibling, 0 replies; 26+ messages in thread
From: David Gibson @ 2020-01-29  1:29 UTC (permalink / raw)
  To: Tom Rini
  Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA, Patrice CHOTARD,
	Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 9757 bytes --]

On Tue, Jan 28, 2020 at 08:43:04AM -0500, Tom Rini wrote:
> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > > performance of these helpers.
> > > > > > > 
> > > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > > 
> > > > > > > Revert both of these changes.
> > > > > > > 
> > > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > > ---
> > > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > > 
> > > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > > would be best to just drop it.
> > > > > > 
> > > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > > isn't it better to be able to than not?
> > > > > > 
> > > > > > I gather from the previous discussion that there's a significant
> > > > > > performance impact, but that rationale needs to go into the commit
> > > > > > message for posterity.
> > > > > 
> > > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > > it's a performance problem.  A performance problem implies that we would
> > > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > > 
> > > > > > [snip]
> > > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > > --- a/libfdt/libfdt.h
> > > > > > > +++ b/libfdt/libfdt.h
> > > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > > >  
> > > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > > >  
> > > > > > > -/*
> > > > > > > - * Alignment helpers:
> > > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > > - */
> > > > > > > -
> > > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > > -{
> > > > > > > -	const uint8_t *bp = (const uint8_t *)p;
> > > > > > > -
> > > > > > > -	return ((uint32_t)bp[0] << 24)
> > > > > > > -		| ((uint32_t)bp[1] << 16)
> > > > > > > -		| ((uint32_t)bp[2] << 8)
> > > > > > > -		| bp[3];
> > > > > > > -}
> > > > > > 
> > > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > > no go.  They're now part of the published interface of the library.
> > > > > 
> > > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > > 
> > > > > > Even if they're not used for reading the internal tags, they can be
> > > > > > used to load integers from within particular properties.  Those are
> > > > > > frequently unaligned, since properties generally have packed
> > > > > > representations.
> > > > > 
> > > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > > problems".
> > > > 
> > > > The initial report isn't the only relevant thing here.  Although it
> > > > prompted the change, it wasn't the only consideration in making it.
> > > > 
> > > > There's also two separate questions here:
> > > >   1) Do we want byteswapping integer load helpers?
> > > >   2) Should those handle unaligned accesses?
> > > > 
> > > > In working out how to address the (as it turns out, non existent)
> > > > problem, I realized an abstraction for loading big-endian integers
> > > > from the blob would be a useful thing in its own right.  I also
> > > > realized that it's a useful thing not just inside the libfdt code, but
> > > > as an external interface.  Callers have always needed to interpret the
> > > > contents of individual dt properties, and loading integers from them
> > > > is often a part of that.
> > > > 
> > > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > > going to take them away.
> > > > 
> > > > For the case of external users we absolutely do need to handle
> > > > unaligned accesses.  There are a number of existing bindings that mix
> > > > strings and integers in packed format, in a single encoded property.
> > > > So regardless of the alignment of the whole property, we can easily
> > > > get unaligned integers in there, and I don't want to expose multiple
> > > > different helpers for different cases.
> > > > 
> > > > Now, we don't *have* to use the same helpers for internal use.  We
> > > > could open code the internal loads, or use a special aligned-only
> > > > version inside.  But using the existing external helpers is the
> > > > obvious and simple choice.
> > > > 
> > > > So, we're back to: I need a case for changing this now, not just a
> > > > case for claiming it wasn't needed in the first place.
> > > 
> > > For U-Boot, I'm just going to revert this part of the changes.  I would
> > 
> > That seems reasonable for the interim.
> > 
> > > suggest that you look in to some way to fix the "fast path" here to go
> > > back to the previous way of working so that other project can continue
> > > to use libfdt as well and when callers need to access these helpers and
> > > are not otherwise in the fast path can do so.
> > > 
> > > You're adding visible boot time delay with things the way they exist
> > > today.  That's not OK.
> > 
> > That's a fair point, but you need to make that case in the commit
> > message and merge request, not just expect me to find and collate the
> > arguments from other threads.
> 
> If you want me to leave the helpers alone but otherwise revert things, I
> can do a v2 and expand the commit message.  And perhaps I'm too nice
> sometimes then but I do pickup and tweak things that are close enough to
> what I want and reword as needed for clarity.
> 
> I still believe you have things wrong.  There's not an unaligned access
> problem that libfdt needs to care about.  ARM doesn't need help handling
> unaligned accesses.  The only problem that's been reported is from when
> a user got themselves so far off in the weeds that nothing else matters.

Ah!  I think I jut figured out the cause of our misunderstanding.
Looks like property encodings with mixed integers and strings are not
as common as I thought.  At least, not ones that have integer data
*after* string data, which is the case that matters for alignment.
The only examples I could find are IBM specific ones in PAPR, not in
1275 or a common binding (e.g. 'ibm,drc-info' and 'ibm,vpd').

So, chances are you've never encountered them in the ARM world.
Nonetheless they do exist, and we do use libfdt on IBM PAPR systems as
well.

Hrmm... so... chances are reverting this won't hit a problem in
practice... for now.  On ARM, properties with unaligned integers seem
to essentially never exist, and POWER cpus can generally handle
unaligned loads fine.

I remain uneasy about removing the unaligned handling, though.  Even
if you haven't hit it yet, it's entirely possible at some point
someone will attach a device to an ARM system that has a binding with
out-of-alignment integers.  Likewise we have Microwatt (POWER based
open ISA systems) chips on the way, which probably won't handle
unaligned loads, and may or may not have devices with funny bindings.

I really don't want to have a load helper that works most of the time,
but occasionally breaks horribly.

*ponders*

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2020-01-28 14:51                         ` Tom Rini
@ 2020-01-29  1:49                         ` David Gibson
  2020-01-29  2:15                         ` Ian Lepore
  2020-01-30 14:44                         ` Patrice CHOTARD
  3 siblings, 0 replies; 26+ messages in thread
From: David Gibson @ 2020-01-29  1:49 UTC (permalink / raw)
  To: Rob Herring
  Cc: Tom Rini, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 10380 bytes --]

On Tue, Jan 28, 2020 at 08:08:53AM -0600, Rob Herring wrote:
> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> >
> > On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > > On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > > > performance of these helpers.
> > > > > > > >
> > > > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > > >
> > > > > > > > Revert both of these changes.
> > > > > > > >
> > > > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > > > ---
> > > > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > > >
> > > > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > > > would be best to just drop it.
> > > > > > >
> > > > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > > > isn't it better to be able to than not?
> > > > > > >
> > > > > > > I gather from the previous discussion that there's a significant
> > > > > > > performance impact, but that rationale needs to go into the commit
> > > > > > > message for posterity.
> > > > > >
> > > > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > > > it's a performance problem.  A performance problem implies that we would
> > > > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > > >
> > > > > > > [snip]
> > > > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > > > --- a/libfdt/libfdt.h
> > > > > > > > +++ b/libfdt/libfdt.h
> > > > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > > > >
> > > > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > > > >
> > > > > > > > -/*
> > > > > > > > - * Alignment helpers:
> > > > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > > > - */
> > > > > > > > -
> > > > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > > > -{
> > > > > > > > -   const uint8_t *bp = (const uint8_t *)p;
> > > > > > > > -
> > > > > > > > -   return ((uint32_t)bp[0] << 24)
> > > > > > > > -           | ((uint32_t)bp[1] << 16)
> > > > > > > > -           | ((uint32_t)bp[2] << 8)
> > > > > > > > -           | bp[3];
> > > > > > > > -}
> > > > > > >
> > > > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > > > no go.  They're now part of the published interface of the library.
> > > > > >
> > > > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > > >
> > > > > > > Even if they're not used for reading the internal tags, they can be
> > > > > > > used to load integers from within particular properties.  Those are
> > > > > > > frequently unaligned, since properties generally have packed
> > > > > > > representations.
> > > > > >
> > > > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > > > problems".
> > > > >
> > > > > The initial report isn't the only relevant thing here.  Although it
> > > > > prompted the change, it wasn't the only consideration in making it.
> > > > >
> > > > > There's also two separate questions here:
> > > > >   1) Do we want byteswapping integer load helpers?
> > > > >   2) Should those handle unaligned accesses?
> > > > >
> > > > > In working out how to address the (as it turns out, non existent)
> > > > > problem, I realized an abstraction for loading big-endian integers
> > > > > from the blob would be a useful thing in its own right.  I also
> > > > > realized that it's a useful thing not just inside the libfdt code, but
> > > > > as an external interface.  Callers have always needed to interpret the
> > > > > contents of individual dt properties, and loading integers from them
> > > > > is often a part of that.
> > > > >
> > > > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > > > going to take them away.
> > > > >
> > > > > For the case of external users we absolutely do need to handle
> > > > > unaligned accesses.  There are a number of existing bindings that mix
> > > > > strings and integers in packed format, in a single encoded property.
> > > > > So regardless of the alignment of the whole property, we can easily
> > > > > get unaligned integers in there, and I don't want to expose multiple
> > > > > different helpers for different cases.
> > > > >
> > > > > Now, we don't *have* to use the same helpers for internal use.  We
> > > > > could open code the internal loads, or use a special aligned-only
> > > > > version inside.  But using the existing external helpers is the
> > > > > obvious and simple choice.
> > > > >
> > > > > So, we're back to: I need a case for changing this now, not just a
> > > > > case for claiming it wasn't needed in the first place.
> > > >
> > > > For U-Boot, I'm just going to revert this part of the changes.  I would
> > >
> > > That seems reasonable for the interim.
> > >
> > > > suggest that you look in to some way to fix the "fast path" here to go
> > > > back to the previous way of working so that other project can continue
> > > > to use libfdt as well and when callers need to access these helpers and
> > > > are not otherwise in the fast path can do so.
> > > >
> > > > You're adding visible boot time delay with things the way they exist
> > > > today.  That's not OK.
> > >
> > > That's a fair point, but you need to make that case in the commit
> > > message and merge request, not just expect me to find and collate the
> > > arguments from other threads.
> >
> > If you want me to leave the helpers alone but otherwise revert things, I
> > can do a v2 and expand the commit message.  And perhaps I'm too nice
> > sometimes then but I do pickup and tweak things that are close enough to
> > what I want and reword as needed for clarity.
> 
> Why not just fix the helpers for the aligned case and be done with it:
> 
> static inline uint32_t fdt32_ld(const fdt32_t *p)
> {
>        const uint8_t *bp = (const uint8_t *)p;
> 
> +       if (!((unsigned long)p & 0x3))
> +               return fdt32_to_cpu(*p);
> +
>        return ((uint32_t)bp[0] << 24)
>                | ((uint32_t)bp[1] << 16)
>                | ((uint32_t)bp[2] << 8)
>                | bp[3];
> }
> 
> 
> > I still believe you have things wrong.  There's not an unaligned access
> > problem that libfdt needs to care about.  ARM doesn't need help handling
> > unaligned accesses.  The only problem that's been reported is from when
> > a user got themselves so far off in the weeds that nothing else matters.
> 
> I think while the vast majority of DTBs don't have anything that would
> cause unaligned accesses, that's not guaranteed by the FDT format.
> libfdt needs to handle the worst case.

That's true at a few different levels:

How the dtb is loaded isn't within scope for the fdt spec, and if the
whole thing is loaded unaligned, obviously stuff within it will be
unaligned.  We could make it a requirement for libfdt that the dtb be
loaded at an aligned addresss, though.

The fdt spec does require that the structure block be at an aligned
offset from header, which ensures alignment of the tags (assuming the
load address is aligned).  However, the content of the properties is
again out of scope for the fdt format itself, so bindings can define
unaligned things in there.

In addition the structure block only requires 4-byte alignment (and
only maintains 4-byte alignment even given a more-aligned start
address), which means that 8 byte integers within properties may not
be naturally aligned, even if we don't have weird bindings mixing
strings and numbers.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
  2020-01-28 14:51                         ` Tom Rini
@ 2020-01-29  1:52                           ` David Gibson
  0 siblings, 0 replies; 26+ messages in thread
From: David Gibson @ 2020-01-29  1:52 UTC (permalink / raw)
  To: Tom Rini
  Cc: Rob Herring, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 11767 bytes --]

On Tue, Jan 28, 2020 at 09:51:43AM -0500, Tom Rini wrote:
> On Tue, Jan 28, 2020 at 08:08:53AM -0600, Rob Herring wrote:
> > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > >
> > > On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > > > On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > > > On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > > > > On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > > > > > On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > > > > > > On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > > > > > > > > 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > > > > > > > > changes to support unaligned reads for ARM platforms and 11738cf01f15
> > > > > > > > > "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > > > > > > > > performance of these helpers.
> > > > > > > > >
> > > > > > > > > Ultimately however, these helpers should not exist.  Unaligned access
> > > > > > > > > only occurs when images are forced out of alignment by the user.  This
> > > > > > > > > unalignment is not supported and introduces problems later on as other
> > > > > > > > > parts of the system image are unaligned and they too require alignment.
> > > > > > > > >
> > > > > > > > > Revert both of these changes.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > > > > > > > > ---
> > > > > > > > > By way of a little more explanation, looking at the archives it seems
> > > > > > > > > that the initial bug reporter said that they had a platform that was
> > > > > > > > > using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > > > > > > > > What that does is to tell U-Boot to not do any of the sanity checks and
> > > > > > > > > relocation to ensure alignment that it would normally do because the
> > > > > > > > > user knows best.  This later came up on the U-Boot list as once the DTB
> > > > > > > > > was loaded, Linux is unhappy because it demands correct alignment.
> > > > > > > > >
> > > > > > > > > I only realized libfdt had introduced changes here when it was reported
> > > > > > > > > that boot time had gotten much slower once we merged this change in.  It
> > > > > > > > > would be best to just drop it.
> > > > > > > >
> > > > > > > > Hmm.  I'm not sure about this.  The commit message makes a case for
> > > > > > > > why the unaligned handling isn't necessary, but not a case for why
> > > > > > > > it's bad.  Even if handling an unaligned tree isn't a common case,
> > > > > > > > isn't it better to be able to than not?
> > > > > > > >
> > > > > > > > I gather from the previous discussion that there's a significant
> > > > > > > > performance impact, but that rationale needs to go into the commit
> > > > > > > > message for posterity.
> > > > > > >
> > > > > > > I wanted to emphasize that the code simply isn't ever needed, not that
> > > > > > > it's a performance problem.  A performance problem implies that we would
> > > > > > > keep this, if it was fast enough.  That's why people noticed it (it
> > > > > > > slows things down to an unusable level).  But it's functionally wrong.
> > > > > > >
> > > > > > > > [snip]
> > > > > > > > > diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > > > > > > > > index fc4c4962a01c..d4ebe915cf46 100644
> > > > > > > > > --- a/libfdt/libfdt.h
> > > > > > > > > +++ b/libfdt/libfdt.h
> > > > > > > > > @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > > > > > > > >
> > > > > > > > >  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > > > > > > > >
> > > > > > > > > -/*
> > > > > > > > > - * Alignment helpers:
> > > > > > > > > - *     These helpers access words from a device tree blob.  They're
> > > > > > > > > - *     built to work even with unaligned pointers on platforms (ike
> > > > > > > > > - *     ARM) that don't like unaligned loads and stores
> > > > > > > > > - */
> > > > > > > > > -
> > > > > > > > > -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > > > > > > -{
> > > > > > > > > -   const uint8_t *bp = (const uint8_t *)p;
> > > > > > > > > -
> > > > > > > > > -   return ((uint32_t)bp[0] << 24)
> > > > > > > > > -           | ((uint32_t)bp[1] << 16)
> > > > > > > > > -           | ((uint32_t)bp[2] << 8)
> > > > > > > > > -           | bp[3];
> > > > > > > > > -}
> > > > > > > >
> > > > > > > > In particular, I definitely think removing the helpers entirely is a
> > > > > > > > no go.  They're now part of the published interface of the library.
> > > > > > >
> > > > > > > Perhaps "mistakes were made" ?  I don't think we need to worry about
> > > > > > > removing an interface here as projects are avoiding upgrading libfdt
> > > > > > > now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > > > > > >
> > > > > > > > Even if they're not used for reading the internal tags, they can be
> > > > > > > > used to load integers from within particular properties.  Those are
> > > > > > > > frequently unaligned, since properties generally have packed
> > > > > > > > representations.
> > > > > > >
> > > > > > > I don't see the relevance.  Go back to the initial problem report.  It's
> > > > > > > not "I have a new unique platform I'm using libfdt on and I have
> > > > > > > problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > > > > > > problems".
> > > > > >
> > > > > > The initial report isn't the only relevant thing here.  Although it
> > > > > > prompted the change, it wasn't the only consideration in making it.
> > > > > >
> > > > > > There's also two separate questions here:
> > > > > >   1) Do we want byteswapping integer load helpers?
> > > > > >   2) Should those handle unaligned accesses?
> > > > > >
> > > > > > In working out how to address the (as it turns out, non existent)
> > > > > > problem, I realized an abstraction for loading big-endian integers
> > > > > > from the blob would be a useful thing in its own right.  I also
> > > > > > realized that it's a useful thing not just inside the libfdt code, but
> > > > > > as an external interface.  Callers have always needed to interpret the
> > > > > > contents of individual dt properties, and loading integers from them
> > > > > > is often a part of that.
> > > > > >
> > > > > > I know of people using those fdtXX_ld() helpers right now, so I'm not
> > > > > > going to take them away.
> > > > > >
> > > > > > For the case of external users we absolutely do need to handle
> > > > > > unaligned accesses.  There are a number of existing bindings that mix
> > > > > > strings and integers in packed format, in a single encoded property.
> > > > > > So regardless of the alignment of the whole property, we can easily
> > > > > > get unaligned integers in there, and I don't want to expose multiple
> > > > > > different helpers for different cases.
> > > > > >
> > > > > > Now, we don't *have* to use the same helpers for internal use.  We
> > > > > > could open code the internal loads, or use a special aligned-only
> > > > > > version inside.  But using the existing external helpers is the
> > > > > > obvious and simple choice.
> > > > > >
> > > > > > So, we're back to: I need a case for changing this now, not just a
> > > > > > case for claiming it wasn't needed in the first place.
> > > > >
> > > > > For U-Boot, I'm just going to revert this part of the changes.  I would
> > > >
> > > > That seems reasonable for the interim.
> > > >
> > > > > suggest that you look in to some way to fix the "fast path" here to go
> > > > > back to the previous way of working so that other project can continue
> > > > > to use libfdt as well and when callers need to access these helpers and
> > > > > are not otherwise in the fast path can do so.
> > > > >
> > > > > You're adding visible boot time delay with things the way they exist
> > > > > today.  That's not OK.
> > > >
> > > > That's a fair point, but you need to make that case in the commit
> > > > message and merge request, not just expect me to find and collate the
> > > > arguments from other threads.
> > >
> > > If you want me to leave the helpers alone but otherwise revert things, I
> > > can do a v2 and expand the commit message.  And perhaps I'm too nice
> > > sometimes then but I do pickup and tweak things that are close enough to
> > > what I want and reword as needed for clarity.
> > 
> > Why not just fix the helpers for the aligned case and be done with it:
> > 
> > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > {
> >        const uint8_t *bp = (const uint8_t *)p;
> > 
> > +       if (!((unsigned long)p & 0x3))
> > +               return fdt32_to_cpu(*p);
> > +
> >        return ((uint32_t)bp[0] << 24)
> >                | ((uint32_t)bp[1] << 16)
> >                | ((uint32_t)bp[2] << 8)
> >                | bp[3];
> > }
> 
> I don't know if that was one of the ideas David tried when the problem
> was reported initially.  It would be good to know what the
> size/performance impact of that is.  But still, fast path needs to be
> fast.

I haven't tried this - I don't have easy access to systems to measure
the performance impact.  Or, rather, I can get access to systems, but
I don't really have the bandwidth to prepare a performance testing
setup.

I think this is worth a shot.  I don't know much the conditional
branch will impact the fast path.

If it's significantly better than what we have now, it might be a good
interim step, even if it's not the final word.

> > > I still believe you have things wrong.  There's not an unaligned access
> > > problem that libfdt needs to care about.  ARM doesn't need help handling
> > > unaligned accesses.  The only problem that's been reported is from when
> > > a user got themselves so far off in the weeds that nothing else matters.
> > 
> > I think while the vast majority of DTBs don't have anything that would
> > cause unaligned accesses, that's not guaranteed by the FDT format.
> > libfdt needs to handle the worst case.
> 
> It also needs to be useful.  Please note that two of the projects using
> libfdt are holding back upgrading (TF-A,
> https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/log/lib/libfdt/fdt.c)
> or reverting this change (U-Boot) because this is increasing boot time
> in visible ways.  It's not "you added 100ms".  It's "you added 1 second
> or more".
> 
> > What about ARMv5 and v4 which don't universally support unaligned
> > accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> > microblaze, xtenza, etc. support unaligned accesses?
> 
> It's been long enough since the last time I was in a what-about
> discussion over alignment issues that no, I don't recall just how much
> special casing you need to put in the common paths to handle the
> uncommon case.  My general recollection is that no, we don't need to go
> all out on this as the cases where unaligned access is fatal (rather
> than just bad performance in that rare case) is small.  It's so small
> that the problem wasn't found here because someone did that, it's
> because someone (inadvertently) turned off all the safety and sanity
> checks and then saw not safe and not sane results.
> 
> Thanks!
> 



-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2020-01-28 14:51                         ` Tom Rini
  2020-01-29  1:49                         ` David Gibson
@ 2020-01-29  2:15                         ` Ian Lepore
       [not found]                           ` <b5e2288c5b06419fbfd057b8895ade5c5493cde5.camel-h+KGxgPPiopAfugRpC6u6w@public.gmane.org>
  2020-01-30 14:44                         ` Patrice CHOTARD
  3 siblings, 1 reply; 26+ messages in thread
From: Ian Lepore @ 2020-01-29  2:15 UTC (permalink / raw)
  To: Rob Herring, Tom Rini
  Cc: David Gibson, Devicetree Compiler, Patrice CHOTARD, Patrick DELAUNAY

On Tue, 2020-01-28 at 08:08 -0600, Rob Herring wrote:
> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > 
> > 
[...]
> > I still believe you have things wrong.  There's not an unaligned access
> > problem that libfdt needs to care about.  ARM doesn't need help handling
> > unaligned accesses.  The only problem that's been reported is from when
> > a user got themselves so far off in the weeds that nothing else matters.
> 
> I think while the vast majority of DTBs don't have anything that would
> cause unaligned accesses, that's not guaranteed by the FDT format.
> libfdt needs to handle the worst case.
> 
> What about ARMv5 and v4 which don't universally support unaligned
> accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> microblaze, xtenza, etc. support unaligned accesses?
> 

Unaligned support is optional even on armv6 and armv7.  For quite a
long time freebsd ran armv7 chips with strict alignment (mostly because
we had no real need to do otherwise until people started complaining
that 3rd-party opensource software often failed on freebsd because it's
written with the assumption that the whole world is linux, and linux
used relaxed alignment).

-- Ian



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                           ` <b5e2288c5b06419fbfd057b8895ade5c5493cde5.camel-h+KGxgPPiopAfugRpC6u6w@public.gmane.org>
@ 2020-01-29 16:05                             ` Rob Herring
       [not found]                               ` <CAL_JsqLYobRrLtF9z3eMt=YneGuxDpvpKU7DQruza-jq4n6B3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Rob Herring @ 2020-01-29 16:05 UTC (permalink / raw)
  To: Ian Lepore
  Cc: Tom Rini, David Gibson, Devicetree Compiler, Patrice CHOTARD,
	Patrick DELAUNAY

On Tue, Jan 28, 2020 at 8:15 PM Ian Lepore <ian-h+KGxgPPiopAfugRpC6u6w@public.gmane.org> wrote:
>
> On Tue, 2020-01-28 at 08:08 -0600, Rob Herring wrote:
> > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > >
> > >
> [...]
> > > I still believe you have things wrong.  There's not an unaligned access
> > > problem that libfdt needs to care about.  ARM doesn't need help handling
> > > unaligned accesses.  The only problem that's been reported is from when
> > > a user got themselves so far off in the weeds that nothing else matters.
> >
> > I think while the vast majority of DTBs don't have anything that would
> > cause unaligned accesses, that's not guaranteed by the FDT format.
> > libfdt needs to handle the worst case.
> >
> > What about ARMv5 and v4 which don't universally support unaligned
> > accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> > microblaze, xtenza, etc. support unaligned accesses?
> >
>
> Unaligned support is optional even on armv6 and armv7.  For quite a
> long time freebsd ran armv7 chips with strict alignment (mostly because
> we had no real need to do otherwise until people started complaining
> that 3rd-party opensource software often failed on freebsd because it's
> written with the assumption that the whole world is linux, and linux
> used relaxed alignment).

That's not really correct. See sections L.3.1 and O.3.1 of the Arm ARM
for v7-A/R. First, unaligned support is always supported for armv7 and
armv6 hardware. In armv6, alignment behavior can follow armv5 or
armv6+ behavior (SCTLR.U bit). In armv6k, the armv5 behavior is
deprecated. It is possible to configure the processor to fault on
misaligned accesses (SCTLR.A bit) which is probably what you meant.
However, gcc (and probably others) will themselves generate unaligned
accesses for code compiled for armv7 (don't know about armv6). The
specific case I remember was for packed structs.

Linux has always supported unaligned accesses for userspace code
though that is configurable. I think this is because x86 always did
and lots of software only got tested on x86 (less true today). For
armv5 and earlier, the kernel had to handle the faults and perform the
accesses.

There was a long debate about all of this for u-boot[1]. Though since
then, it seems clearing SCTLR.A won out thanks to UEFI which also
requires enabling unaligned accesses if the arch supports it (u-boot
commit 78f90aaeeccc9e).

Rob

[1] http://u-boot.10912.n7.nabble.com/PATCH-arm-enable-unaligned-access-on-ARMv7-td126385.html

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                               ` <CAL_JsqLYobRrLtF9z3eMt=YneGuxDpvpKU7DQruza-jq4n6B3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2020-01-29 16:08                                 ` Ian Lepore
  0 siblings, 0 replies; 26+ messages in thread
From: Ian Lepore @ 2020-01-29 16:08 UTC (permalink / raw)
  To: Rob Herring
  Cc: Tom Rini, David Gibson, Devicetree Compiler, Patrice CHOTARD,
	Patrick DELAUNAY

On Wed, 2020-01-29 at 10:05 -0600, Rob Herring wrote:
> On Tue, Jan 28, 2020 at 8:15 PM Ian Lepore <ian-h+KGxgPPiopAfugRpC6u6w@public.gmane.org> wrote:
> > 
> > On Tue, 2020-01-28 at 08:08 -0600, Rob Herring wrote:
> > > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > > > 
> > > > 
> > 
> > [...]
> > > > I still believe you have things wrong.  There's not an unaligned access
> > > > problem that libfdt needs to care about.  ARM doesn't need help handling
> > > > unaligned accesses.  The only problem that's been reported is from when
> > > > a user got themselves so far off in the weeds that nothing else matters.
> > > 
> > > I think while the vast majority of DTBs don't have anything that would
> > > cause unaligned accesses, that's not guaranteed by the FDT format.
> > > libfdt needs to handle the worst case.
> > > 
> > > What about ARMv5 and v4 which don't universally support unaligned
> > > accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> > > microblaze, xtenza, etc. support unaligned accesses?
> > > 
> > 
> > Unaligned support is optional even on armv6 and armv7.  For quite a
> > long time freebsd ran armv7 chips with strict alignment (mostly because
> > we had no real need to do otherwise until people started complaining
> > that 3rd-party opensource software often failed on freebsd because it's
> > written with the assumption that the whole world is linux, and linux
> > used relaxed alignment).
> 
> That's not really correct. See sections L.3.1 and O.3.1 of the Arm ARM

Well, no, it IS really correct, but I'm not going to get into a big
pissing contest with you about it.  I suspect your knowledge of freebsd
and the toolchains it uses (which do not involve gcc) aren't up to the
task, and the discussion would be nothing but noise on this list.

-- Ian



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
                                           ` (2 preceding siblings ...)
  2020-01-29  2:15                         ` Ian Lepore
@ 2020-01-30 14:44                         ` Patrice CHOTARD
       [not found]                           ` <c56c4206-1be1-1701-1423-1f74f6913bbf-qxv4g6HH51o@public.gmane.org>
  3 siblings, 1 reply; 26+ messages in thread
From: Patrice CHOTARD @ 2020-01-30 14:44 UTC (permalink / raw)
  To: Rob Herring, Tom Rini; +Cc: David Gibson, Devicetree Compiler, Patrick DELAUNAY

Hi Rob, Tom, David

On 1/28/20 3:08 PM, Rob Herring wrote:
> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini@konsulko.com> wrote:
>> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
>>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
>>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
>>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
>>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
>>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
>>>>>>>> 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
>>>>>>>> changes to support unaligned reads for ARM platforms and 11738cf01f15
>>>>>>>> "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
>>>>>>>> performance of these helpers.
>>>>>>>>
>>>>>>>> Ultimately however, these helpers should not exist.  Unaligned access
>>>>>>>> only occurs when images are forced out of alignment by the user.  This
>>>>>>>> unalignment is not supported and introduces problems later on as other
>>>>>>>> parts of the system image are unaligned and they too require alignment.
>>>>>>>>
>>>>>>>> Revert both of these changes.
>>>>>>>>
>>>>>>>> Signed-off-by: Tom Rini <trini@konsulko.com>
>>>>>>>> ---
>>>>>>>> By way of a little more explanation, looking at the archives it seems
>>>>>>>> that the initial bug reporter said that they had a platform that was
>>>>>>>> using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
>>>>>>>> What that does is to tell U-Boot to not do any of the sanity checks and
>>>>>>>> relocation to ensure alignment that it would normally do because the
>>>>>>>> user knows best.  This later came up on the U-Boot list as once the DTB
>>>>>>>> was loaded, Linux is unhappy because it demands correct alignment.
>>>>>>>>
>>>>>>>> I only realized libfdt had introduced changes here when it was reported
>>>>>>>> that boot time had gotten much slower once we merged this change in.  It
>>>>>>>> would be best to just drop it.
>>>>>>> Hmm.  I'm not sure about this.  The commit message makes a case for
>>>>>>> why the unaligned handling isn't necessary, but not a case for why
>>>>>>> it's bad.  Even if handling an unaligned tree isn't a common case,
>>>>>>> isn't it better to be able to than not?
>>>>>>>
>>>>>>> I gather from the previous discussion that there's a significant
>>>>>>> performance impact, but that rationale needs to go into the commit
>>>>>>> message for posterity.
>>>>>> I wanted to emphasize that the code simply isn't ever needed, not that
>>>>>> it's a performance problem.  A performance problem implies that we would
>>>>>> keep this, if it was fast enough.  That's why people noticed it (it
>>>>>> slows things down to an unusable level).  But it's functionally wrong.
>>>>>>
>>>>>>> [snip]
>>>>>>>> diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
>>>>>>>> index fc4c4962a01c..d4ebe915cf46 100644
>>>>>>>> --- a/libfdt/libfdt.h
>>>>>>>> +++ b/libfdt/libfdt.h
>>>>>>>> @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
>>>>>>>>
>>>>>>>>  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
>>>>>>>>
>>>>>>>> -/*
>>>>>>>> - * Alignment helpers:
>>>>>>>> - *     These helpers access words from a device tree blob.  They're
>>>>>>>> - *     built to work even with unaligned pointers on platforms (ike
>>>>>>>> - *     ARM) that don't like unaligned loads and stores
>>>>>>>> - */
>>>>>>>> -
>>>>>>>> -static inline uint32_t fdt32_ld(const fdt32_t *p)
>>>>>>>> -{
>>>>>>>> -   const uint8_t *bp = (const uint8_t *)p;
>>>>>>>> -
>>>>>>>> -   return ((uint32_t)bp[0] << 24)
>>>>>>>> -           | ((uint32_t)bp[1] << 16)
>>>>>>>> -           | ((uint32_t)bp[2] << 8)
>>>>>>>> -           | bp[3];
>>>>>>>> -}
>>>>>>> In particular, I definitely think removing the helpers entirely is a
>>>>>>> no go.  They're now part of the published interface of the library.
>>>>>> Perhaps "mistakes were made" ?  I don't think we need to worry about
>>>>>> removing an interface here as projects are avoiding upgrading libfdt
>>>>>> now (TF-A, formerly ATF) or reverting the change (U-Boot).
>>>>>>
>>>>>>> Even if they're not used for reading the internal tags, they can be
>>>>>>> used to load integers from within particular properties.  Those are
>>>>>>> frequently unaligned, since properties generally have packed
>>>>>>> representations.
>>>>>> I don't see the relevance.  Go back to the initial problem report.  It's
>>>>>> not "I have a new unique platform I'm using libfdt on and I have
>>>>>> problems".  It's "I keep jabbing myself with a rusty nail and now I have
>>>>>> problems".
>>>>> The initial report isn't the only relevant thing here.  Although it
>>>>> prompted the change, it wasn't the only consideration in making it.
>>>>>
>>>>> There's also two separate questions here:
>>>>>   1) Do we want byteswapping integer load helpers?
>>>>>   2) Should those handle unaligned accesses?
>>>>>
>>>>> In working out how to address the (as it turns out, non existent)
>>>>> problem, I realized an abstraction for loading big-endian integers
>>>>> from the blob would be a useful thing in its own right.  I also
>>>>> realized that it's a useful thing not just inside the libfdt code, but
>>>>> as an external interface.  Callers have always needed to interpret the
>>>>> contents of individual dt properties, and loading integers from them
>>>>> is often a part of that.
>>>>>
>>>>> I know of people using those fdtXX_ld() helpers right now, so I'm not
>>>>> going to take them away.
>>>>>
>>>>> For the case of external users we absolutely do need to handle
>>>>> unaligned accesses.  There are a number of existing bindings that mix
>>>>> strings and integers in packed format, in a single encoded property.
>>>>> So regardless of the alignment of the whole property, we can easily
>>>>> get unaligned integers in there, and I don't want to expose multiple
>>>>> different helpers for different cases.
>>>>>
>>>>> Now, we don't *have* to use the same helpers for internal use.  We
>>>>> could open code the internal loads, or use a special aligned-only
>>>>> version inside.  But using the existing external helpers is the
>>>>> obvious and simple choice.
>>>>>
>>>>> So, we're back to: I need a case for changing this now, not just a
>>>>> case for claiming it wasn't needed in the first place.
>>>> For U-Boot, I'm just going to revert this part of the changes.  I would
>>> That seems reasonable for the interim.
>>>
>>>> suggest that you look in to some way to fix the "fast path" here to go
>>>> back to the previous way of working so that other project can continue
>>>> to use libfdt as well and when callers need to access these helpers and
>>>> are not otherwise in the fast path can do so.
>>>>
>>>> You're adding visible boot time delay with things the way they exist
>>>> today.  That's not OK.
>>> That's a fair point, but you need to make that case in the commit
>>> message and merge request, not just expect me to find and collate the
>>> arguments from other threads.
>> If you want me to leave the helpers alone but otherwise revert things, I
>> can do a v2 and expand the commit message.  And perhaps I'm too nice
>> sometimes then but I do pickup and tweak things that are close enough to
>> what I want and reword as needed for clarity.
> Why not just fix the helpers for the aligned case and be done with it:
>
> static inline uint32_t fdt32_ld(const fdt32_t *p)
> {
>        const uint8_t *bp = (const uint8_t *)p;
>
> +       if (!((unsigned long)p & 0x3))
> +               return fdt32_to_cpu(*p);
> +
>        return ((uint32_t)bp[0] << 24)
>                | ((uint32_t)bp[1] << 16)
>                | ((uint32_t)bp[2] << 8)
>                | bp[3];
> }


Here are the results, before and after your proposed patch:

tested tag: V2020.04-RC1

STM32MP> bootstage report
Timer summary in microseconds (12 records):
       Mark    Elapsed  Stage
          0          0  reset
     84,268     84,268  SPL
    962,921    878,653  end SPL
    965,800      2,879  board_init_f
  4,314,348  3,348,548  board_init_r
  4,863,763    549,415  id=64
  4,908,759     44,996  id=65
  4,909,459        700  main_loop
  5,322,309    412,850  id=175

Accumulated time:
                83,284  dm_r
                95,842  dm_spl
             1,502,020  dm_f


-------------------------------------------------------------------------------

tested tag : V2020.04-RC1 + following fdt32_ld patch :

static inline uint32_t fdt32_ld(const fdt32_t *p)
{
       const uint8_t *bp = (const uint8_t *)p;

+       if (!((unsigned long)p & 0x3))
+               return fdt32_to_cpu(*p);
+
       return ((uint32_t)bp[0] << 24)
               | ((uint32_t)bp[1] << 16)
               | ((uint32_t)bp[2] << 8)
               | bp[3];
}


STM32MP> bootstage report
Timer summary in microseconds (12 records):
       Mark    Elapsed  Stage
          0          0  reset
     84,264     84,264  SPL
    959,300    875,036  end SPL
    962,192      2,892  board_init_f
  4,310,598  3,348,406  board_init_r
  4,860,074    549,476  id=64
  4,905,119     45,045  id=65
  4,905,819        700  main_loop
  5,098,636    192,817  id=175

Accumulated time:
                83,202  dm_r
                95,252  dm_spl
             1,501,950  dm_f


There is no gain in board_init_r(), the added alignment test is expensive itself.

Thanks

Patrice


>
>> I still believe you have things wrong.  There's not an unaligned access
>> problem that libfdt needs to care about.  ARM doesn't need help handling
>> unaligned accesses.  The only problem that's been reported is from when
>> a user got themselves so far off in the weeds that nothing else matters.
> I think while the vast majority of DTBs don't have anything that would
> cause unaligned accesses, that's not guaranteed by the FDT format.
> libfdt needs to handle the worst case.
>
> What about ARMv5 and v4 which don't universally support unaligned
> accesses or any other architecture. Do all mips, openrisc, riscv, arc,
> microblaze, xtenza, etc. support unaligned accesses?
>
> Rob

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                           ` <c56c4206-1be1-1701-1423-1f74f6913bbf-qxv4g6HH51o@public.gmane.org>
@ 2020-01-30 20:18                             ` Rob Herring
       [not found]                               ` <CAL_Jsq+43Opib7vUpZ_VxbJig3gT8crBoL6nw_yC=xOfcsbOvA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Rob Herring @ 2020-01-30 20:18 UTC (permalink / raw)
  To: Patrice CHOTARD
  Cc: Tom Rini, David Gibson, Devicetree Compiler, Patrick DELAUNAY

On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard-qxv4g6HH51o@public.gmane.org> wrote:
>
> Hi Rob, Tom, David
>
> On 1/28/20 3:08 PM, Rob Herring wrote:
> > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> >> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> >>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> >>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> >>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> >>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> >>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> >>>>>>>> 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> >>>>>>>> changes to support unaligned reads for ARM platforms and 11738cf01f15
> >>>>>>>> "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> >>>>>>>> performance of these helpers.
> >>>>>>>>
> >>>>>>>> Ultimately however, these helpers should not exist.  Unaligned access
> >>>>>>>> only occurs when images are forced out of alignment by the user.  This
> >>>>>>>> unalignment is not supported and introduces problems later on as other
> >>>>>>>> parts of the system image are unaligned and they too require alignment.
> >>>>>>>>
> >>>>>>>> Revert both of these changes.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> >>>>>>>> ---
> >>>>>>>> By way of a little more explanation, looking at the archives it seems
> >>>>>>>> that the initial bug reporter said that they had a platform that was
> >>>>>>>> using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> >>>>>>>> What that does is to tell U-Boot to not do any of the sanity checks and
> >>>>>>>> relocation to ensure alignment that it would normally do because the
> >>>>>>>> user knows best.  This later came up on the U-Boot list as once the DTB
> >>>>>>>> was loaded, Linux is unhappy because it demands correct alignment.
> >>>>>>>>
> >>>>>>>> I only realized libfdt had introduced changes here when it was reported
> >>>>>>>> that boot time had gotten much slower once we merged this change in.  It
> >>>>>>>> would be best to just drop it.
> >>>>>>> Hmm.  I'm not sure about this.  The commit message makes a case for
> >>>>>>> why the unaligned handling isn't necessary, but not a case for why
> >>>>>>> it's bad.  Even if handling an unaligned tree isn't a common case,
> >>>>>>> isn't it better to be able to than not?
> >>>>>>>
> >>>>>>> I gather from the previous discussion that there's a significant
> >>>>>>> performance impact, but that rationale needs to go into the commit
> >>>>>>> message for posterity.
> >>>>>> I wanted to emphasize that the code simply isn't ever needed, not that
> >>>>>> it's a performance problem.  A performance problem implies that we would
> >>>>>> keep this, if it was fast enough.  That's why people noticed it (it
> >>>>>> slows things down to an unusable level).  But it's functionally wrong.
> >>>>>>
> >>>>>>> [snip]
> >>>>>>>> diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> >>>>>>>> index fc4c4962a01c..d4ebe915cf46 100644
> >>>>>>>> --- a/libfdt/libfdt.h
> >>>>>>>> +++ b/libfdt/libfdt.h
> >>>>>>>> @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> >>>>>>>>
> >>>>>>>>  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> >>>>>>>>
> >>>>>>>> -/*
> >>>>>>>> - * Alignment helpers:
> >>>>>>>> - *     These helpers access words from a device tree blob.  They're
> >>>>>>>> - *     built to work even with unaligned pointers on platforms (ike
> >>>>>>>> - *     ARM) that don't like unaligned loads and stores
> >>>>>>>> - */
> >>>>>>>> -
> >>>>>>>> -static inline uint32_t fdt32_ld(const fdt32_t *p)
> >>>>>>>> -{
> >>>>>>>> -   const uint8_t *bp = (const uint8_t *)p;
> >>>>>>>> -
> >>>>>>>> -   return ((uint32_t)bp[0] << 24)
> >>>>>>>> -           | ((uint32_t)bp[1] << 16)
> >>>>>>>> -           | ((uint32_t)bp[2] << 8)
> >>>>>>>> -           | bp[3];
> >>>>>>>> -}
> >>>>>>> In particular, I definitely think removing the helpers entirely is a
> >>>>>>> no go.  They're now part of the published interface of the library.
> >>>>>> Perhaps "mistakes were made" ?  I don't think we need to worry about
> >>>>>> removing an interface here as projects are avoiding upgrading libfdt
> >>>>>> now (TF-A, formerly ATF) or reverting the change (U-Boot).
> >>>>>>
> >>>>>>> Even if they're not used for reading the internal tags, they can be
> >>>>>>> used to load integers from within particular properties.  Those are
> >>>>>>> frequently unaligned, since properties generally have packed
> >>>>>>> representations.
> >>>>>> I don't see the relevance.  Go back to the initial problem report.  It's
> >>>>>> not "I have a new unique platform I'm using libfdt on and I have
> >>>>>> problems".  It's "I keep jabbing myself with a rusty nail and now I have
> >>>>>> problems".
> >>>>> The initial report isn't the only relevant thing here.  Although it
> >>>>> prompted the change, it wasn't the only consideration in making it.
> >>>>>
> >>>>> There's also two separate questions here:
> >>>>>   1) Do we want byteswapping integer load helpers?
> >>>>>   2) Should those handle unaligned accesses?
> >>>>>
> >>>>> In working out how to address the (as it turns out, non existent)
> >>>>> problem, I realized an abstraction for loading big-endian integers
> >>>>> from the blob would be a useful thing in its own right.  I also
> >>>>> realized that it's a useful thing not just inside the libfdt code, but
> >>>>> as an external interface.  Callers have always needed to interpret the
> >>>>> contents of individual dt properties, and loading integers from them
> >>>>> is often a part of that.
> >>>>>
> >>>>> I know of people using those fdtXX_ld() helpers right now, so I'm not
> >>>>> going to take them away.
> >>>>>
> >>>>> For the case of external users we absolutely do need to handle
> >>>>> unaligned accesses.  There are a number of existing bindings that mix
> >>>>> strings and integers in packed format, in a single encoded property.
> >>>>> So regardless of the alignment of the whole property, we can easily
> >>>>> get unaligned integers in there, and I don't want to expose multiple
> >>>>> different helpers for different cases.
> >>>>>
> >>>>> Now, we don't *have* to use the same helpers for internal use.  We
> >>>>> could open code the internal loads, or use a special aligned-only
> >>>>> version inside.  But using the existing external helpers is the
> >>>>> obvious and simple choice.
> >>>>>
> >>>>> So, we're back to: I need a case for changing this now, not just a
> >>>>> case for claiming it wasn't needed in the first place.
> >>>> For U-Boot, I'm just going to revert this part of the changes.  I would
> >>> That seems reasonable for the interim.
> >>>
> >>>> suggest that you look in to some way to fix the "fast path" here to go
> >>>> back to the previous way of working so that other project can continue
> >>>> to use libfdt as well and when callers need to access these helpers and
> >>>> are not otherwise in the fast path can do so.
> >>>>
> >>>> You're adding visible boot time delay with things the way they exist
> >>>> today.  That's not OK.
> >>> That's a fair point, but you need to make that case in the commit
> >>> message and merge request, not just expect me to find and collate the
> >>> arguments from other threads.
> >> If you want me to leave the helpers alone but otherwise revert things, I
> >> can do a v2 and expand the commit message.  And perhaps I'm too nice
> >> sometimes then but I do pickup and tweak things that are close enough to
> >> what I want and reword as needed for clarity.
> > Why not just fix the helpers for the aligned case and be done with it:
> >
> > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > {
> >        const uint8_t *bp = (const uint8_t *)p;
> >
> > +       if (!((unsigned long)p & 0x3))
> > +               return fdt32_to_cpu(*p);
> > +
> >        return ((uint32_t)bp[0] << 24)
> >                | ((uint32_t)bp[1] << 16)
> >                | ((uint32_t)bp[2] << 8)
> >                | bp[3];
> > }
>
>
> Here are the results, before and after your proposed patch:
>
> tested tag: V2020.04-RC1
>
> STM32MP> bootstage report
> Timer summary in microseconds (12 records):
>        Mark    Elapsed  Stage
>           0          0  reset
>      84,268     84,268  SPL
>     962,921    878,653  end SPL
>     965,800      2,879  board_init_f
>   4,314,348  3,348,548  board_init_r
>   4,863,763    549,415  id=64
>   4,908,759     44,996  id=65
>   4,909,459        700  main_loop
>   5,322,309    412,850  id=175
>
> Accumulated time:
>                 83,284  dm_r
>                 95,842  dm_spl
>              1,502,020  dm_f
>
>
> -------------------------------------------------------------------------------
>
> tested tag : V2020.04-RC1 + following fdt32_ld patch :
>
> static inline uint32_t fdt32_ld(const fdt32_t *p)
> {
>        const uint8_t *bp = (const uint8_t *)p;
>
> +       if (!((unsigned long)p & 0x3))
> +               return fdt32_to_cpu(*p);
> +
>        return ((uint32_t)bp[0] << 24)
>                | ((uint32_t)bp[1] << 16)
>                | ((uint32_t)bp[2] << 8)
>                | bp[3];
> }
>
>
> STM32MP> bootstage report
> Timer summary in microseconds (12 records):
>        Mark    Elapsed  Stage
>           0          0  reset
>      84,264     84,264  SPL
>     959,300    875,036  end SPL
>     962,192      2,892  board_init_f
>   4,310,598  3,348,406  board_init_r
>   4,860,074    549,476  id=64
>   4,905,119     45,045  id=65
>   4,905,819        700  main_loop
>   5,098,636    192,817  id=175
>
> Accumulated time:
>                 83,202  dm_r
>                 95,252  dm_spl
>              1,501,950  dm_f
>
>
> There is no gain in board_init_r(), the added alignment test is expensive itself.

That's odd. It should be just an AND and a conditional branch added.
Those should be a few cycles compared to tens to hundreds of cycles
for 4 loads instead of 1.

Doesn't u-boot build with -Os typically? Maybe it's not actually inlining.

Rob

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                               ` <CAL_Jsq+43Opib7vUpZ_VxbJig3gT8crBoL6nw_yC=xOfcsbOvA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2020-01-30 20:49                                 ` Tom Rini
  2020-01-31  3:38                                 ` David Gibson
  1 sibling, 0 replies; 26+ messages in thread
From: Tom Rini @ 2020-01-30 20:49 UTC (permalink / raw)
  To: Rob Herring
  Cc: Patrice CHOTARD, David Gibson, Devicetree Compiler, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 11002 bytes --]

On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard-qxv4g6HH51o@public.gmane.org> wrote:
> >
> > Hi Rob, Tom, David
> >
> > On 1/28/20 3:08 PM, Rob Herring wrote:
> > > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > >> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > >>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > >>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > >>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > >>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > >>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > >>>>>>>> 6dcb8ba4 "libfdt: Add helpers for accessing unaligned words" introduced
> > >>>>>>>> changes to support unaligned reads for ARM platforms and 11738cf01f15
> > >>>>>>>> "libfdt: Don't use memcpy to handle unaligned reads on ARM" improved the
> > >>>>>>>> performance of these helpers.
> > >>>>>>>>
> > >>>>>>>> Ultimately however, these helpers should not exist.  Unaligned access
> > >>>>>>>> only occurs when images are forced out of alignment by the user.  This
> > >>>>>>>> unalignment is not supported and introduces problems later on as other
> > >>>>>>>> parts of the system image are unaligned and they too require alignment.
> > >>>>>>>>
> > >>>>>>>> Revert both of these changes.
> > >>>>>>>>
> > >>>>>>>> Signed-off-by: Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
> > >>>>>>>> ---
> > >>>>>>>> By way of a little more explanation, looking at the archives it seems
> > >>>>>>>> that the initial bug reporter said that they had a platform that was
> > >>>>>>>> using U-Boot and had the "fdt_high=0xffffffff" set in the environment.
> > >>>>>>>> What that does is to tell U-Boot to not do any of the sanity checks and
> > >>>>>>>> relocation to ensure alignment that it would normally do because the
> > >>>>>>>> user knows best.  This later came up on the U-Boot list as once the DTB
> > >>>>>>>> was loaded, Linux is unhappy because it demands correct alignment.
> > >>>>>>>>
> > >>>>>>>> I only realized libfdt had introduced changes here when it was reported
> > >>>>>>>> that boot time had gotten much slower once we merged this change in.  It
> > >>>>>>>> would be best to just drop it.
> > >>>>>>> Hmm.  I'm not sure about this.  The commit message makes a case for
> > >>>>>>> why the unaligned handling isn't necessary, but not a case for why
> > >>>>>>> it's bad.  Even if handling an unaligned tree isn't a common case,
> > >>>>>>> isn't it better to be able to than not?
> > >>>>>>>
> > >>>>>>> I gather from the previous discussion that there's a significant
> > >>>>>>> performance impact, but that rationale needs to go into the commit
> > >>>>>>> message for posterity.
> > >>>>>> I wanted to emphasize that the code simply isn't ever needed, not that
> > >>>>>> it's a performance problem.  A performance problem implies that we would
> > >>>>>> keep this, if it was fast enough.  That's why people noticed it (it
> > >>>>>> slows things down to an unusable level).  But it's functionally wrong.
> > >>>>>>
> > >>>>>>> [snip]
> > >>>>>>>> diff --git a/libfdt/libfdt.h b/libfdt/libfdt.h
> > >>>>>>>> index fc4c4962a01c..d4ebe915cf46 100644
> > >>>>>>>> --- a/libfdt/libfdt.h
> > >>>>>>>> +++ b/libfdt/libfdt.h
> > >>>>>>>> @@ -117,23 +117,6 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
> > >>>>>>>>
> > >>>>>>>>  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
> > >>>>>>>>
> > >>>>>>>> -/*
> > >>>>>>>> - * Alignment helpers:
> > >>>>>>>> - *     These helpers access words from a device tree blob.  They're
> > >>>>>>>> - *     built to work even with unaligned pointers on platforms (ike
> > >>>>>>>> - *     ARM) that don't like unaligned loads and stores
> > >>>>>>>> - */
> > >>>>>>>> -
> > >>>>>>>> -static inline uint32_t fdt32_ld(const fdt32_t *p)
> > >>>>>>>> -{
> > >>>>>>>> -   const uint8_t *bp = (const uint8_t *)p;
> > >>>>>>>> -
> > >>>>>>>> -   return ((uint32_t)bp[0] << 24)
> > >>>>>>>> -           | ((uint32_t)bp[1] << 16)
> > >>>>>>>> -           | ((uint32_t)bp[2] << 8)
> > >>>>>>>> -           | bp[3];
> > >>>>>>>> -}
> > >>>>>>> In particular, I definitely think removing the helpers entirely is a
> > >>>>>>> no go.  They're now part of the published interface of the library.
> > >>>>>> Perhaps "mistakes were made" ?  I don't think we need to worry about
> > >>>>>> removing an interface here as projects are avoiding upgrading libfdt
> > >>>>>> now (TF-A, formerly ATF) or reverting the change (U-Boot).
> > >>>>>>
> > >>>>>>> Even if they're not used for reading the internal tags, they can be
> > >>>>>>> used to load integers from within particular properties.  Those are
> > >>>>>>> frequently unaligned, since properties generally have packed
> > >>>>>>> representations.
> > >>>>>> I don't see the relevance.  Go back to the initial problem report.  It's
> > >>>>>> not "I have a new unique platform I'm using libfdt on and I have
> > >>>>>> problems".  It's "I keep jabbing myself with a rusty nail and now I have
> > >>>>>> problems".
> > >>>>> The initial report isn't the only relevant thing here.  Although it
> > >>>>> prompted the change, it wasn't the only consideration in making it.
> > >>>>>
> > >>>>> There's also two separate questions here:
> > >>>>>   1) Do we want byteswapping integer load helpers?
> > >>>>>   2) Should those handle unaligned accesses?
> > >>>>>
> > >>>>> In working out how to address the (as it turns out, non existent)
> > >>>>> problem, I realized an abstraction for loading big-endian integers
> > >>>>> from the blob would be a useful thing in its own right.  I also
> > >>>>> realized that it's a useful thing not just inside the libfdt code, but
> > >>>>> as an external interface.  Callers have always needed to interpret the
> > >>>>> contents of individual dt properties, and loading integers from them
> > >>>>> is often a part of that.
> > >>>>>
> > >>>>> I know of people using those fdtXX_ld() helpers right now, so I'm not
> > >>>>> going to take them away.
> > >>>>>
> > >>>>> For the case of external users we absolutely do need to handle
> > >>>>> unaligned accesses.  There are a number of existing bindings that mix
> > >>>>> strings and integers in packed format, in a single encoded property.
> > >>>>> So regardless of the alignment of the whole property, we can easily
> > >>>>> get unaligned integers in there, and I don't want to expose multiple
> > >>>>> different helpers for different cases.
> > >>>>>
> > >>>>> Now, we don't *have* to use the same helpers for internal use.  We
> > >>>>> could open code the internal loads, or use a special aligned-only
> > >>>>> version inside.  But using the existing external helpers is the
> > >>>>> obvious and simple choice.
> > >>>>>
> > >>>>> So, we're back to: I need a case for changing this now, not just a
> > >>>>> case for claiming it wasn't needed in the first place.
> > >>>> For U-Boot, I'm just going to revert this part of the changes.  I would
> > >>> That seems reasonable for the interim.
> > >>>
> > >>>> suggest that you look in to some way to fix the "fast path" here to go
> > >>>> back to the previous way of working so that other project can continue
> > >>>> to use libfdt as well and when callers need to access these helpers and
> > >>>> are not otherwise in the fast path can do so.
> > >>>>
> > >>>> You're adding visible boot time delay with things the way they exist
> > >>>> today.  That's not OK.
> > >>> That's a fair point, but you need to make that case in the commit
> > >>> message and merge request, not just expect me to find and collate the
> > >>> arguments from other threads.
> > >> If you want me to leave the helpers alone but otherwise revert things, I
> > >> can do a v2 and expand the commit message.  And perhaps I'm too nice
> > >> sometimes then but I do pickup and tweak things that are close enough to
> > >> what I want and reword as needed for clarity.
> > > Why not just fix the helpers for the aligned case and be done with it:
> > >
> > > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > {
> > >        const uint8_t *bp = (const uint8_t *)p;
> > >
> > > +       if (!((unsigned long)p & 0x3))
> > > +               return fdt32_to_cpu(*p);
> > > +
> > >        return ((uint32_t)bp[0] << 24)
> > >                | ((uint32_t)bp[1] << 16)
> > >                | ((uint32_t)bp[2] << 8)
> > >                | bp[3];
> > > }
> >
> >
> > Here are the results, before and after your proposed patch:
> >
> > tested tag: V2020.04-RC1
> >
> > STM32MP> bootstage report
> > Timer summary in microseconds (12 records):
> >        Mark    Elapsed  Stage
> >           0          0  reset
> >      84,268     84,268  SPL
> >     962,921    878,653  end SPL
> >     965,800      2,879  board_init_f
> >   4,314,348  3,348,548  board_init_r
> >   4,863,763    549,415  id=64
> >   4,908,759     44,996  id=65
> >   4,909,459        700  main_loop
> >   5,322,309    412,850  id=175
> >
> > Accumulated time:
> >                 83,284  dm_r
> >                 95,842  dm_spl
> >              1,502,020  dm_f
> >
> >
> > -------------------------------------------------------------------------------
> >
> > tested tag : V2020.04-RC1 + following fdt32_ld patch :
> >
> > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > {
> >        const uint8_t *bp = (const uint8_t *)p;
> >
> > +       if (!((unsigned long)p & 0x3))
> > +               return fdt32_to_cpu(*p);
> > +
> >        return ((uint32_t)bp[0] << 24)
> >                | ((uint32_t)bp[1] << 16)
> >                | ((uint32_t)bp[2] << 8)
> >                | bp[3];
> > }
> >
> >
> > STM32MP> bootstage report
> > Timer summary in microseconds (12 records):
> >        Mark    Elapsed  Stage
> >           0          0  reset
> >      84,264     84,264  SPL
> >     959,300    875,036  end SPL
> >     962,192      2,892  board_init_f
> >   4,310,598  3,348,406  board_init_r
> >   4,860,074    549,476  id=64
> >   4,905,119     45,045  id=65
> >   4,905,819        700  main_loop
> >   5,098,636    192,817  id=175
> >
> > Accumulated time:
> >                 83,202  dm_r
> >                 95,252  dm_spl
> >              1,501,950  dm_f
> >
> >
> > There is no gain in board_init_r(), the added alignment test is expensive itself.
> 
> That's odd. It should be just an AND and a conditional branch added.
> Those should be a few cycles compared to tens to hundreds of cycles
> for 4 loads instead of 1.
> 
> Doesn't u-boot build with -Os typically? Maybe it's not actually inlining.

Checking everything over, yes, it's been inlined.

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                               ` <CAL_Jsq+43Opib7vUpZ_VxbJig3gT8crBoL6nw_yC=xOfcsbOvA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2020-01-30 20:49                                 ` Tom Rini
@ 2020-01-31  3:38                                 ` David Gibson
       [not found]                                   ` <20200131033810.GE15210-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: David Gibson @ 2020-01-31  3:38 UTC (permalink / raw)
  To: Rob Herring
  Cc: Patrice CHOTARD, Tom Rini, Devicetree Compiler, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 4102 bytes --]

On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard-qxv4g6HH51o@public.gmane.org> wrote:
> >
> > Hi Rob, Tom, David
> >
> > On 1/28/20 3:08 PM, Rob Herring wrote:
> > > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > >> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > >>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > >>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > >>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > >>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > >>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
[snip]
> > > Why not just fix the helpers for the aligned case and be done with it:
> > >
> > > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > {
> > >        const uint8_t *bp = (const uint8_t *)p;
> > >
> > > +       if (!((unsigned long)p & 0x3))
> > > +               return fdt32_to_cpu(*p);
> > > +
> > >        return ((uint32_t)bp[0] << 24)
> > >                | ((uint32_t)bp[1] << 16)
> > >                | ((uint32_t)bp[2] << 8)
> > >                | bp[3];
> > > }
> >
> >
> > Here are the results, before and after your proposed patch:
> >
> > tested tag: V2020.04-RC1
> >
> > STM32MP> bootstage report
> > Timer summary in microseconds (12 records):
> >        Mark    Elapsed  Stage
> >           0          0  reset
> >      84,268     84,268  SPL
> >     962,921    878,653  end SPL
> >     965,800      2,879  board_init_f
> >   4,314,348  3,348,548  board_init_r
> >   4,863,763    549,415  id=64
> >   4,908,759     44,996  id=65
> >   4,909,459        700  main_loop
> >   5,322,309    412,850  id=175
> >
> > Accumulated time:
> >                 83,284  dm_r
> >                 95,842  dm_spl
> >              1,502,020  dm_f
> >
> >
> > -------------------------------------------------------------------------------
> >
> > tested tag : V2020.04-RC1 + following fdt32_ld patch :
> >
> > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > {
> >        const uint8_t *bp = (const uint8_t *)p;
> >
> > +       if (!((unsigned long)p & 0x3))
> > +               return fdt32_to_cpu(*p);
> > +
> >        return ((uint32_t)bp[0] << 24)
> >                | ((uint32_t)bp[1] << 16)
> >                | ((uint32_t)bp[2] << 8)
> >                | bp[3];
> > }
> >
> >
> > STM32MP> bootstage report
> > Timer summary in microseconds (12 records):
> >        Mark    Elapsed  Stage
> >           0          0  reset
> >      84,264     84,264  SPL
> >     959,300    875,036  end SPL
> >     962,192      2,892  board_init_f
> >   4,310,598  3,348,406  board_init_r
> >   4,860,074    549,476  id=64
> >   4,905,119     45,045  id=65
> >   4,905,819        700  main_loop
> >   5,098,636    192,817  id=175
> >
> > Accumulated time:
> >                 83,202  dm_r
> >                 95,252  dm_spl
> >              1,501,950  dm_f
> >
> >
> > There is no gain in board_init_r(), the added alignment test is expensive itself.

Drat.

> That's odd. It should be just an AND and a conditional branch added.
> Those should be a few cycles compared to tens to hundreds of cycles
> for 4 loads instead of 1.

The later loads are very likely to hit in cache though, right?  Or is
this done before the caches are activated?

> Doesn't u-boot build with -Os typically? Maybe it's not actually
> inlining.

Apparently it is.  And yet... it seems pretty suspicious that the
numbers are so close, despite doing something quite different here.
Can we somehow verify that the fast path is really being executed?

For comparison purposes, what are the numbers if we change these to
unconditionally do an aligned load?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                                   ` <20200131033810.GE15210-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
@ 2020-01-31  4:15                                     ` Tom Rini
  2020-01-31  8:44                                     ` Patrice CHOTARD
  1 sibling, 0 replies; 26+ messages in thread
From: Tom Rini @ 2020-01-31  4:15 UTC (permalink / raw)
  To: David Gibson
  Cc: Rob Herring, Patrice CHOTARD, Devicetree Compiler, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 4311 bytes --]

On Fri, Jan 31, 2020 at 02:38:10PM +1100, David Gibson wrote:
> On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
> > On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard-qxv4g6HH51o@public.gmane.org> wrote:
> > >
> > > Hi Rob, Tom, David
> > >
> > > On 1/28/20 3:08 PM, Rob Herring wrote:
> > > > On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> > > >> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> > > >>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> > > >>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> > > >>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> > > >>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> > > >>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> [snip]
> > > > Why not just fix the helpers for the aligned case and be done with it:
> > > >
> > > > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > > {
> > > >        const uint8_t *bp = (const uint8_t *)p;
> > > >
> > > > +       if (!((unsigned long)p & 0x3))
> > > > +               return fdt32_to_cpu(*p);
> > > > +
> > > >        return ((uint32_t)bp[0] << 24)
> > > >                | ((uint32_t)bp[1] << 16)
> > > >                | ((uint32_t)bp[2] << 8)
> > > >                | bp[3];
> > > > }
> > >
> > >
> > > Here are the results, before and after your proposed patch:
> > >
> > > tested tag: V2020.04-RC1
> > >
> > > STM32MP> bootstage report
> > > Timer summary in microseconds (12 records):
> > >        Mark    Elapsed  Stage
> > >           0          0  reset
> > >      84,268     84,268  SPL
> > >     962,921    878,653  end SPL
> > >     965,800      2,879  board_init_f
> > >   4,314,348  3,348,548  board_init_r
> > >   4,863,763    549,415  id=64
> > >   4,908,759     44,996  id=65
> > >   4,909,459        700  main_loop
> > >   5,322,309    412,850  id=175
> > >
> > > Accumulated time:
> > >                 83,284  dm_r
> > >                 95,842  dm_spl
> > >              1,502,020  dm_f
> > >
> > >
> > > -------------------------------------------------------------------------------
> > >
> > > tested tag : V2020.04-RC1 + following fdt32_ld patch :
> > >
> > > static inline uint32_t fdt32_ld(const fdt32_t *p)
> > > {
> > >        const uint8_t *bp = (const uint8_t *)p;
> > >
> > > +       if (!((unsigned long)p & 0x3))
> > > +               return fdt32_to_cpu(*p);
> > > +
> > >        return ((uint32_t)bp[0] << 24)
> > >                | ((uint32_t)bp[1] << 16)
> > >                | ((uint32_t)bp[2] << 8)
> > >                | bp[3];
> > > }
> > >
> > >
> > > STM32MP> bootstage report
> > > Timer summary in microseconds (12 records):
> > >        Mark    Elapsed  Stage
> > >           0          0  reset
> > >      84,264     84,264  SPL
> > >     959,300    875,036  end SPL
> > >     962,192      2,892  board_init_f
> > >   4,310,598  3,348,406  board_init_r
> > >   4,860,074    549,476  id=64
> > >   4,905,119     45,045  id=65
> > >   4,905,819        700  main_loop
> > >   5,098,636    192,817  id=175
> > >
> > > Accumulated time:
> > >                 83,202  dm_r
> > >                 95,252  dm_spl
> > >              1,501,950  dm_f
> > >
> > >
> > > There is no gain in board_init_r(), the added alignment test is expensive itself.
> 
> Drat.
> 
> > That's odd. It should be just an AND and a conditional branch added.
> > Those should be a few cycles compared to tens to hundreds of cycles
> > for 4 loads instead of 1.
> 
> The later loads are very likely to hit in cache though, right?  Or is
> this done before the caches are activated?
> 
> > Doesn't u-boot build with -Os typically? Maybe it's not actually
> > inlining.
> 
> Apparently it is.  And yet... it seems pretty suspicious that the
> numbers are so close, despite doing something quite different here.
> Can we somehow verify that the fast path is really being executed?
> 
> For comparison purposes, what are the numbers if we change these to
> unconditionally do an aligned load?

The before / after numbers can be seen at
https://lists.denx.de/pipermail/u-boot/2020-January/396707.html

-- 
Tom

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                                   ` <20200131033810.GE15210-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  2020-01-31  4:15                                     ` Tom Rini
@ 2020-01-31  8:44                                     ` Patrice CHOTARD
       [not found]                                       ` <7bbbb5a8-8e35-e95c-8d03-6970fd6b023e-qxv4g6HH51o@public.gmane.org>
  1 sibling, 1 reply; 26+ messages in thread
From: Patrice CHOTARD @ 2020-01-31  8:44 UTC (permalink / raw)
  To: David Gibson, Rob Herring; +Cc: Tom Rini, Devicetree Compiler, Patrick DELAUNAY

Hi

On 1/31/20 4:38 AM, David Gibson wrote:
> On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
>> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard@st.com> wrote:
>>> Hi Rob, Tom, David
>>>
>>> On 1/28/20 3:08 PM, Rob Herring wrote:
>>>> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini@konsulko.com> wrote:
>>>>> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
>>>>>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
>>>>>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
>>>>>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
>>>>>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
>>>>>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> [snip]
>>>> Why not just fix the helpers for the aligned case and be done with it:
>>>>
>>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
>>>> {
>>>>        const uint8_t *bp = (const uint8_t *)p;
>>>>
>>>> +       if (!((unsigned long)p & 0x3))
>>>> +               return fdt32_to_cpu(*p);
>>>> +
>>>>        return ((uint32_t)bp[0] << 24)
>>>>                | ((uint32_t)bp[1] << 16)
>>>>                | ((uint32_t)bp[2] << 8)
>>>>                | bp[3];
>>>> }
>>>
>>> Here are the results, before and after your proposed patch:
>>>
>>> tested tag: V2020.04-RC1
>>>
>>> STM32MP> bootstage report
>>> Timer summary in microseconds (12 records):
>>>        Mark    Elapsed  Stage
>>>           0          0  reset
>>>      84,268     84,268  SPL
>>>     962,921    878,653  end SPL
>>>     965,800      2,879  board_init_f
>>>   4,314,348  3,348,548  board_init_r
>>>   4,863,763    549,415  id=64
>>>   4,908,759     44,996  id=65
>>>   4,909,459        700  main_loop
>>>   5,322,309    412,850  id=175
>>>
>>> Accumulated time:
>>>                 83,284  dm_r
>>>                 95,842  dm_spl
>>>              1,502,020  dm_f
>>>
>>>
>>> -------------------------------------------------------------------------------
>>>
>>> tested tag : V2020.04-RC1 + following fdt32_ld patch :
>>>
>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
>>> {
>>>        const uint8_t *bp = (const uint8_t *)p;
>>>
>>> +       if (!((unsigned long)p & 0x3))
>>> +               return fdt32_to_cpu(*p);
>>> +
>>>        return ((uint32_t)bp[0] << 24)
>>>                | ((uint32_t)bp[1] << 16)
>>>                | ((uint32_t)bp[2] << 8)
>>>                | bp[3];
>>> }
>>>
>>>
>>> STM32MP> bootstage report
>>> Timer summary in microseconds (12 records):
>>>        Mark    Elapsed  Stage
>>>           0          0  reset
>>>      84,264     84,264  SPL
>>>     959,300    875,036  end SPL
>>>     962,192      2,892  board_init_f
>>>   4,310,598  3,348,406  board_init_r
>>>   4,860,074    549,476  id=64
>>>   4,905,119     45,045  id=65
>>>   4,905,819        700  main_loop
>>>   5,098,636    192,817  id=175
>>>
>>> Accumulated time:
>>>                 83,202  dm_r
>>>                 95,252  dm_spl
>>>              1,501,950  dm_f
>>>
>>>
>>> There is no gain in board_init_r(), the added alignment test is expensive itself.
> Drat.
>
>> That's odd. It should be just an AND and a conditional branch added.
>> Those should be a few cycles compared to tens to hundreds of cycles
>> for 4 loads instead of 1.
> The later loads are very likely to hit in cache though, right?  Or is
> this done before the caches are activated?
>
>> Doesn't u-boot build with -Os typically? Maybe it's not actually
>> inlining.
> Apparently it is.  And yet... it seems pretty suspicious that the
> numbers are so close, despite doing something quite different here.
> Can we somehow verify that the fast path is really being executed?
>
> For comparison purposes, what are the numbers if we change these to
> unconditionally do an aligned load?
>
I made one more test to be 100% sure and the result is weird .... 

static inline uint32_t fdt32_ld(const fdt32_t *p)
{

       if (!((unsigned long)p & 0x3))
             return fdt32_to_cpu(*p);

       while (1) {};
}

the results are back to 'normal', board_init_r() execution is 1.9 second  faster .....

STM32MP> bootstage report
Timer summary in microseconds (12 records):
       Mark    Elapsed  Stage
          0          0  reset
     84,265     84,265  SPL
    868,330    784,065  end SPL
    871,163      2,833  board_init_f
  2,360,135  1,488,972  board_init_r
  2,786,920    426,785  id=64
  2,818,189     31,269  id=65
  2,818,889        700  main_loop
  2,877,661     58,772  id=175

Accumulated time:
                54,489  dm_r
                56,778  dm_spl
               644,883  dm_f

Is the compiler doing some optimization link to the CPU pipeline prediction or something else ?

Patrice

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                                       ` <7bbbb5a8-8e35-e95c-8d03-6970fd6b023e-qxv4g6HH51o@public.gmane.org>
@ 2020-02-02  6:20                                         ` David Gibson
       [not found]                                           ` <20200202062031.GB30687-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: David Gibson @ 2020-02-02  6:20 UTC (permalink / raw)
  To: Patrice CHOTARD
  Cc: Rob Herring, Tom Rini, Devicetree Compiler, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 5622 bytes --]

On Fri, Jan 31, 2020 at 08:44:41AM +0000, Patrice CHOTARD wrote:
> Hi
> 
> On 1/31/20 4:38 AM, David Gibson wrote:
> > On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
> >> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard-lpHj6iFQ3dU@public.gmane.orgm> wrote:
> >>> Hi Rob, Tom, David
> >>>
> >>> On 1/28/20 3:08 PM, Rob Herring wrote:
> >>>> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> >>>>> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> >>>>>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> >>>>>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> >>>>>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> >>>>>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> >>>>>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> > [snip]
> >>>> Why not just fix the helpers for the aligned case and be done with it:
> >>>>
> >>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
> >>>> {
> >>>>        const uint8_t *bp = (const uint8_t *)p;
> >>>>
> >>>> +       if (!((unsigned long)p & 0x3))
> >>>> +               return fdt32_to_cpu(*p);
> >>>> +
> >>>>        return ((uint32_t)bp[0] << 24)
> >>>>                | ((uint32_t)bp[1] << 16)
> >>>>                | ((uint32_t)bp[2] << 8)
> >>>>                | bp[3];
> >>>> }
> >>>
> >>> Here are the results, before and after your proposed patch:
> >>>
> >>> tested tag: V2020.04-RC1
> >>>
> >>> STM32MP> bootstage report
> >>> Timer summary in microseconds (12 records):
> >>>        Mark    Elapsed  Stage
> >>>           0          0  reset
> >>>      84,268     84,268  SPL
> >>>     962,921    878,653  end SPL
> >>>     965,800      2,879  board_init_f
> >>>   4,314,348  3,348,548  board_init_r
> >>>   4,863,763    549,415  id=64
> >>>   4,908,759     44,996  id=65
> >>>   4,909,459        700  main_loop
> >>>   5,322,309    412,850  id=175
> >>>
> >>> Accumulated time:
> >>>                 83,284  dm_r
> >>>                 95,842  dm_spl
> >>>              1,502,020  dm_f
> >>>
> >>>
> >>> -------------------------------------------------------------------------------
> >>>
> >>> tested tag : V2020.04-RC1 + following fdt32_ld patch :
> >>>
> >>> static inline uint32_t fdt32_ld(const fdt32_t *p)
> >>> {
> >>>        const uint8_t *bp = (const uint8_t *)p;
> >>>
> >>> +       if (!((unsigned long)p & 0x3))
> >>> +               return fdt32_to_cpu(*p);
> >>> +
> >>>        return ((uint32_t)bp[0] << 24)
> >>>                | ((uint32_t)bp[1] << 16)
> >>>                | ((uint32_t)bp[2] << 8)
> >>>                | bp[3];
> >>> }
> >>>
> >>>
> >>> STM32MP> bootstage report
> >>> Timer summary in microseconds (12 records):
> >>>        Mark    Elapsed  Stage
> >>>           0          0  reset
> >>>      84,264     84,264  SPL
> >>>     959,300    875,036  end SPL
> >>>     962,192      2,892  board_init_f
> >>>   4,310,598  3,348,406  board_init_r
> >>>   4,860,074    549,476  id=64
> >>>   4,905,119     45,045  id=65
> >>>   4,905,819        700  main_loop
> >>>   5,098,636    192,817  id=175
> >>>
> >>> Accumulated time:
> >>>                 83,202  dm_r
> >>>                 95,252  dm_spl
> >>>              1,501,950  dm_f
> >>>
> >>>
> >>> There is no gain in board_init_r(), the added alignment test is expensive itself.
> > Drat.
> >
> >> That's odd. It should be just an AND and a conditional branch added.
> >> Those should be a few cycles compared to tens to hundreds of cycles
> >> for 4 loads instead of 1.
> > The later loads are very likely to hit in cache though, right?  Or is
> > this done before the caches are activated?
> >
> >> Doesn't u-boot build with -Os typically? Maybe it's not actually
> >> inlining.
> > Apparently it is.  And yet... it seems pretty suspicious that the
> > numbers are so close, despite doing something quite different here.
> > Can we somehow verify that the fast path is really being executed?
> >
> > For comparison purposes, what are the numbers if we change these to
> > unconditionally do an aligned load?
> >
> I made one more test to be 100% sure and the result is weird .... 
> 
> static inline uint32_t fdt32_ld(const fdt32_t *p)
> {
> 
>        if (!((unsigned long)p & 0x3))
>              return fdt32_to_cpu(*p);
> 
>        while (1) {};
> }
> 
> the results are back to 'normal', board_init_r() execution is 1.9 second  faster .....

Huh.  I dunno how, but it definitely seems like the conditional
version wasn't operating properly.

Does putting a "likely" branch prediction on that if make any difference?

> STM32MP> bootstage report
> Timer summary in microseconds (12 records):
>        Mark    Elapsed  Stage
>           0          0  reset
>      84,265     84,265  SPL
>     868,330    784,065  end SPL
>     871,163      2,833  board_init_f
>   2,360,135  1,488,972  board_init_r
>   2,786,920    426,785  id=64
>   2,818,189     31,269  id=65
>   2,818,889        700  main_loop
>   2,877,661     58,772  id=175
> 
> Accumulated time:
>                 54,489  dm_r
>                 56,778  dm_spl
>                644,883  dm_f
> 
> Is the compiler doing some optimization link to the CPU pipeline prediction or something else ?
> 
> Patrice

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                                           ` <20200202062031.GB30687-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
@ 2020-02-03 13:14                                             ` Patrice CHOTARD
       [not found]                                               ` <1f4881a0-8d52-c280-3376-d66943d489a1-qxv4g6HH51o@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Patrice CHOTARD @ 2020-02-03 13:14 UTC (permalink / raw)
  To: David Gibson; +Cc: Rob Herring, Tom Rini, Devicetree Compiler, Patrick DELAUNAY

Hi David

On 2/2/20 7:20 AM, David Gibson wrote:
> On Fri, Jan 31, 2020 at 08:44:41AM +0000, Patrice CHOTARD wrote:
>> Hi
>>
>> On 1/31/20 4:38 AM, David Gibson wrote:
>>> On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
>>>> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard@st.com> wrote:
>>>>> Hi Rob, Tom, David
>>>>>
>>>>> On 1/28/20 3:08 PM, Rob Herring wrote:
>>>>>> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini@konsulko.com> wrote:
>>>>>>> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
>>>>>>>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
>>>>>>>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
>>>>>>>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
>>>>>>>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
>>>>>>>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
>>> [snip]
>>>>>> Why not just fix the helpers for the aligned case and be done with it:
>>>>>>
>>>>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
>>>>>> {
>>>>>>        const uint8_t *bp = (const uint8_t *)p;
>>>>>>
>>>>>> +       if (!((unsigned long)p & 0x3))
>>>>>> +               return fdt32_to_cpu(*p);
>>>>>> +
>>>>>>        return ((uint32_t)bp[0] << 24)
>>>>>>                | ((uint32_t)bp[1] << 16)
>>>>>>                | ((uint32_t)bp[2] << 8)
>>>>>>                | bp[3];
>>>>>> }
>>>>> Here are the results, before and after your proposed patch:
>>>>>
>>>>> tested tag: V2020.04-RC1
>>>>>
>>>>> STM32MP> bootstage report
>>>>> Timer summary in microseconds (12 records):
>>>>>        Mark    Elapsed  Stage
>>>>>           0          0  reset
>>>>>      84,268     84,268  SPL
>>>>>     962,921    878,653  end SPL
>>>>>     965,800      2,879  board_init_f
>>>>>   4,314,348  3,348,548  board_init_r
>>>>>   4,863,763    549,415  id=64
>>>>>   4,908,759     44,996  id=65
>>>>>   4,909,459        700  main_loop
>>>>>   5,322,309    412,850  id=175
>>>>>
>>>>> Accumulated time:
>>>>>                 83,284  dm_r
>>>>>                 95,842  dm_spl
>>>>>              1,502,020  dm_f
>>>>>
>>>>>
>>>>> -------------------------------------------------------------------------------
>>>>>
>>>>> tested tag : V2020.04-RC1 + following fdt32_ld patch :
>>>>>
>>>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
>>>>> {
>>>>>        const uint8_t *bp = (const uint8_t *)p;
>>>>>
>>>>> +       if (!((unsigned long)p & 0x3))
>>>>> +               return fdt32_to_cpu(*p);
>>>>> +
>>>>>        return ((uint32_t)bp[0] << 24)
>>>>>                | ((uint32_t)bp[1] << 16)
>>>>>                | ((uint32_t)bp[2] << 8)
>>>>>                | bp[3];
>>>>> }
>>>>>
>>>>>
>>>>> STM32MP> bootstage report
>>>>> Timer summary in microseconds (12 records):
>>>>>        Mark    Elapsed  Stage
>>>>>           0          0  reset
>>>>>      84,264     84,264  SPL
>>>>>     959,300    875,036  end SPL
>>>>>     962,192      2,892  board_init_f
>>>>>   4,310,598  3,348,406  board_init_r
>>>>>   4,860,074    549,476  id=64
>>>>>   4,905,119     45,045  id=65
>>>>>   4,905,819        700  main_loop
>>>>>   5,098,636    192,817  id=175
>>>>>
>>>>> Accumulated time:
>>>>>                 83,202  dm_r
>>>>>                 95,252  dm_spl
>>>>>              1,501,950  dm_f
>>>>>
>>>>>
>>>>> There is no gain in board_init_r(), the added alignment test is expensive itself.
>>> Drat.
>>>
>>>> That's odd. It should be just an AND and a conditional branch added.
>>>> Those should be a few cycles compared to tens to hundreds of cycles
>>>> for 4 loads instead of 1.
>>> The later loads are very likely to hit in cache though, right?  Or is
>>> this done before the caches are activated?
>>>
>>>> Doesn't u-boot build with -Os typically? Maybe it's not actually
>>>> inlining.
>>> Apparently it is.  And yet... it seems pretty suspicious that the
>>> numbers are so close, despite doing something quite different here.
>>> Can we somehow verify that the fast path is really being executed?
>>>
>>> For comparison purposes, what are the numbers if we change these to
>>> unconditionally do an aligned load?
>>>
>> I made one more test to be 100% sure and the result is weird .... 
>>
>> static inline uint32_t fdt32_ld(const fdt32_t *p)
>> {
>>
>>        if (!((unsigned long)p & 0x3))
>>              return fdt32_to_cpu(*p);
>>
>>        while (1) {};
>> }
>>
>> the results are back to 'normal', board_init_r() execution is 1.9 second  faster .....
> Huh.  I dunno how, but it definitely seems like the conditional
> version wasn't operating properly.
>
> Does putting a "likely" branch prediction on that if make any difference?

I already did this test by adding a "likely" but it has no effect :-(

Patrice

>
>> STM32MP> bootstage report
>> Timer summary in microseconds (12 records):
>>        Mark    Elapsed  Stage
>>           0          0  reset
>>      84,265     84,265  SPL
>>     868,330    784,065  end SPL
>>     871,163      2,833  board_init_f
>>   2,360,135  1,488,972  board_init_r
>>   2,786,920    426,785  id=64
>>   2,818,189     31,269  id=65
>>   2,818,889        700  main_loop
>>   2,877,661     58,772  id=175
>>
>> Accumulated time:
>>                 54,489  dm_r
>>                 56,778  dm_spl
>>                644,883  dm_f
>>
>> Is the compiler doing some optimization link to the CPU pipeline prediction or something else ?
>>
>> Patrice

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] libfdt: Remove special handling for unaligned reads
       [not found]                                               ` <1f4881a0-8d52-c280-3376-d66943d489a1-qxv4g6HH51o@public.gmane.org>
@ 2020-02-07  5:23                                                 ` David Gibson
  0 siblings, 0 replies; 26+ messages in thread
From: David Gibson @ 2020-02-07  5:23 UTC (permalink / raw)
  To: Patrice CHOTARD
  Cc: Rob Herring, Tom Rini, Devicetree Compiler, Patrick DELAUNAY

[-- Attachment #1: Type: text/plain, Size: 5526 bytes --]

On Mon, Feb 03, 2020 at 01:14:15PM +0000, Patrice CHOTARD wrote:
> Hi David
> 
> On 2/2/20 7:20 AM, David Gibson wrote:
> > On Fri, Jan 31, 2020 at 08:44:41AM +0000, Patrice CHOTARD wrote:
> >> Hi
> >>
> >> On 1/31/20 4:38 AM, David Gibson wrote:
> >>> On Thu, Jan 30, 2020 at 02:18:21PM -0600, Rob Herring wrote:
> >>>> On Thu, Jan 30, 2020 at 8:44 AM Patrice CHOTARD <patrice.chotard@st.com> wrote:
> >>>>> Hi Rob, Tom, David
> >>>>>
> >>>>> On 1/28/20 3:08 PM, Rob Herring wrote:
> >>>>>> On Tue, Jan 28, 2020 at 7:43 AM Tom Rini <trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org> wrote:
> >>>>>>> On Tue, Jan 28, 2020 at 07:59:18PM +1100, David Gibson wrote:
> >>>>>>>> On Mon, Jan 27, 2020 at 10:04:34AM -0500, Tom Rini wrote:
> >>>>>>>>> On Mon, Jan 27, 2020 at 02:23:51PM +1100, David Gibson wrote:
> >>>>>>>>>> On Thu, Jan 23, 2020 at 07:16:50AM -0500, Tom Rini wrote:
> >>>>>>>>>>> On Thu, Jan 23, 2020 at 08:14:40PM +1100, David Gibson wrote:
> >>>>>>>>>>>> On Fri, Jan 17, 2020 at 10:31:06AM -0500, Tom Rini wrote:
> >>> [snip]
> >>>>>> Why not just fix the helpers for the aligned case and be done with it:
> >>>>>>
> >>>>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
> >>>>>> {
> >>>>>>        const uint8_t *bp = (const uint8_t *)p;
> >>>>>>
> >>>>>> +       if (!((unsigned long)p & 0x3))
> >>>>>> +               return fdt32_to_cpu(*p);
> >>>>>> +
> >>>>>>        return ((uint32_t)bp[0] << 24)
> >>>>>>                | ((uint32_t)bp[1] << 16)
> >>>>>>                | ((uint32_t)bp[2] << 8)
> >>>>>>                | bp[3];
> >>>>>> }
> >>>>> Here are the results, before and after your proposed patch:
> >>>>>
> >>>>> tested tag: V2020.04-RC1
> >>>>>
> >>>>> STM32MP> bootstage report
> >>>>> Timer summary in microseconds (12 records):
> >>>>>        Mark    Elapsed  Stage
> >>>>>           0          0  reset
> >>>>>      84,268     84,268  SPL
> >>>>>     962,921    878,653  end SPL
> >>>>>     965,800      2,879  board_init_f
> >>>>>   4,314,348  3,348,548  board_init_r
> >>>>>   4,863,763    549,415  id=64
> >>>>>   4,908,759     44,996  id=65
> >>>>>   4,909,459        700  main_loop
> >>>>>   5,322,309    412,850  id=175
> >>>>>
> >>>>> Accumulated time:
> >>>>>                 83,284  dm_r
> >>>>>                 95,842  dm_spl
> >>>>>              1,502,020  dm_f
> >>>>>
> >>>>>
> >>>>> -------------------------------------------------------------------------------
> >>>>>
> >>>>> tested tag : V2020.04-RC1 + following fdt32_ld patch :
> >>>>>
> >>>>> static inline uint32_t fdt32_ld(const fdt32_t *p)
> >>>>> {
> >>>>>        const uint8_t *bp = (const uint8_t *)p;
> >>>>>
> >>>>> +       if (!((unsigned long)p & 0x3))
> >>>>> +               return fdt32_to_cpu(*p);
> >>>>> +
> >>>>>        return ((uint32_t)bp[0] << 24)
> >>>>>                | ((uint32_t)bp[1] << 16)
> >>>>>                | ((uint32_t)bp[2] << 8)
> >>>>>                | bp[3];
> >>>>> }
> >>>>>
> >>>>>
> >>>>> STM32MP> bootstage report
> >>>>> Timer summary in microseconds (12 records):
> >>>>>        Mark    Elapsed  Stage
> >>>>>           0          0  reset
> >>>>>      84,264     84,264  SPL
> >>>>>     959,300    875,036  end SPL
> >>>>>     962,192      2,892  board_init_f
> >>>>>   4,310,598  3,348,406  board_init_r
> >>>>>   4,860,074    549,476  id=64
> >>>>>   4,905,119     45,045  id=65
> >>>>>   4,905,819        700  main_loop
> >>>>>   5,098,636    192,817  id=175
> >>>>>
> >>>>> Accumulated time:
> >>>>>                 83,202  dm_r
> >>>>>                 95,252  dm_spl
> >>>>>              1,501,950  dm_f
> >>>>>
> >>>>>
> >>>>> There is no gain in board_init_r(), the added alignment test is expensive itself.
> >>> Drat.
> >>>
> >>>> That's odd. It should be just an AND and a conditional branch added.
> >>>> Those should be a few cycles compared to tens to hundreds of cycles
> >>>> for 4 loads instead of 1.
> >>> The later loads are very likely to hit in cache though, right?  Or is
> >>> this done before the caches are activated?
> >>>
> >>>> Doesn't u-boot build with -Os typically? Maybe it's not actually
> >>>> inlining.
> >>> Apparently it is.  And yet... it seems pretty suspicious that the
> >>> numbers are so close, despite doing something quite different here.
> >>> Can we somehow verify that the fast path is really being executed?
> >>>
> >>> For comparison purposes, what are the numbers if we change these to
> >>> unconditionally do an aligned load?
> >>>
> >> I made one more test to be 100% sure and the result is weird .... 
> >>
> >> static inline uint32_t fdt32_ld(const fdt32_t *p)
> >> {
> >>
> >>        if (!((unsigned long)p & 0x3))
> >>              return fdt32_to_cpu(*p);
> >>
> >>        while (1) {};
> >> }
> >>
> >> the results are back to 'normal', board_init_r() execution is 1.9 second  faster .....
> > Huh.  I dunno how, but it definitely seems like the conditional
> > version wasn't operating properly.
> >
> > Does putting a "likely" branch prediction on that if make any difference?
> 
> I already did this test by adding a "likely" but it has no effect
> :-(

Interesting.  I think someone who knows ARM asm needs to look at the
compiler output for the two cases to see what's going on.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2020-02-07  5:23 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-17 15:31 [PATCH] libfdt: Remove special handling for unaligned reads Tom Rini
     [not found] ` <20200117153106.29909-1-trini-OWPKS81ov/FWk0Htik3J/w@public.gmane.org>
2020-01-18  0:32   ` Simon Glass
2020-01-23  9:14   ` David Gibson
     [not found]     ` <20200123091440.GQ2347-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
2020-01-23 12:16       ` Tom Rini
2020-01-27  3:23         ` David Gibson
     [not found]           ` <20200127032351.GA1818-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
2020-01-27 15:04             ` Tom Rini
2020-01-27 19:18               ` Simon Glass
2020-01-28  8:59               ` David Gibson
     [not found]                 ` <20200128085918.GO42099-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
2020-01-28 13:43                   ` Tom Rini
2020-01-28 14:08                     ` Rob Herring
     [not found]                       ` <CAL_JsqJ0Pp8e4iFOKuKESO4DJ0ky2aO+GB_HDiGcNhY1qBxgfw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-28 14:51                         ` Tom Rini
2020-01-29  1:52                           ` David Gibson
2020-01-29  1:49                         ` David Gibson
2020-01-29  2:15                         ` Ian Lepore
     [not found]                           ` <b5e2288c5b06419fbfd057b8895ade5c5493cde5.camel-h+KGxgPPiopAfugRpC6u6w@public.gmane.org>
2020-01-29 16:05                             ` Rob Herring
     [not found]                               ` <CAL_JsqLYobRrLtF9z3eMt=YneGuxDpvpKU7DQruza-jq4n6B3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-29 16:08                                 ` Ian Lepore
2020-01-30 14:44                         ` Patrice CHOTARD
     [not found]                           ` <c56c4206-1be1-1701-1423-1f74f6913bbf-qxv4g6HH51o@public.gmane.org>
2020-01-30 20:18                             ` Rob Herring
     [not found]                               ` <CAL_Jsq+43Opib7vUpZ_VxbJig3gT8crBoL6nw_yC=xOfcsbOvA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2020-01-30 20:49                                 ` Tom Rini
2020-01-31  3:38                                 ` David Gibson
     [not found]                                   ` <20200131033810.GE15210-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
2020-01-31  4:15                                     ` Tom Rini
2020-01-31  8:44                                     ` Patrice CHOTARD
     [not found]                                       ` <7bbbb5a8-8e35-e95c-8d03-6970fd6b023e-qxv4g6HH51o@public.gmane.org>
2020-02-02  6:20                                         ` David Gibson
     [not found]                                           ` <20200202062031.GB30687-K0bRW+63XPQe6aEkudXLsA@public.gmane.org>
2020-02-03 13:14                                             ` Patrice CHOTARD
     [not found]                                               ` <1f4881a0-8d52-c280-3376-d66943d489a1-qxv4g6HH51o@public.gmane.org>
2020-02-07  5:23                                                 ` David Gibson
2020-01-29  1:29                     ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.