All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH 1/2] buildhistory: simplify buildhistory_list_files()
@ 2019-01-06 18:13 Jacob Kroon
  2019-01-06 18:13 ` [RFC][PATCH 2/2] buildhistory: support generating md5sum of files Jacob Kroon
  0 siblings, 1 reply; 12+ messages in thread
From: Jacob Kroon @ 2019-01-06 18:13 UTC (permalink / raw)
  To: openembedded-core

Avoid duplicating shell code for the two cases, fakeroot/non-fakeroot.

Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
---
 meta/classes/buildhistory.bbclass | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
index d1f3e6aa82..33eb1b00f6 100644
--- a/meta/classes/buildhistory.bbclass
+++ b/meta/classes/buildhistory.bbclass
@@ -519,12 +519,14 @@ buildhistory_get_sdk_installed_target() {
 
 buildhistory_list_files() {
 	# List the files in the specified directory, but exclude date/time etc.
-	# This awk script is somewhat messy, but handles where the size is not printed for device files under pseudo
+	# This is somewhat messy, but handles where the size is not printed for device files under pseudo
+	( cd $1
+	find_cmd='find . ! -path . -printf "%M %-10u %-10g %10s %p -> %l\n"'
 	if [ "$3" = "fakeroot" ] ; then
-		( cd $1 && ${FAKEROOTENV} ${FAKEROOTCMD} find . ! -path . -printf "%M %-10u %-10g %10s %p -> %l\n" | sort -k5 | sed 's/ * -> $//' > $2 )
+		eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
 	else
-		( cd $1 && find . ! -path . -printf "%M %-10u %-10g %10s %p -> %l\n" | sort -k5 | sed 's/ * -> $//' > $2 )
-	fi
+		eval $find_cmd
+	fi | sort -k5 | sed 's/ * -> $//' > $2 )
 }
 
 buildhistory_list_pkg_files() {
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-06 18:13 [RFC][PATCH 1/2] buildhistory: simplify buildhistory_list_files() Jacob Kroon
@ 2019-01-06 18:13 ` Jacob Kroon
  2019-01-06 23:08   ` André Draszik
                     ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Jacob Kroon @ 2019-01-06 18:13 UTC (permalink / raw)
  To: openembedded-core

Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
when doing reproducible builds.

When enabled this will additionally create:

  files-in-package-md5.txt
  files-in-image-md5.txt
  files-in-sdk-md5.txt

containing the md5 checksums of regular files.

Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
---
 meta/classes/buildhistory.bbclass | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
index 33eb1b00f6..00f0701dec 100644
--- a/meta/classes/buildhistory.bbclass
+++ b/meta/classes/buildhistory.bbclass
@@ -7,7 +7,8 @@
 # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
 #
 
-BUILDHISTORY_FEATURES ?= "image package sdk"
+BUILDHISTORY_FEATURES ?= "image package sdk \
+  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
 BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
 BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
 BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
@@ -526,7 +527,12 @@ buildhistory_list_files() {
 		eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
 	else
 		eval $find_cmd
-	fi | sort -k5 | sed 's/ * -> $//' > $2 )
+	fi | sort -k5 | sed 's/ * -> $//' > $2
+	if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
+		md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
+		find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
+		[ -s $md5filename ] || rm $md5filename # remove result if empty
+	fi )
 }
 
 buildhistory_list_pkg_files() {
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-06 18:13 ` [RFC][PATCH 2/2] buildhistory: support generating md5sum of files Jacob Kroon
@ 2019-01-06 23:08   ` André Draszik
  2019-01-07  9:38     ` Jacob Kroon
  2019-01-07 14:17   ` Jacob Kroon
  2019-01-09 11:20   ` Peter Kjellerstedt
  2 siblings, 1 reply; 12+ messages in thread
From: André Draszik @ 2019-01-06 23:08 UTC (permalink / raw)
  To: openembedded-core

Hi,

On Sun, 2019-01-06 at 19:13 +0100, Jacob Kroon wrote:
> Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> when doing reproducible builds.
> 
> When enabled this will additionally create:
> 
>   files-in-package-md5.txt
>   files-in-image-md5.txt
>   files-in-sdk-md5.txt
> 
> containing the md5 checksums of regular files.
> 
> Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> ---
>  meta/classes/buildhistory.bbclass | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/meta/classes/buildhistory.bbclass
> b/meta/classes/buildhistory.bbclass
> index 33eb1b00f6..00f0701dec 100644
> --- a/meta/classes/buildhistory.bbclass
> +++ b/meta/classes/buildhistory.bbclass
> @@ -7,7 +7,8 @@
>  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
>  #
>  
> -BUILDHISTORY_FEATURES ?= "image package sdk"
> +BUILDHISTORY_FEATURES ?= "image package sdk \
> +  ${@ "md5" if
> bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
>  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
>  BUILDHISTORY_DIR_IMAGE =
> "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
>  BUILDHISTORY_DIR_PACKAGE =
> "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> @@ -526,7 +527,12 @@ buildhistory_list_files() {
>  		eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
>  	else
>  		eval $find_cmd
> -	fi | sort -k5 | sed 's/ * -> $//' > $2 )
> +	fi | sort -k5 | sed 's/ * -> $//' > $2
> +	if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0',
> d)}" = "1" ] ; then
> +		md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> +		find -type f | xargs -I{} -n1 md5sum {} | sort -k2 >
> $md5filename

Why don't you
  find . -type f -exec md5sum {} + | sort -sk2 > $md5filename
?
It'll be quite a bit faster because way fewer processes will be spawned.

Am I missing something?

I don't know what the intended use-case of the md5 files is, but could
sha256 or similar maybe be more appropriate?

Cheers,
Andre'


> +		[ -s $md5filename ] || rm $md5filename # remove result if
> empty
> +	fi )
>  }
>  
>  buildhistory_list_pkg_files() {
> -- 
> 2.11.0
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-06 23:08   ` André Draszik
@ 2019-01-07  9:38     ` Jacob Kroon
  2019-01-07 14:31       ` Richard Purdie
  0 siblings, 1 reply; 12+ messages in thread
From: Jacob Kroon @ 2019-01-07  9:38 UTC (permalink / raw)
  To: André Draszik; +Cc: openembedded-core

Hi André,

On Mon, Jan 7, 2019 at 12:09 AM André Draszik <git@andred.net> wrote:
>
> Hi,
>
> On Sun, 2019-01-06 at 19:13 +0100, Jacob Kroon wrote:
> > Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> > when doing reproducible builds.
> >
> > When enabled this will additionally create:
> >
> >   files-in-package-md5.txt
> >   files-in-image-md5.txt
> >   files-in-sdk-md5.txt
> >
> > containing the md5 checksums of regular files.
> >
> > Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> > ---
> >  meta/classes/buildhistory.bbclass | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/meta/classes/buildhistory.bbclass
> > b/meta/classes/buildhistory.bbclass
> > index 33eb1b00f6..00f0701dec 100644
> > --- a/meta/classes/buildhistory.bbclass
> > +++ b/meta/classes/buildhistory.bbclass
> > @@ -7,7 +7,8 @@
> >  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
> >  #
> >
> > -BUILDHISTORY_FEATURES ?= "image package sdk"
> > +BUILDHISTORY_FEATURES ?= "image package sdk \
> > +  ${@ "md5" if
> > bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
> >  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
> >  BUILDHISTORY_DIR_IMAGE =
> > "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
> >  BUILDHISTORY_DIR_PACKAGE =
> > "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> > @@ -526,7 +527,12 @@ buildhistory_list_files() {
> >               eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
> >       else
> >               eval $find_cmd
> > -     fi | sort -k5 | sed 's/ * -> $//' > $2 )
> > +     fi | sort -k5 | sed 's/ * -> $//' > $2
> > +     if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0',
> > d)}" = "1" ] ; then
> > +             md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> > +             find -type f | xargs -I{} -n1 md5sum {} | sort -k2 >
> > $md5filename
>
> Why don't you
>   find . -type f -exec md5sum {} + | sort -sk2 > $md5filename
> ?
> It'll be quite a bit faster because way fewer processes will be spawned.
>
> Am I missing something?

You're right, I will update the patch. I'm assuming I don't need the
stable sort, -s,
since the filenames should all be unique.

> I don't know what the intended use-case of the md5 files is, but could
> sha256 or similar maybe be more appropriate?

I thought it would be a good idea to store some sort of checksum of files in the
buildhistory when doing reproducible builds, so that it is easier to detect
when a rebuild produces changed files, but perhaps there is some way to do
this already that I am missing ?

But I have no real motivation for choosing md5, other than that I
assumed it would be less
cpu intensive than sha256, and the fact I'm not too worried about collisions.

Thanks for the feedback,
Jacob

> Cheers,
> Andre'
>
>
> > +             [ -s $md5filename ] || rm $md5filename # remove result if
> > empty
> > +     fi )
> >  }
> >
> >  buildhistory_list_pkg_files() {
> > --
> > 2.11.0
> >
>
> --
> _______________________________________________
> Openembedded-core mailing list
> Openembedded-core@lists.openembedded.org
> http://lists.openembedded.org/mailman/listinfo/openembedded-core


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-06 18:13 ` [RFC][PATCH 2/2] buildhistory: support generating md5sum of files Jacob Kroon
  2019-01-06 23:08   ` André Draszik
@ 2019-01-07 14:17   ` Jacob Kroon
  2019-01-08 10:32     ` Mikko.Rapeli
  2019-01-09 11:20   ` Peter Kjellerstedt
  2 siblings, 1 reply; 12+ messages in thread
From: Jacob Kroon @ 2019-01-07 14:17 UTC (permalink / raw)
  To: openembedded-core

On Sun, Jan 6, 2019 at 7:14 PM Jacob Kroon <jacob.kroon@gmail.com> wrote:
>
> Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> when doing reproducible builds.
>
> When enabled this will additionally create:
>
>   files-in-package-md5.txt
>   files-in-image-md5.txt
>   files-in-sdk-md5.txt
>
> containing the md5 checksums of regular files.
>
> Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> ---
>  meta/classes/buildhistory.bbclass | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
> index 33eb1b00f6..00f0701dec 100644
> --- a/meta/classes/buildhistory.bbclass
> +++ b/meta/classes/buildhistory.bbclass
> @@ -7,7 +7,8 @@
>  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
>  #
>
> -BUILDHISTORY_FEATURES ?= "image package sdk"
> +BUILDHISTORY_FEATURES ?= "image package sdk \
> +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
>  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
>  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
>  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> @@ -526,7 +527,12 @@ buildhistory_list_files() {
>                 eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
>         else
>                 eval $find_cmd
> -       fi | sort -k5 | sed 's/ * -> $//' > $2 )
> +       fi | sort -k5 | sed 's/ * -> $//' > $2
> +       if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> +               md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> +               find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> +               [ -s $md5filename ] || rm $md5filename # remove result if empty

I added this remove because I thought it didn't make sense to keep
empty files around, but I now realize that the "files-in-package.txt"
file is kept around, even if empty. Is there a preference on what to
do here ?

> +       fi )
>  }
>
>  buildhistory_list_pkg_files() {
> --
> 2.11.0
>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-07  9:38     ` Jacob Kroon
@ 2019-01-07 14:31       ` Richard Purdie
  2019-01-07 15:50         ` Jacob Kroon
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Purdie @ 2019-01-07 14:31 UTC (permalink / raw)
  To: Jacob Kroon, André Draszik; +Cc: openembedded-core

On Mon, 2019-01-07 at 10:38 +0100, Jacob Kroon wrote:
> On Mon, Jan 7, 2019 at 12:09 AM André Draszik <git@andred.net> wrote:
> > I don't know what the intended use-case of the md5 files is, but
> > could
> > sha256 or similar maybe be more appropriate?
> 
> I thought it would be a good idea to store some sort of checksum of
> files in the
> buildhistory when doing reproducible builds, so that it is easier to
> detect
> when a rebuild produces changed files, but perhaps there is some way
> to do
> this already that I am missing ?
> 
> But I have no real motivation for choosing md5, other than that I
> assumed it would be less
> cpu intensive than sha256, and the fact I'm not too worried about
> collisions.

I used to lean that way as well; however, there is a fair bit of
negativity associated with *any* use of md5 so I'm tempted to say use
sha256 as we don't know how these checksums may be used in the future.

I'd note there is some overlap here with the sstate hash equivalency
work where we build a hash representing a task's complete output. Can
you reuse any of the functions there?

Cheers,

Richard



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-07 14:31       ` Richard Purdie
@ 2019-01-07 15:50         ` Jacob Kroon
  0 siblings, 0 replies; 12+ messages in thread
From: Jacob Kroon @ 2019-01-07 15:50 UTC (permalink / raw)
  To: Richard Purdie; +Cc: openembedded-core

Hi Richard,

On Mon, Jan 7, 2019 at 3:31 PM Richard Purdie
<richard.purdie@linuxfoundation.org> wrote:
>
> On Mon, 2019-01-07 at 10:38 +0100, Jacob Kroon wrote:
> > On Mon, Jan 7, 2019 at 12:09 AM André Draszik <git@andred.net> wrote:
> > > I don't know what the intended use-case of the md5 files is, but
> > > could
> > > sha256 or similar maybe be more appropriate?
> >
> > I thought it would be a good idea to store some sort of checksum of
> > files in the
> > buildhistory when doing reproducible builds, so that it is easier to
> > detect
> > when a rebuild produces changed files, but perhaps there is some way
> > to do
> > this already that I am missing ?
> >
> > But I have no real motivation for choosing md5, other than that I
> > assumed it would be less
> > cpu intensive than sha256, and the fact I'm not too worried about
> > collisions.
>
> I used to lean that way as well however there is a fair bit of
> negativity associated with *any* use of md5 so I'm tempted to say use
> sha256 as we don't know how these checksums may be used in the future.

Ok.

> I'd note there is some overlap here with the sstate hash equivalency
> work where we build a hash representing a tasks complete output. Can
> you reuse any of the functions there?

I'll look into it, I realize it would of course be nice if we only
needed to calculate the checksums once.

Thanks for the feedback,
Jacob

> Cheers,
>
> Richard
>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-07 14:17   ` Jacob Kroon
@ 2019-01-08 10:32     ` Mikko.Rapeli
  2019-01-08 10:36       ` Jacob Kroon
  0 siblings, 1 reply; 12+ messages in thread
From: Mikko.Rapeli @ 2019-01-08 10:32 UTC (permalink / raw)
  To: jacob.kroon; +Cc: openembedded-core

On Mon, Jan 07, 2019 at 03:17:00PM +0100, Jacob Kroon wrote:
> On Sun, Jan 6, 2019 at 7:14 PM Jacob Kroon <jacob.kroon@gmail.com> wrote:
> >
> > Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> > when doing reproducible builds.
> >
> > When enabled this will additionally create:
> >
> >   files-in-package-md5.txt
> >   files-in-image-md5.txt
> >   files-in-sdk-md5.txt
> >
> > containing the md5 checksums of regular files.
> >
> > Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> > ---
> >  meta/classes/buildhistory.bbclass | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
> > index 33eb1b00f6..00f0701dec 100644
> > --- a/meta/classes/buildhistory.bbclass
> > +++ b/meta/classes/buildhistory.bbclass
> > @@ -7,7 +7,8 @@
> >  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
> >  #
> >
> > -BUILDHISTORY_FEATURES ?= "image package sdk"
> > +BUILDHISTORY_FEATURES ?= "image package sdk \
> > +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
> >  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
> >  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
> >  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> > @@ -526,7 +527,12 @@ buildhistory_list_files() {
> >                 eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
> >         else
> >                 eval $find_cmd
> > -       fi | sort -k5 | sed 's/ * -> $//' > $2 )
> > +       fi | sort -k5 | sed 's/ * -> $//' > $2
> > +       if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> > +               md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> > +               find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> > +               [ -s $md5filename ] || rm $md5filename # remove result if empty
> 
> I added this remove because I thought it didn't make sense to keep
> empty files around, but I now realize that the "files-in-package.txt"
> file is kept around, even if empty. Is there a preference on what to
> do here ?

FWIW, I'm wiping all the buildhistory data with external scripts before doing a
clean build. Basically a "git rm -rf *" in the buildhistory directory. Otherwise
stale data about images, packages etc. which are no longer built remains in
buildhistory.

-Mikko

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-08 10:32     ` Mikko.Rapeli
@ 2019-01-08 10:36       ` Jacob Kroon
  2019-01-08 11:02         ` Mikko.Rapeli
  0 siblings, 1 reply; 12+ messages in thread
From: Jacob Kroon @ 2019-01-08 10:36 UTC (permalink / raw)
  To: Mikko.Rapeli; +Cc: openembedded-core

On Tue, Jan 8, 2019 at 11:32 AM <Mikko.Rapeli@bmw.de> wrote:
>
> On Mon, Jan 07, 2019 at 03:17:00PM +0100, Jacob Kroon wrote:
> > On Sun, Jan 6, 2019 at 7:14 PM Jacob Kroon <jacob.kroon@gmail.com> wrote:
> > >
> > > Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> > > when doing reproducible builds.
> > >
> > > When enabled this will additionally create:
> > >
> > >   files-in-package-md5.txt
> > >   files-in-image-md5.txt
> > >   files-in-sdk-md5.txt
> > >
> > > containing the md5 checksums of regular files.
> > >
> > > Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> > > ---
> > >  meta/classes/buildhistory.bbclass | 10 ++++++++--
> > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
> > > index 33eb1b00f6..00f0701dec 100644
> > > --- a/meta/classes/buildhistory.bbclass
> > > +++ b/meta/classes/buildhistory.bbclass
> > > @@ -7,7 +7,8 @@
> > >  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
> > >  #
> > >
> > > -BUILDHISTORY_FEATURES ?= "image package sdk"
> > > +BUILDHISTORY_FEATURES ?= "image package sdk \
> > > +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
> > >  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
> > >  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
> > >  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> > > @@ -526,7 +527,12 @@ buildhistory_list_files() {
> > >                 eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
> > >         else
> > >                 eval $find_cmd
> > > -       fi | sort -k5 | sed 's/ * -> $//' > $2 )
> > > +       fi | sort -k5 | sed 's/ * -> $//' > $2
> > > +       if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> > > +               md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> > > +               find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> > > +               [ -s $md5filename ] || rm $md5filename # remove result if empty
> >
> > I added this remove because I thought it didn't make sense to keep
> > empty files around, but I now realize that the "files-in-package.txt"
> > file is kept around, even if empty. Is there a preference on what to
> > do here ?
>
> FWIW, I'm wiping the all buildhistory data with external scripts before doing a
> clean build. Basically a "git rm -rf *" in buildhistory directory. Otherwise
> stale data about images, packages etc which are no longer built remain in
> buildhistory.

I think one can also do:

BB_ENV_EXTRAWHITE=BUILDHISTORY_RESET BUILDHISTORY_RESET=1 bitbake <build>

(see buildhistory.bbclass)

> -Mikko


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-08 10:36       ` Jacob Kroon
@ 2019-01-08 11:02         ` Mikko.Rapeli
  0 siblings, 0 replies; 12+ messages in thread
From: Mikko.Rapeli @ 2019-01-08 11:02 UTC (permalink / raw)
  To: jacob.kroon; +Cc: openembedded-core

On Tue, Jan 08, 2019 at 11:36:24AM +0100, Jacob Kroon wrote:
> On Tue, Jan 8, 2019 at 11:32 AM <Mikko.Rapeli@bmw.de> wrote:
> >
> > On Mon, Jan 07, 2019 at 03:17:00PM +0100, Jacob Kroon wrote:
> > > On Sun, Jan 6, 2019 at 7:14 PM Jacob Kroon <jacob.kroon@gmail.com> wrote:
> > > >
> > > > Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> > > > when doing reproducible builds.
> > > >
> > > > When enabled this will additionally create:
> > > >
> > > >   files-in-package-md5.txt
> > > >   files-in-image-md5.txt
> > > >   files-in-sdk-md5.txt
> > > >
> > > > containing the md5 checksums of regular files.
> > > >
> > > > Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> > > > ---
> > > >  meta/classes/buildhistory.bbclass | 10 ++++++++--
> > > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/meta/classes/buildhistory.bbclass b/meta/classes/buildhistory.bbclass
> > > > index 33eb1b00f6..00f0701dec 100644
> > > > --- a/meta/classes/buildhistory.bbclass
> > > > +++ b/meta/classes/buildhistory.bbclass
> > > > @@ -7,7 +7,8 @@
> > > >  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
> > > >  #
> > > >
> > > > -BUILDHISTORY_FEATURES ?= "image package sdk"
> > > > +BUILDHISTORY_FEATURES ?= "image package sdk \
> > > > +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
> > > >  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
> > > >  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
> > > >  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> > > > @@ -526,7 +527,12 @@ buildhistory_list_files() {
> > > >                 eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
> > > >         else
> > > >                 eval $find_cmd
> > > > -       fi | sort -k5 | sed 's/ * -> $//' > $2 )
> > > > +       fi | sort -k5 | sed 's/ * -> $//' > $2
> > > > +       if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> > > > +               md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> > > > +               find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> > > > +               [ -s $md5filename ] || rm $md5filename # remove result if empty
> > >
> > > I added this remove because I thought it didn't make sense to keep
> > > empty files around, but I now realize that the "files-in-package.txt"
> > > file is kept around, even if empty. Is there a preference on what to
> > > do here ?
> >
> > FWIW, I'm wiping the all buildhistory data with external scripts before doing a
> > clean build. Basically a "git rm -rf *" in buildhistory directory. Otherwise
> > stale data about images, packages etc which are no longer built remain in
> > buildhistory.
> 
> I think one can also do:
> 
> BB_ENV_EXTRAWHITE=BUILDHISTORY_RESET BUILDHISTORY_RESET=1 bitbake <build>
> 
> (see buildhistory.bbclass)

Thanks for the hint!

-Mikko

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-06 18:13 ` [RFC][PATCH 2/2] buildhistory: support generating md5sum of files Jacob Kroon
  2019-01-06 23:08   ` André Draszik
  2019-01-07 14:17   ` Jacob Kroon
@ 2019-01-09 11:20   ` Peter Kjellerstedt
  2019-01-09 18:36     ` Jacob Kroon
  2 siblings, 1 reply; 12+ messages in thread
From: Peter Kjellerstedt @ 2019-01-09 11:20 UTC (permalink / raw)
  To: Jacob Kroon, openembedded-core

> -----Original Message-----
> From: openembedded-core-bounces@lists.openembedded.org <openembedded-
> core-bounces@lists.openembedded.org> On Behalf Of Jacob Kroon
> Sent: den 6 januari 2019 19:14
> To: openembedded-core@lists.openembedded.org
> Subject: [OE-core] [RFC][PATCH 2/2] buildhistory: support generating
> md5sum of files
> 
> Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> when doing reproducible builds.
> 
> When enabled this will additionally create:
> 
>   files-in-package-md5.txt
>   files-in-image-md5.txt
>   files-in-sdk-md5.txt
> 
> containing the md5 checksums of regular files.
> 
> Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> ---
>  meta/classes/buildhistory.bbclass | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/meta/classes/buildhistory.bbclass
> b/meta/classes/buildhistory.bbclass
> index 33eb1b00f6..00f0701dec 100644
> --- a/meta/classes/buildhistory.bbclass
> +++ b/meta/classes/buildhistory.bbclass
> @@ -7,7 +7,8 @@
>  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
>  #
> 
> -BUILDHISTORY_FEATURES ?= "image package sdk"
> +BUILDHISTORY_FEATURES ?= "image package sdk \
> +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"

May I suggest using ' instead of " within the Python block to 
lessen confusion when quotes are used within another set of quotes. 
(Even if bitbake handles it fine, not all editor coloring modes do.)

>  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
>  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
>  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> @@ -526,7 +527,12 @@ buildhistory_list_files() {
>  		eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
>  	else
>  		eval $find_cmd
> -	fi | sort -k5 | sed 's/ * -> $//' > $2 )
> +	fi | sort -k5 | sed 's/ * -> $//' > $2
> +	if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> +		md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> +		find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> +		[ -s $md5filename ] || rm $md5filename # remove result if empty
> +	fi )
>  }
> 
>  buildhistory_list_pkg_files() {
> --
> 2.11.0

Don't forget to update the documentation for BUILDHISTORY_FEATURES 
if this is added.

//Peter



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC][PATCH 2/2] buildhistory: support generating md5sum of files
  2019-01-09 11:20   ` Peter Kjellerstedt
@ 2019-01-09 18:36     ` Jacob Kroon
  0 siblings, 0 replies; 12+ messages in thread
From: Jacob Kroon @ 2019-01-09 18:36 UTC (permalink / raw)
  To: Peter Kjellerstedt; +Cc: openembedded-core

On Wed, Jan 9, 2019 at 12:20 PM Peter Kjellerstedt
<peter.kjellerstedt@axis.com> wrote:
>
> > -----Original Message-----
> > From: openembedded-core-bounces@lists.openembedded.org <openembedded-
> > core-bounces@lists.openembedded.org> On Behalf Of Jacob Kroon
> > Sent: den 6 januari 2019 19:14
> > To: openembedded-core@lists.openembedded.org
> > Subject: [OE-core] [RFC][PATCH 2/2] buildhistory: support generating
> > md5sum of files
> >
> > Introduce 'md5' in BUILDHISTORY_FEATURES and enable it by default
> > when doing reproducible builds.
> >
> > When enabled this will additionally create:
> >
> >   files-in-package-md5.txt
> >   files-in-image-md5.txt
> >   files-in-sdk-md5.txt
> >
> > containing the md5 checksums of regular files.
> >
> > Signed-off-by: Jacob Kroon <jacob.kroon@gmail.com>
> > ---
> >  meta/classes/buildhistory.bbclass | 10 ++++++++--
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/meta/classes/buildhistory.bbclass
> > b/meta/classes/buildhistory.bbclass
> > index 33eb1b00f6..00f0701dec 100644
> > --- a/meta/classes/buildhistory.bbclass
> > +++ b/meta/classes/buildhistory.bbclass
> > @@ -7,7 +7,8 @@
> >  # Copyright (C) 2007-2011 Koen Kooi <koen@openembedded.org>
> >  #
> >
> > -BUILDHISTORY_FEATURES ?= "image package sdk"
> > +BUILDHISTORY_FEATURES ?= "image package sdk \
> > +  ${@ "md5" if bb.utils.to_boolean(d.getVar('BUILD_REPRODUCIBLE_BINARIES')) else ""}"
>
> May I suggest using ' instead of " within the Python block to
> lessen confusion when quotes are used within another set of quotes.
> (Even if bitbake handles it fine, not all editor coloring modes do.)
>

Ok.

> >  BUILDHISTORY_DIR ?= "${TOPDIR}/buildhistory"
> >  BUILDHISTORY_DIR_IMAGE = "${BUILDHISTORY_DIR}/images/${MACHINE_ARCH}/${TCLIBC}/${IMAGE_BASENAME}"
> >  BUILDHISTORY_DIR_PACKAGE = "${BUILDHISTORY_DIR}/packages/${MULTIMACH_TARGET_SYS}/${PN}"
> > @@ -526,7 +527,12 @@ buildhistory_list_files() {
> >               eval ${FAKEROOTENV} ${FAKEROOTCMD} $find_cmd
> >       else
> >               eval $find_cmd
> > -     fi | sort -k5 | sed 's/ * -> $//' > $2 )
> > +     fi | sort -k5 | sed 's/ * -> $//' > $2
> > +     if [ "${@bb.utils.contains('BUILDHISTORY_FEATURES', 'md5', '1', '0', d)}" = "1" ] ; then
> > +             md5filename=$(echo $2 | sed 's/\.txt$/-md5.txt/')
> > +             find -type f | xargs -I{} -n1 md5sum {} | sort -k2 > $md5filename
> > +             [ -s $md5filename ] || rm $md5filename # remove result if empty
> > +     fi )
> >  }
> >
> >  buildhistory_list_pkg_files() {
> > --
> > 2.11.0
>
> Don't forget to update the documentation for BUILDHISTORY_FEATURES
> if this is added.
>

Oops, yeah I forgot that.

I'll send a V2 with all comments addressed.

It does seem like once the equivalence work is ready and OE hashes its
outputs, much in buildhistory.bbclass can be replaced with already
prepared information. But for the cases where it is not used, and
considering how small the change is, maybe it makes sense to add it
anyway.

Thanks for the feedback,
Jacob

> //Peter
>


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-01-09 18:36 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-06 18:13 [RFC][PATCH 1/2] buildhistory: simplify buildhistory_list_files() Jacob Kroon
2019-01-06 18:13 ` [RFC][PATCH 2/2] buildhistory: support generating md5sum of files Jacob Kroon
2019-01-06 23:08   ` André Draszik
2019-01-07  9:38     ` Jacob Kroon
2019-01-07 14:31       ` Richard Purdie
2019-01-07 15:50         ` Jacob Kroon
2019-01-07 14:17   ` Jacob Kroon
2019-01-08 10:32     ` Mikko.Rapeli
2019-01-08 10:36       ` Jacob Kroon
2019-01-08 11:02         ` Mikko.Rapeli
2019-01-09 11:20   ` Peter Kjellerstedt
2019-01-09 18:36     ` Jacob Kroon

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.