All of lore.kernel.org
 help / color / mirror / Atom feed
* [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source
@ 2022-01-28 22:03 Saul Wold
  2022-01-28 22:44 ` Joshua Watt
  0 siblings, 1 reply; 2+ messages in thread
From: Saul Wold @ 2022-01-28 22:03 UTC (permalink / raw)
  To: openembedded-core, JPEWhacker; +Cc: Saul Wold

This patch will read the beginning of source files and try to find
the SPDX-License-Identifier to populate the licenseInfoInFiles
field for each source file. This does not populate licenseConcluded
at this time, nor does it roll it up to the package level.

Signed-off-by: Saul Wold <saul.wold@windriver.com>
---
 classes/create-spdx.bbclass | 25 +++++++++++++++++++++++++
 lib/oe/spdx.py              |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/classes/create-spdx.bbclass b/classes/create-spdx.bbclass
index 180d667..9c11945 100644
--- a/classes/create-spdx.bbclass
+++ b/classes/create-spdx.bbclass
@@ -30,6 +30,21 @@ SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
 
 do_image_complete[depends] = "virtual/kernel:do_create_spdx"
 
+def extract_licenses(filename):
+    import re
+    lic_regex = re.compile('SPDX-License-Identifier:\s+([-A-Za-z\d. ]+)[ |\n|\r\n]*?')
+
+    try:
+        with open(filename, 'r') as f:
+            size = min(15000, os.stat(filename).st_size)
+            txt = f.read(size)
+            licenses = re.findall(lic_regex, txt)
+            if licenses:
+                return licenses
+    except Exception as e:
+        bb.warn(f"Exception on {filename}: {e}")
+        return None
+
 def get_doc_namespace(d, doc):
     import uuid
     namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
@@ -232,6 +247,16 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
                         checksumValue=bb.utils.sha256_file(filepath),
                     ))
 
+                if "SOURCES" in spdx_file.fileTypes:
+                    licenses = extract_licenses(filepath)
+                    if licenses is not None:
+                        for lic in licenses:
+                            spdx_file.licenseInfoInFiles.append(lic.strip())
+                    else:
+                        spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+                else:
+                    spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+
                 doc.files.append(spdx_file)
                 doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                 spdx_pkg.hasFiles.append(spdx_file.SPDXID)
diff --git a/lib/oe/spdx.py b/lib/oe/spdx.py
index 9e7ced5..71e7c1c 100644
--- a/lib/oe/spdx.py
+++ b/lib/oe/spdx.py
@@ -236,7 +236,7 @@ class SPDXFile(SPDXObject):
     fileName = _String()
     licenseConcluded = _String(default="NOASSERTION")
     copyrightText = _String(default="NOASSERTION")
-    licenseInfoInFiles = _StringList(default=["NOASSERTION"])
+    licenseInfoInFiles = _StringList()
     checksums = _ObjectList(SPDXChecksum)
     fileTypes = _StringList()
 
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source
  2022-01-28 22:03 [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source Saul Wold
@ 2022-01-28 22:44 ` Joshua Watt
  0 siblings, 0 replies; 2+ messages in thread
From: Joshua Watt @ 2022-01-28 22:44 UTC (permalink / raw)
  To: Saul Wold, openembedded-core


On 1/28/22 4:03 PM, Saul Wold wrote:
> This patch will read the beginning of source files and try to find
> the SPDX-License-Identifier to populate the licenseInfoInFiles
> field for each source file. This does not populate licenseConcluded
> at this time, nor does it roll it up to the package level.
>
> Signed-off-by: Saul Wold <saul.wold@windriver.com>
> ---
>   classes/create-spdx.bbclass | 25 +++++++++++++++++++++++++
>   lib/oe/spdx.py              |  2 +-
>   2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/classes/create-spdx.bbclass b/classes/create-spdx.bbclass
> index 180d667..9c11945 100644
> --- a/classes/create-spdx.bbclass
> +++ b/classes/create-spdx.bbclass
> @@ -30,6 +30,21 @@ SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
>   
>   do_image_complete[depends] = "virtual/kernel:do_create_spdx"
>   
> +def extract_licenses(filename):
> +    import re
> +    lic_regex = re.compile('SPDX-License-Identifier:\s+([-A-Za-z\d. ]+)[ |\n|\r\n]*?')
> +
> +    try:
> +        with open(filename, 'r') as f:
> +            size = min(15000, os.stat(filename).st_size)
> +            txt = f.read(size)
> +            licenses = re.findall(lic_regex, txt)
> +            if licenses:
> +                return licenses
> +    except Exception as e:
> +        bb.warn(f"Exception on {filename}: {e}")
> +        return None
> +
>   def get_doc_namespace(d, doc):
>       import uuid
>       namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
> @@ -232,6 +247,16 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
>                           checksumValue=bb.utils.sha256_file(filepath),
>                       ))
>   
> +                if "SOURCES" in spdx_file.fileTypes:
> +                    licenses = extract_licenses(filepath)
> +                    if licenses is not None:
> +                        for lic in licenses:
> +                            spdx_file.licenseInfoInFiles.append(lic.strip())
> +                    else:
> +                        spdx_file.licenseInfoInFiles.append("NOASSERTATION")

"NOASSERTION"


> +                else:
> +                    spdx_file.licenseInfoInFiles.append("NOASSERTATION")

"NOASSERTION"

> +
>                   doc.files.append(spdx_file)
>                   doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
>                   spdx_pkg.hasFiles.append(spdx_file.SPDXID)
> diff --git a/lib/oe/spdx.py b/lib/oe/spdx.py
> index 9e7ced5..71e7c1c 100644
> --- a/lib/oe/spdx.py
> +++ b/lib/oe/spdx.py
> @@ -236,7 +236,7 @@ class SPDXFile(SPDXObject):
>       fileName = _String()
>       licenseConcluded = _String(default="NOASSERTION")
>       copyrightText = _String(default="NOASSERTION")
> -    licenseInfoInFiles = _StringList(default=["NOASSERTION"])
> +    licenseInfoInFiles = _StringList()

It's required to have "NOASSERTION" as the default if you don't do 
anything, so we shouldn't change the default here (by and large, this 
file should capture the spec over our use of it).

It's on my TODO list to make the "default" lists behave like default 
scalars, where appending replaces the default instead of appending to 
it, but I haven't gotten there yet; it hasn't come up as a problem before.


Probably need to do something like:


  license_info_from_files = []
  # scan files here
  if license_info_from_files:

     spdx_file.licenseInfoInFiles = license_info_from_files


>       checksums = _ObjectList(SPDXChecksum)
>       fileTypes = _StringList()
>   

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-01-28 22:44 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-28 22:03 [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source Saul Wold
2022-01-28 22:44 ` Joshua Watt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.