All of lore.kernel.org
 help / color / mirror / Atom feed
* [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source
@ 2022-01-28 22:03 Saul Wold
  2022-01-28 22:44 ` Joshua Watt
  0 siblings, 1 reply; 2+ messages in thread
From: Saul Wold @ 2022-01-28 22:03 UTC (permalink / raw)
  To: openembedded-core, JPEWhacker; +Cc: Saul Wold

This patch will read the beginning of source files and try to find
the SPDX-License-Identifier to populate the licenseInfoInFiles
field for each source file. This does not populate licenseConcluded
at this time, nor does it roll the result up to package level.

Signed-off-by: Saul Wold <saul.wold@windriver.com>
---
 classes/create-spdx.bbclass | 25 +++++++++++++++++++++++++
 lib/oe/spdx.py              |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/classes/create-spdx.bbclass b/classes/create-spdx.bbclass
index 180d667..9c11945 100644
--- a/classes/create-spdx.bbclass
+++ b/classes/create-spdx.bbclass
@@ -30,6 +30,21 @@ SPDX_LICENSES ??= "${COREBASE}/meta/files/spdx-licenses.json"
 
 do_image_complete[depends] = "virtual/kernel:do_create_spdx"
 
+def extract_licenses(filename):
+    import re
+    lic_regex = re.compile('SPDX-License-Identifier:\s+([-A-Za-z\d. ]+)[ |\n|\r\n]*?')
+
+    try:
+        with open(filename, 'r') as f:
+            size = min(15000, os.stat(filename).st_size)
+            txt = f.read(size)
+            licenses = re.findall(lic_regex, txt)
+            if licenses:
+                return licenses
+    except Exception as e:
+        bb.warn(f"Exception on {filename}: {e}")
+        return None
+
 def get_doc_namespace(d, doc):
     import uuid
     namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
@@ -232,6 +247,16 @@ def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archiv
                         checksumValue=bb.utils.sha256_file(filepath),
                     ))
 
+                if "SOURCES" in spdx_file.fileTypes:
+                    licenses = extract_licenses(filepath)
+                    if licenses is not None:
+                        for lic in licenses:
+                            spdx_file.licenseInfoInFiles.append(lic.strip())
+                    else:
+                        spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+                else:
+                    spdx_file.licenseInfoInFiles.append("NOASSERTATION")
+
                 doc.files.append(spdx_file)
                 doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file)
                 spdx_pkg.hasFiles.append(spdx_file.SPDXID)
diff --git a/lib/oe/spdx.py b/lib/oe/spdx.py
index 9e7ced5..71e7c1c 100644
--- a/lib/oe/spdx.py
+++ b/lib/oe/spdx.py
@@ -236,7 +236,7 @@ class SPDXFile(SPDXObject):
     fileName = _String()
     licenseConcluded = _String(default="NOASSERTION")
     copyrightText = _String(default="NOASSERTION")
-    licenseInfoInFiles = _StringList(default=["NOASSERTION"])
+    licenseInfoInFiles = _StringList()
     checksums = _ObjectList(SPDXChecksum)
     fileTypes = _StringList()
 
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-01-28 22:44 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-28 22:03 [WIP/RFC] create-spdx: Get SPDX-License-Identifier from source Saul Wold
2022-01-28 22:44 ` Joshua Watt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.