All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] cve-update-db-native: use executemany() to optimise CPE insertion
@ 2019-07-19 20:33 Ross Burton
  2019-07-19 20:33 ` [PATCH 2/3] cve-update-db-native: improve metadata parsing Ross Burton
  2019-07-19 20:33 ` [PATCH 3/3] cve-update-db-native: clean up JSON fetching Ross Burton
  0 siblings, 2 replies; 3+ messages in thread
From: Ross Burton @ 2019-07-19 20:33 UTC (permalink / raw)
  To: openembedded-core

Instead of calling execute() repeatedly, rewrite the function to be a generator
and use executemany() for performance.

Signed-off-by: Ross Burton <ross.burton@intel.com>
---
 .../recipes-core/meta/cve-update-db-native.bb | 85 +++++++------------
 1 file changed, 32 insertions(+), 53 deletions(-)

diff --git a/meta/recipes-core/meta/cve-update-db-native.bb b/meta/recipes-core/meta/cve-update-db-native.bb
index cabbde5066c..09e19c0aaef 100644
--- a/meta/recipes-core/meta/cve-update-db-native.bb
+++ b/meta/recipes-core/meta/cve-update-db-native.bb
@@ -102,70 +102,49 @@ def initialize_db(c):
         VENDOR TEXT, PRODUCT TEXT, VERSION_START TEXT, OPERATOR_START TEXT, \
         VERSION_END TEXT, OPERATOR_END TEXT)")
 
-def insert_elt(c, db_values):
-    query = "insert into PRODUCTS values (?, ?, ?, ?, ?, ?, ?)"
-    c.execute(query, db_values)
-
 def parse_node_and_insert(c, node, cveId):
     # Parse children node if needed
-    try:
-        for child in node['children']:
-            parse_node_and_insert(c, child, cveId)
-    except:
-        pass
-
-    # Exit if the cpe_match node does not exists
-    try:
-        cpe_match = node['cpe_match']
-    except:
-        return
-
-    for cpe in cpe_match:
-        if not cpe['vulnerable']:
-            return
-        cpe23 = cpe['cpe23Uri'].split(':')
-        vendor = cpe23[3]
-        product = cpe23[4]
-        version = cpe23[5]
-
-        if version != '*':
-            # Version is defined, this is a '=' match
-            db_values = [cveId, vendor, product, version, '=', '', '']
-            insert_elt(c, db_values)
-        else:
-            # Parse start version, end version and operators
-            op_start = ''
-            op_end = ''
-            v_start = ''
-            v_end = ''
-
-            try:
-                if cpe['versionStartIncluding']:
+    for child in node.get('children', ()):
+        parse_node_and_insert(c, child, cveId)
+
+    def cpe_generator():
+        for cpe in node.get('cpe_match', ()):
+            if not cpe['vulnerable']:
+                return
+            cpe23 = cpe['cpe23Uri'].split(':')
+            vendor = cpe23[3]
+            product = cpe23[4]
+            version = cpe23[5]
+
+            if version != '*':
+                # Version is defined, this is a '=' match
+                yield [cveId, vendor, product, version, '=', '', '']
+            else:
+                # Parse start version, end version and operators
+                op_start = ''
+                op_end = ''
+                v_start = ''
+                v_end = ''
+
+                if 'versionStartIncluding' in cpe:
                     op_start = '>='
                     v_start = cpe['versionStartIncluding']
-            except:
-                pass
-            try:
-                if cpe['versionStartExcluding']:
+
+                if 'versionStartExcluding' in cpe:
                     op_start = '>'
                     v_start = cpe['versionStartExcluding']
-            except:
-                pass
-            try:
-                if cpe['versionEndIncluding']:
+
+                if 'versionEndIncluding' in cpe:
                     op_end = '<='
                     v_end = cpe['versionEndIncluding']
-            except:
-                pass
-            try:
-                if cpe['versionEndExcluding']:
+
+                if 'versionEndExcluding' in cpe:
                     op_end = '<'
                     v_end = cpe['versionEndExcluding']
-            except:
-                pass
 
-            db_values = [cveId, vendor, product, v_start, op_start, v_end, op_end]
-            insert_elt(c, db_values)
+                yield [cveId, vendor, product, v_start, op_start, v_end, op_end]
+
+    c.executemany("insert into PRODUCTS values (?, ?, ?, ?, ?, ?, ?)", cpe_generator())
 
 def update_db(c, json_filename):
     import json
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] cve-update-db-native: improve metadata parsing
  2019-07-19 20:33 [PATCH 1/3] cve-update-db-native: use executemany() to optimise CPE insertion Ross Burton
@ 2019-07-19 20:33 ` Ross Burton
  2019-07-19 20:33 ` [PATCH 3/3] cve-update-db-native: clean up JSON fetching Ross Burton
  1 sibling, 0 replies; 3+ messages in thread
From: Ross Burton @ 2019-07-19 20:33 UTC (permalink / raw)
  To: openembedded-core

The metadata parser is fragile: first it coerces a bytes() to a str() (so the
string is "b'lastModifiedDate:2019...'"), then it assumes the first line is the
date, and finally it uses a regex to parse it (which then includes the trailing
quote as part of the date).

Clean this up by decoding the bytes as UTF-8 (ASCII is probably fine, but this
is safer), iterating through the lines, and splitting each on the first colon
to find the right key/value pair.

Signed-off-by: Ross Burton <ross.burton@intel.com>
---
 meta/recipes-core/meta/cve-update-db-native.bb | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/meta/recipes-core/meta/cve-update-db-native.bb b/meta/recipes-core/meta/cve-update-db-native.bb
index 09e19c0aaef..41a2aa8f207 100644
--- a/meta/recipes-core/meta/cve-update-db-native.bb
+++ b/meta/recipes-core/meta/cve-update-db-native.bb
@@ -22,7 +22,7 @@ python do_populate_cve_db() {
     Update NVD database with json data feed
     """
 
-    import sqlite3, urllib, shutil, gzip, re
+    import sqlite3, urllib, shutil, gzip
     from datetime import date
 
     BASE_URL = "https://nvd.nist.gov/feeds/json/cve/1.0/nvdcve-1.0-"
@@ -52,13 +52,15 @@ python do_populate_cve_db() {
         req = urllib.request.Request(meta_url)
         if proxy:
             req.set_proxy(proxy, 'https')
-        try:
-            with urllib.request.urlopen(req, timeout=1) as r:
-                date_line = str(r.read().splitlines()[0])
-                last_modified = re.search('lastModifiedDate:(.*)', date_line).group(1)
-        except:
-            cve_f.write('Warning: CVE db update error, CVE data is outdated.\n\n')
-            break
+        with urllib.request.urlopen(req) as r:
+            for l in r.read().decode("utf-8").splitlines():
+                key, value = l.split(":", 1)
+                if key == "lastModifiedDate":
+                    last_modified = value
+                    break
+            else:
+                bb.warn("Cannot parse CVE metadata, update failed")
+                return
 
         # Compare with current db last modified date
         c.execute("select DATE from META where YEAR = ?", (year,))
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] cve-update-db-native: clean up JSON fetching
  2019-07-19 20:33 [PATCH 1/3] cve-update-db-native: use executemany() to optimise CPE insertion Ross Burton
  2019-07-19 20:33 ` [PATCH 2/3] cve-update-db-native: improve metadata parsing Ross Burton
@ 2019-07-19 20:33 ` Ross Burton
  1 sibling, 0 replies; 3+ messages in thread
From: Ross Burton @ 2019-07-19 20:33 UTC (permalink / raw)
  To: openembedded-core

Currently the code fetches the compressed JSON, writes it to a temporary file,
uncompresses that with gzip and passes the fake file object to update_db().

Instead, uncompress the gzip'd data in memory and pass the JSON directly to
update_db().

Signed-off-by: Ross Burton <ross.burton@intel.com>
---
 .../recipes-core/meta/cve-update-db-native.bb | 29 ++++++++-----------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/meta/recipes-core/meta/cve-update-db-native.bb b/meta/recipes-core/meta/cve-update-db-native.bb
index 41a2aa8f207..9c083bdc991 100644
--- a/meta/recipes-core/meta/cve-update-db-native.bb
+++ b/meta/recipes-core/meta/cve-update-db-native.bb
@@ -67,25 +67,20 @@ python do_populate_cve_db() {
         meta = c.fetchone()
         if not meta or meta[0] != last_modified:
             # Clear products table entries corresponding to current year
-            cve_year = 'CVE-' + str(year) + '%'
-            c.execute("delete from PRODUCTS where ID like ?", (cve_year,))
+            c.execute("delete from PRODUCTS where ID like ?", ('CVE-%d%%' % year,))
 
             # Update db with current year json file
-            req = urllib.request.Request(json_url)
-            if proxy:
-                req.set_proxy(proxy, 'https')
             try:
-                with urllib.request.urlopen(req, timeout=1) as r, \
-                     open(json_tmpfile, 'wb') as tmpfile:
-                    shutil.copyfileobj(r, tmpfile)
-            except:
+                req = urllib.request.Request(json_url)
+                if proxy:
+                    req.set_proxy(proxy, 'https')
+                with urllib.request.urlopen(req) as r:
+                    update_db(c, gzip.decompress(r.read()))
+                c.execute("insert or replace into META values (?, ?)", [year, last_modified])
+            except urllib.error.URLError as e:
                 cve_f.write('Warning: CVE db update error, CVE data is outdated.\n\n')
-                break
-
-            with gzip.open(json_tmpfile, 'rt') as jsonfile:
-                update_db(c, jsonfile)
-            c.execute("insert or replace into META values (?, ?)",
-                    [year, last_modified])
+                bb.warn("Cannot parse CVE data (%s), update failed" % e.reason)
+                return
 
         # Update success, set the date to cve_check file.
         if year == date.today().year:
@@ -148,9 +143,9 @@ def parse_node_and_insert(c, node, cveId):
 
     c.executemany("insert into PRODUCTS values (?, ?, ?, ?, ?, ?, ?)", cpe_generator())
 
-def update_db(c, json_filename):
+def update_db(c, jsondata):
     import json
-    root = json.load(json_filename)
+    root = json.loads(jsondata)
 
     for elt in root['CVE_Items']:
         if not elt['impact']:
-- 
2.20.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-07-19 20:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-19 20:33 [PATCH 1/3] cve-update-db-native: use executemany() to optimise CPE insertion Ross Burton
2019-07-19 20:33 ` [PATCH 2/3] cve-update-db-native: improve metadata parsing Ross Burton
2019-07-19 20:33 ` [PATCH 3/3] cve-update-db-native: clean up JSON fetching Ross Burton

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.