signatures.lore.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/10] Entirely fake patch set
@ 2020-10-02 23:29 Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 01/10] Use shorter cache file names Konstantin Ryabitsev
                   ` (9 more replies)
  0 siblings, 10 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

This is an entirely fake patchset used for testing in-header patch
signing.

Konstantin Ryabitsev (10):
  Use shorter cache file names
  Preserve trailer order by default
  Don't force trailers into a set
  Initial go at supporting [extra trailer data]
  Tighten follow-up header parsing
  Use a more precise regex for email trailers
  Set charset in order to generate MIME headers
  Use bytes when dumping to stdout
  Don't crash when no valid patches are found
  Fix some cherry-picking corner cases

 b4/__init__.py | 269 ++++++++++++++++++++++++++++++-------------------
 b4/diff.py     |  14 +--
 b4/mbox.py     |  28 ++---
 b4/ty.py       |   4 +-
 man/b4.5       |  17 ++--
 man/b4.5.rst   |   9 +-
 6 files changed, 204 insertions(+), 137 deletions(-)

-- 
2.26.2



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 01/10] Use shorter cache file names
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 02/10] Preserve trailer order by default Konstantin Ryabitsev
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Use shorter cache filenames to avoid running into OSError.

Reported-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 90 +++++++++++++++++++++++++++++++-------------------
 b4/diff.py     | 11 +++---
 2 files changed, 61 insertions(+), 40 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 1af313b..4325121 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -641,8 +641,6 @@ class LoreSeries:
 
     def make_fake_am_range(self, gitdir):
         start_commit = end_commit = None
-        # Do we have it in cache already?
-        cachedir = get_cache_dir()
         # Use the msgid of the first non-None patch in the series
         msgid = None
         for lmsg in self.patches:
@@ -652,16 +650,14 @@ class LoreSeries:
         if msgid is None:
             logger.critical('Cannot operate on an empty series')
             return None, None
-        cachefile = os.path.join(cachedir, '%s.fakeam' % urllib.parse.quote_plus(msgid))
-        if os.path.exists(cachefile):
+        cachedata = get_cache(msgid, suffix='fakeam')
+        if cachedata:
             stalecache = False
-            with open(cachefile, 'r') as fh:
-                cachedata = fh.read()
-                chunks = cachedata.strip().split()
-                if len(chunks) == 2:
-                    start_commit, end_commit = chunks
-                else:
-                    stalecache = True
+            chunks = cachedata.strip().split()
+            if len(chunks) == 2:
+                start_commit, end_commit = chunks
+            else:
+                stalecache = True
             if start_commit is not None and end_commit is not None:
                 # Make sure they are still there
                 ecode, out = git_run_command(gitdir, ['cat-file', '-e', start_commit])
@@ -677,7 +673,7 @@ class LoreSeries:
 
             if stalecache:
                 logger.debug('Stale cache for [v%s] %s', self.revision, self.subject)
-                os.unlink(cachefile)
+                save_cache(None, msgid, suffix='fakeam')
 
         logger.info('Preparing fake-am for v%s: %s', self.revision, self.subject)
         with git_temp_worktree(gitdir):
@@ -744,10 +740,9 @@ class LoreSeries:
             end_commit = out.strip()
             logger.info('  range: %.12s..%.12s', start_commit, end_commit)
 
-        with open(cachefile, 'w') as fh:
-            logger.debug('Saving into cache: %s', cachefile)
-            logger.debug('    %s..%s', start_commit, end_commit)
-            fh.write(f'{start_commit} {end_commit}\n')
+        logger.debug('Saving into cache:')
+        logger.debug('    %s..%s', start_commit, end_commit)
+        save_cache(f'{start_commit} {end_commit}\n', msgid, suffix='fakeam')
 
         return start_commit, end_commit
 
@@ -1577,12 +1572,7 @@ class LoreAttestationDocument:
 
         if source.find('http') == 0:
             # We only cache known-good attestations obtained from remote
-            cachedir = get_cache_dir()
-            cachename = '%s.attestation' % urllib.parse.quote_plus(source.strip('/').split('/')[-1])
-            fullpath = os.path.join(cachedir, cachename)
-            with open(fullpath, 'w') as fh:
-                logger.debug('Saved attestation in cache: %s', cachename)
-                fh.write(sigdata)
+            save_cache(sigdata, source, suffix='attestation')
 
         hg = [None, None, None]
         for line in sigdata.split('\n'):
@@ -1636,15 +1626,13 @@ class LoreAttestationDocument:
         attdocs = list()
         # XXX: Querying this via the Atom feed is a temporary kludge until we have
         #      proper search API on lore.kernel.org
-        cachedir = get_cache_dir()
-        cachefile = os.path.join(cachedir, '%s.lookup' % urllib.parse.quote_plus(attid))
         status = None
-        if os.path.exists(cachefile):
-            with open(cachefile, 'r') as fh:
-                try:
-                    status = int(fh.read())
-                except ValueError:
-                    pass
+        cachedata = get_cache(attid, suffix='lookup')
+        if cachedata:
+            try:
+                status = int(cachedata)
+            except ValueError:
+                pass
         if status is not None and status != 200:
             logger.debug('Cache says looking up %s = %s', attid, status)
             return attdocs
@@ -1657,8 +1645,7 @@ class LoreAttestationDocument:
         resp = session.get(queryurl)
         if resp.status_code != 200:
             # Record this as a bad hit
-            with open(cachefile, 'w') as fh:
-                fh.write(str(resp.status_code))
+            save_cache(str(resp.status_code), attid, suffix='lookup')
 
         matches = re.findall(
             r'link\s+href="([^"]+)".*?(-----BEGIN PGP SIGNED MESSAGE-----.*?-----END PGP SIGNATURE-----)',
@@ -1885,6 +1872,42 @@ def get_cache_dir():
     return cachedir
 
 
+def get_cache_file(identifier, suffix=None):
+    cachedir = get_cache_dir()
+    cachefile = hashlib.sha1(identifier.encode()).hexdigest()
+    if suffix:
+        cachefile = f'{cachefile}.{suffix}'
+    return os.path.join(cachedir, cachefile)
+
+
+def get_cache(identifier, suffix=None):
+    fullpath = get_cache_file(identifier, suffix=suffix)
+    try:
+        with open(fullpath) as fh:
+            logger.debug('Using cache %s for %s', fullpath, identifier)
+            return fh.read()
+    except FileNotFoundError:
+        logger.debug('Cache miss for %s', identifier)
+    return None
+
+
+def save_cache(contents, identifier, suffix=None, mode='w'):
+    fullpath = get_cache_file(identifier, suffix=suffix)
+    if not contents:
+        # noinspection PyBroadException
+        try:
+            os.unlink(fullpath)
+            logger.debug('Removed cache %s for %s', fullpath, identifier)
+        except:
+            pass
+    try:
+        with open(fullpath, mode) as fh:
+            fh.write(contents)
+            logger.debug('Saved cache %s for %s', fullpath, identifier)
+    except FileNotFoundError:
+        logger.debug('Could not write cache %s for %s', fullpath, identifier)
+
+
 def get_user_config():
     global USER_CONFIG
     if USER_CONFIG is None:
@@ -1998,8 +2021,7 @@ def save_strict_thread(in_mbx, out_mbx, msgid):
 
 
 def get_pi_thread_by_url(t_mbx_url, savefile, nocache=False):
-    cachedir = get_cache_dir()
-    cachefile = os.path.join(cachedir, '%s.pi.mbx' % urllib.parse.quote_plus(t_mbx_url))
+    cachefile = get_cache_file(t_mbx_url, 'pi.mbx')
     if os.path.exists(cachefile) and not nocache:
         logger.debug('Using cached copy: %s', cachefile)
         shutil.copyfile(cachefile, savefile)
diff --git a/b4/diff.py b/b4/diff.py
index 7e9125b..81e015a 100644
--- a/b4/diff.py
+++ b/b4/diff.py
@@ -11,7 +11,6 @@ import b4
 import b4.mbox
 import mailbox
 import shutil
-import urllib.parse
 
 from tempfile import mkstemp
 
@@ -29,13 +28,13 @@ def diff_same_thread_series(cmdargs):
     # start by grabbing the mbox provided
     savefile = mkstemp('b4-diff-to')[1]
     # Do we have a cache of this lookup?
-    cachedir = b4.get_cache_dir()
-    cachebase = urllib.parse.quote_plus(msgid)
+    identifier = msgid
     if wantvers:
-        cachebase += '-' + '-'.join([str(x) for x in wantvers])
+        identifier += '-' + '-'.join([str(x) for x in wantvers])
     if cmdargs.useproject:
-        cachebase += '-' + cmdargs.useproject
-    cachefile = os.path.join(cachedir, '%s.diff.mbx' % cachebase)
+        identifier += '-' + cmdargs.useproject
+
+    cachefile = b4.get_cache_file(identifier, suffix='diff.mbx')
     if os.path.exists(cachefile) and not cmdargs.nocache:
         logger.info('Using cached copy of the lookup')
         shutil.copyfile(cachefile, savefile)
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 02/10] Preserve trailer order by default
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 01/10] Use shorter cache file names Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 03/10] Don't force trailers into a set Konstantin Ryabitsev
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Per discussion on the users list, preserve the trailer order by default.
There is no agreement on whether this is a hard requirement for patches
or not, but there is general consensus that the default should be to
make as few changes to incoming patches as possible.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 43 +++++++++++++++++++++++++------------------
 man/b4.5       | 17 ++++++++---------
 man/b4.5.rst   |  9 +++++----
 3 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 4325121..2f6d1ba 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -66,7 +66,12 @@ WANTHDRS = [
 # [b4]
 #   # remember to end with ,*
 #   trailer-order=link*,fixes*,cc*,reported*,suggested*,original*,co-*,tested*,reviewed*,acked*,signed-off*,*
-DEFAULT_TRAILER_ORDER = 'fixes*,reported*,suggested*,original*,co-*,signed-off*,tested*,reviewed*,acked*,cc*,link*,*'
+#   (another common)
+#   trailer-order=fixes*,reported*,suggested*,original*,co-*,signed-off*,tested*,reviewed*,acked*,cc*,link*,*
+#
+# Or use _preserve_ (alias to *) to keep the order unchanged
+
+DEFAULT_TRAILER_ORDER = '*'
 
 LOREADDR = 'https://lore.kernel.org'
 
@@ -300,17 +305,17 @@ class LoreMailbox:
                         pmsg.load_hashes()
                         attid = pmsg.attestation.attid
                         if attid not in self.trailer_map:
-                            self.trailer_map[attid] = set()
-                        self.trailer_map[attid].update(trailers)
-                    pmsg.followup_trailers.update(trailers)
+                            self.trailer_map[attid] = list()
+                        self.trailer_map[attid] += trailers
+                    pmsg.followup_trailers += trailers
                     break
                 if not pmsg.reply:
                     # Could be a cover letter
-                    pmsg.followup_trailers.update(trailers)
+                    pmsg.followup_trailers += trailers
                     break
                 if pmsg.in_reply_to and pmsg.in_reply_to in self.msgid_map:
                     lvl += 1
-                    trailers.update(pmsg.trailers)
+                    trailers += pmsg.trailers
                     pmsg = self.msgid_map[pmsg.in_reply_to]
                     continue
                 break
@@ -321,7 +326,7 @@ class LoreMailbox:
                 continue
             lmsg.load_hashes()
             if lmsg.attestation.attid in self.trailer_map:
-                lmsg.followup_trailers.update(self.trailer_map[lmsg.attestation.attid])
+                lmsg.followup_trailers += self.trailer_map[lmsg.attestation.attid]
 
         return lser
 
@@ -505,11 +510,11 @@ class LoreSeries:
                 continue
             if lmsg is not None:
                 if self.has_cover and covertrailers and self.patches[0].followup_trailers:
-                    lmsg.followup_trailers.update(self.patches[0].followup_trailers)
+                    lmsg.followup_trailers += self.patches[0].followup_trailers
                 if addmysob:
-                    lmsg.followup_trailers.add(('Signed-off-by', '%s <%s>' % (usercfg['name'], usercfg['email'])))
+                    lmsg.followup_trailers.append(('Signed-off-by', '%s <%s>' % (usercfg['name'], usercfg['email'])))
                 if addlink:
-                    lmsg.followup_trailers.add(('Link', linkmask % lmsg.msgid))
+                    lmsg.followup_trailers.append(('Link', linkmask % lmsg.msgid))
 
                 if attpolicy != 'off':
                     lore_lookup = False
@@ -780,8 +785,8 @@ class LoreMessage:
         self.charset = 'utf-8'
         self.has_diff = False
         self.has_diffstat = False
-        self.trailers = set()
-        self.followup_trailers = set()
+        self.trailers = list()
+        self.followup_trailers = list()
 
         # These are populated by pr
         self.pr_base_commit = None
@@ -879,7 +884,7 @@ class LoreMessage:
             matches = re.findall(r'^\s*Fixes:[ \t]+([a-f0-9]+\s+\(.*\))\s*$', self.body, re.MULTILINE)
             if matches:
                 for tvalue in matches:
-                    self.trailers.add(('Fixes', tvalue))
+                    self.trailers.append(('Fixes', tvalue))
 
             # Do we have something that looks like a person-trailer?
             matches = re.findall(r'^\s*([\w-]{2,}):[ \t]+(.*<\S+>)\s*$', self.body, re.MULTILINE)
@@ -888,17 +893,17 @@ class LoreMessage:
             if matches:
                 for tname, tvalue in matches:
                     if tname.lower() not in badtrailers:
-                        self.trailers.add((tname, tvalue))
+                        self.trailers.append((tname, tvalue))
 
     def get_trailers(self, sloppy=False):
         mismatches = set()
         if sloppy:
             return set(self.trailers), mismatches
 
-        trailers = set()
+        trailers = list()
         for tname, tvalue in self.trailers:
             if tname.lower() in ('fixes',):
-                trailers.add((tname, tvalue))
+                trailers.append((tname, tvalue))
                 continue
 
             tmatch = False
@@ -935,7 +940,7 @@ class LoreMessage:
                     logger.debug('  trailer fuzzy name match')
                     tmatch = True
             if tmatch:
-                trailers.add((tname, tvalue))
+                trailers.append((tname, tvalue))
             else:
                 mismatches.add((tname, tvalue))
 
@@ -1263,10 +1268,12 @@ class LoreMessage:
     def fix_trailers(self, trailer_order=None):
         bheaders, message, btrailers, basement, signature = LoreMessage.get_body_parts(self.body)
         # Now we add mix-in trailers
-        trailers = btrailers + list(self.followup_trailers)
+        trailers = btrailers + self.followup_trailers
         fixtrailers = list()
         if trailer_order is None:
             trailer_order = DEFAULT_TRAILER_ORDER
+        elif trailer_order in ('preserve', '_preserve_'):
+            trailer_order = '*'
         for trailermatch in trailer_order:
             for trailer in trailers:
                 if trailer in fixtrailers:
diff --git a/man/b4.5 b/man/b4.5
index 95876b8..cdbd1d0 100644
--- a/man/b4.5
+++ b/man/b4.5
@@ -316,8 +316,6 @@ Save diff into this file instead of outputting to stdout
 .B \-c\fP,\fB  \-\-color
 Force color output even when writing to file
 .UNINDENT
-.IP "System Message: WARNING/2 (b4.5.rst:, line 201)"
-Option list ends without a blank line; unexpected unindent.
 .INDENT 0.0
 .TP
 .B \-m AMBOX AMBOX, \-\-compare\-am\-mboxes AMBOX AMBOX
@@ -326,7 +324,7 @@ Compare two mbx files prepared with "b4 am"
 .UNINDENT
 .UNINDENT
 .sp
-\fIExample\fP: b4 diff
+\fIExample\fP: b4 diff \fI\%20200526205322.23465\-1\-mic@digikod.net\fP
 .SH CONFIGURATION
 .sp
 B4 configuration is handled via git\-config(1), so you can store it in
@@ -344,13 +342,14 @@ Default configuration, with explanations:
    midmask = https://lore.kernel.org/r/%s\(aq
    #
    # When recording Link: trailers, use this mask
-   linkmask = https://lore.kernel.org/r/%s\(aq
+   linkmask = https://lore.kernel.org/r/%s
    #
-   # When processing thread trailers, use this order. Can use shell\-globbing
-   # and must end with ,*
-   # Common alternative order:
+   # When processing thread trailers, sort them in this order.
+   # Can use shell\-globbing and must end with ,*
+   # Some sorting orders:
    #trailer\-order=link*,fixes*,cc*,reported*,suggested*,original*,co\-*,tested*,reviewed*,acked*,signed\-off*,*
-   trailer\-order = fixes*,reported*,suggested*,original*,co\-*,signed\-off*,tested*,reviewed*,acked*,cc*,link*,*
+   #trailer\-order = fixes*,reported*,suggested*,original*,co\-*,signed\-off*,tested*,reviewed*,acked*,cc*,link*,*
+   trailer\-order = _preserve_
    #
    # Attestation\-checking configuration parameters
    # off: do not bother checking attestation
@@ -389,7 +388,7 @@ Default configuration, with explanations:
    # How long to keep downloaded threads in cache (minutes)?
    cache\-expire = 10
    # Used when creating summaries for b4 ty, and can be set to a value like
-   # thanks\-commit\-url\-mask = https://git.kernel.org/username/c/%.10s
+   # thanks\-commit\-url\-mask = https://git.kernel.org/username/c/%.12s
    # See this page for more info on convenient git.kernel.org shorterners:
    # https://korg.wiki.kernel.org/userdoc/git\-url\-shorterners
    thanks\-commit\-url\-mask = None
diff --git a/man/b4.5.rst b/man/b4.5.rst
index 2591a04..9e0d995 100644
--- a/man/b4.5.rst
+++ b/man/b4.5.rst
@@ -219,11 +219,12 @@ Default configuration, with explanations::
       # When recording Link: trailers, use this mask
       linkmask = https://lore.kernel.org/r/%s
       #
-      # When processing thread trailers, use this order. Can use shell-globbing
-      # and must end with ,*
-      # Common alternative order:
+      # When processing thread trailers, sort them in this order.
+      # Can use shell-globbing and must end with ,*
+      # Some sorting orders:
       #trailer-order=link*,fixes*,cc*,reported*,suggested*,original*,co-*,tested*,reviewed*,acked*,signed-off*,*
-      trailer-order = fixes*,reported*,suggested*,original*,co-*,signed-off*,tested*,reviewed*,acked*,cc*,link*,*
+      #trailer-order = fixes*,reported*,suggested*,original*,co-*,signed-off*,tested*,reviewed*,acked*,cc*,link*,*
+      trailer-order = _preserve_
       #
       # Attestation-checking configuration parameters
       # off: do not bother checking attestation
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 03/10] Don't force trailers into a set
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 01/10] Use shorter cache file names Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 02/10] Preserve trailer order by default Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 04/10] Initial go at supporting [extra trailer data] Konstantin Ryabitsev
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Fixes sloppy-trailers.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 2f6d1ba..630c1b2 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -898,7 +898,7 @@ class LoreMessage:
     def get_trailers(self, sloppy=False):
         mismatches = set()
         if sloppy:
-            return set(self.trailers), mismatches
+            return self.trailers, mismatches
 
         trailers = list()
         for tname, tvalue in self.trailers:
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 04/10] Initial go at supporting [extra trailer data]
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (2 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 03/10] Don't force trailers into a set Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 05/10] Tighten follow-up header parsing Konstantin Ryabitsev
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

A common request is to support trailers that contain extra data in the
following format:

Reviewed-by: D. Eveloper <d.eveloper@example.com>
[for the code in foo.h]

This should do the right thing now, and moves trailer searching into one
place instead of being reimplemented twice.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 122 ++++++++++++++++++++++++++++---------------------
 1 file changed, 69 insertions(+), 53 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 630c1b2..1cebe2b 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -288,7 +288,7 @@ class LoreMailbox:
                 continue
 
             trailers, mismatches = fmsg.get_trailers(sloppy=sloppytrailers)
-            for tname, tvalue in mismatches:
+            for tname, tvalue, extdata in mismatches:
                 lser.trailer_mismatches.add((tname, tvalue, fmsg.fromname, fmsg.fromemail))
             lvl = 1
             while True:
@@ -512,9 +512,10 @@ class LoreSeries:
                 if self.has_cover and covertrailers and self.patches[0].followup_trailers:
                     lmsg.followup_trailers += self.patches[0].followup_trailers
                 if addmysob:
-                    lmsg.followup_trailers.append(('Signed-off-by', '%s <%s>' % (usercfg['name'], usercfg['email'])))
+                    lmsg.followup_trailers.append(('Signed-off-by',
+                                                   '%s <%s>' % (usercfg['name'], usercfg['email']), None))
                 if addlink:
-                    lmsg.followup_trailers.append(('Link', linkmask % lmsg.msgid))
+                    lmsg.followup_trailers.append(('Link', linkmask % lmsg.msgid, None))
 
                 if attpolicy != 'off':
                     lore_lookup = False
@@ -880,20 +881,12 @@ class LoreMessage:
 
         # We only pay attention to trailers that are sent in reply
         if self.reply:
-            # Do we have a Fixes: trailer?
-            matches = re.findall(r'^\s*Fixes:[ \t]+([a-f0-9]+\s+\(.*\))\s*$', self.body, re.MULTILINE)
-            if matches:
-                for tvalue in matches:
-                    self.trailers.append(('Fixes', tvalue))
-
-            # Do we have something that looks like a person-trailer?
-            matches = re.findall(r'^\s*([\w-]{2,}):[ \t]+(.*<\S+>)\s*$', self.body, re.MULTILINE)
-            # These are commonly part of patch/commit metadata
-            badtrailers = ('from', 'author', 'cc')
-            if matches:
-                for tname, tvalue in matches:
-                    if tname.lower() not in badtrailers:
-                        self.trailers.append((tname, tvalue))
+            trailers, others = LoreMessage.find_trailers(self.body)
+            for trailer in trailers:
+                # These are commonly part of patch/commit metadata
+                badtrailers = ('from', 'author', 'cc')
+                if trailer[0].lower() not in badtrailers:
+                    self.trailers.append(trailer)
 
     def get_trailers(self, sloppy=False):
         mismatches = set()
@@ -901,9 +894,9 @@ class LoreMessage:
             return self.trailers, mismatches
 
         trailers = list()
-        for tname, tvalue in self.trailers:
+        for tname, tvalue, extdata in self.trailers:
             if tname.lower() in ('fixes',):
-                trailers.append((tname, tvalue))
+                trailers.append((tname, tvalue, extdata))
                 continue
 
             tmatch = False
@@ -940,9 +933,9 @@ class LoreMessage:
                     logger.debug('  trailer fuzzy name match')
                     tmatch = True
             if tmatch:
-                trailers.append((tname, tvalue))
+                trailers.append((tname, tvalue, extdata))
             else:
-                mismatches.add((tname, tvalue))
+                mismatches.add((tname, tvalue, extdata))
 
         return trailers, mismatches
 
@@ -1184,6 +1177,43 @@ class LoreMessage:
         if i and m and p:
             self.attestation = LoreAttestation(i, m, p)
 
+    @staticmethod
+    def find_trailers(body):
+        # Fix some more common copypasta trailer wrapping
+        # Fixes: abcd0123 (foo bar
+        # baz quux)
+        body = re.sub(r'^(\S+:\s+[0-9a-f]+\s+\([^)]+)\n([^\n]+\))', r'\1 \2', body, flags=re.M)
+        # Signed-off-by: Long Name
+        # <email.here@example.com>
+        body = re.sub(r'^(\S+:\s+[^<]+)\n(<[^>]+>)$', r'\1 \2', body, flags=re.M)
+        # Signed-off-by: Foo foo <foo@foo.com>
+        # [for the thing that the thing is too long the thing that is
+        # thing but thing]
+        body = re.sub(r'^(\[[^]]+)\n([^]]+]$)', r'\1 \2', body, flags=re.M)
+        trailers = list()
+        others = list()
+        was_trailer = False
+        for line in body.split('\n'):
+            line = line.strip('\r')
+            matches = re.search(r'^(\w\S+):\s+(\S.*)', line, flags=re.I)
+            if matches:
+                was_trailer = True
+                groups = list(matches.groups())
+                groups.append(None)
+                trailers.append(groups)
+                continue
+            # Is it an extended info line, e.g.:
+            # Signed-off-by: Foo Foo <foo@foo.com>
+            # [for the foo bits]
+            if len(line) > 2 and line[0] == '[' and line[-1] == ']' and was_trailer:
+                trailers[-1][2] = line
+                was_trailer = False
+                continue
+            was_trailer = False
+            others.append(line)
+
+        return trailers, others
+
     @staticmethod
     def get_body_parts(body):
         # remove any starting/trailing blank lines
@@ -1193,8 +1223,6 @@ class LoreMessage:
         githeaders = list()
         # commit message
         message = ''
-        # all trailers we find preceding the ---
-        trailers = list()
         # everything below the ---
         basement = ''
         # conformant signature --\s\n
@@ -1215,35 +1243,20 @@ class LoreMessage:
 
         mbody = parts[0].strip('\n')
 
-        # Fix some more common copypasta trailer wrapping
-        # Fixes: abcd0123 (foo bar
-        # baz quux)
-        mbody = re.sub(r'^(\S+:\s+[0-9a-f]+\s+\([^)]+)\n([^\n]+\))', r'\1 \2', mbody, flags=re.M)
-        # Signed-off-by: Long Name
-        # <email.here@example.com>
-        mbody = re.sub(r'^(\S+:\s+[^<]+)\n(<[^>]+>)', r'\1 \2', mbody, flags=re.M)
-
         # Split into paragraphs
         bpara = mbody.split('\n\n')
 
         # Is every line of the first part in a header format?
         mparts = list()
-        for line in bpara[0].split('\n'):
-            matches = re.search(r'^(\w\S+):\s+(\S.*)', line, re.I | re.M)
-            if not matches:
-                githeaders = list()
-                mparts.append(bpara[0])
-                break
-            githeaders.append(matches.groups())
+        h, o = LoreMessage.find_trailers(bpara[0])
+        if len(o):
+            # Not everything was a header, so we don't treat it as headers
+            mparts.append(bpara[0])
+        else:
+            githeaders = h
 
         # Any lines of the last part match the header format?
-        nlines = list()
-        for line in bpara[-1].split('\n'):
-            matches = re.search(r'^(\w\S+):\s+(\S.*)', line, re.I | re.M)
-            if matches:
-                trailers.append(matches.groups())
-                continue
-            nlines.append(line)
+        trailers, nlines = LoreMessage.find_trailers(bpara[-1])
 
         if len(bpara) == 1:
             if githeaders == trailers:
@@ -1282,16 +1295,17 @@ class LoreMessage:
                 if fnmatch.fnmatch(trailer[0].lower(), trailermatch.strip()):
                     fixtrailers.append(trailer)
                     if trailer not in btrailers:
-                        logger.info('    + %s: %s' % trailer)
+                        logger.info('    + %s: %s' % (trailer[0], trailer[1]))
                     else:
-                        logger.debug('    . %s: %s' % trailer)
+                        logger.debug('    . %s: %s' % (trailer[0], trailer[1]))
 
         # Reconstitute the message
+        self.body = ''
         if bheaders:
-            self.body = '\n'.join('%s: %s' % h for h in bheaders)
-            self.body += '\n\n'
-        else:
-            self.body = ''
+            for bheader in bheaders:
+                # There is no [extdata] in git headers, so we ignore bheader[2]
+                self.body += '%s: %s\n' % (bheader[0], bheader[1])
+            self.body += '\n'
 
         if len(message):
             self.body += message + '\n'
@@ -1299,8 +1313,10 @@ class LoreMessage:
                 self.body += '\n'
 
         if len(fixtrailers):
-            self.body += '\n'.join('%s: %s' % t for t in fixtrailers)
-            self.body += '\n'
+            for trailer in fixtrailers:
+                self.body += '%s: %s\n' % (trailer[0], trailer[1])
+                if trailer[2]:
+                    self.body += '%s\n' % trailer[2]
         if len(basement):
             self.body += '---\n'
             self.body += basement
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 05/10] Tighten follow-up header parsing
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (3 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 04/10] Initial go at supporting [extra trailer data] Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 06/10] Use a more precise regex for email trailers Konstantin Ryabitsev
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

The combined routine was too broad for parsing follow-up messages, so
this tightens it to avoid too many false positive matches.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index 1cebe2b..d4a67a5 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -1179,6 +1179,8 @@ class LoreMessage:
 
     @staticmethod
     def find_trailers(body):
+        headers = ('subject', 'date', 'from')
+        nonperson = ('fixes', 'subject', 'date')
         # Fix some more common copypasta trailer wrapping
         # Fixes: abcd0123 (foo bar
         # baz quux)
@@ -1189,7 +1191,8 @@ class LoreMessage:
         # Signed-off-by: Foo foo <foo@foo.com>
         # [for the thing that the thing is too long the thing that is
         # thing but thing]
-        body = re.sub(r'^(\[[^]]+)\n([^]]+]$)', r'\1 \2', body, flags=re.M)
+        # (too false-positivey, commented out)
+        # body = re.sub(r'^(\[[^]]+)\n([^]]+]$)', r'\1 \2', body, flags=re.M)
         trailers = list()
         others = list()
         was_trailer = False
@@ -1197,8 +1200,17 @@ class LoreMessage:
             line = line.strip('\r')
             matches = re.search(r'^(\w\S+):\s+(\S.*)', line, flags=re.I)
             if matches:
-                was_trailer = True
                 groups = list(matches.groups())
+                # We only accept headers if we haven't seen any non-trailer lines
+                tname = groups[0].lower()
+                if len(others) and tname in headers:
+                    logger.debug('Ignoring %s (header after other content)', line)
+                    continue
+                mperson = re.search(r'<[^>]+>', groups[1])
+                if not mperson and tname not in nonperson:
+                    logger.debug('Ignoring %s (not a recognized non-person trailer)', line)
+                    continue
+                was_trailer = True
                 groups.append(None)
                 trailers.append(groups)
                 continue
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 06/10] Use a more precise regex for email trailers
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (4 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 05/10] Tighten follow-up header parsing Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 07/10] Set charset in order to generate MIME headers Konstantin Ryabitsev
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Still seeing false-positives for personal follow-up trailers, so tighten
a regex a bit further to make sure we don't match bogus content.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index d4a67a5..f0d5d24 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -1206,7 +1206,7 @@ class LoreMessage:
                 if len(others) and tname in headers:
                     logger.debug('Ignoring %s (header after other content)', line)
                     continue
-                mperson = re.search(r'<[^>]+>', groups[1])
+                mperson = re.search(r'<\S+@\S+\.\S+>', groups[1])
                 if not mperson and tname not in nonperson:
                     logger.debug('Ignoring %s (not a recognized non-person trailer)', line)
                     continue
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 07/10] Set charset in order to generate MIME headers
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (5 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 06/10] Use a more precise regex for email trailers Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 08/10] Use bytes when dumping to stdout Konstantin Ryabitsev
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Apparently, merely passing policy= doesn't generate the full set of
required headers, so make sure we do set_charset('utf-8').

Reported-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/ty.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/b4/ty.py b/b4/ty.py
index 382ce93..cbee06a 100644
--- a/b4/ty.py
+++ b/b4/ty.py
@@ -426,9 +426,9 @@ def send_messages(listing, gitdir, outdir, branch, since='1.week'):
         outgoing += 1
         outfile = os.path.join(outdir, '%s.thanks' % slug)
         logger.info('  Writing: %s', outfile)
-        bout = msg.as_string(policy=b4.emlpolicy)
+        msg.set_charset('utf-8')
         with open(outfile, 'wb') as fh:
-            fh.write(bout.encode('utf-8'))
+            fh.write(msg.as_bytes())
         logger.debug('Cleaning up: %s', jsondata['trackfile'])
         fullpath = os.path.join(datadir, jsondata['trackfile'])
         os.rename(fullpath, '%s.sent' % fullpath)
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 08/10] Use bytes when dumping to stdout
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (6 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 07/10] Set charset in order to generate MIME headers Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 09/10] Don't crash when no valid patches are found Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 10/10] Fix some cherry-picking corner cases Konstantin Ryabitsev
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

We don't need to needlessly convert to unicode when dumping to stdout,
especially when it can lead to crashers when we encounter other
charsets.

Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/mbox.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/b4/mbox.py b/b4/mbox.py
index fb82389..754d0f8 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -234,8 +234,8 @@ def mbox_to_am(mboxfile, cmdargs):
     am_mbx.close()
     if cmdargs.outdir == '-':
         logger.info('---')
-        with open(am_filename, 'r') as fh:
-            shutil.copyfileobj(fh, sys.stdout)
+        with open(am_filename, 'rb') as fh:
+            shutil.copyfileobj(fh, sys.stdout.buffer)
         os.unlink(am_filename)
 
     thanks_record_am(lser, cherrypick=cherrypick)
@@ -511,8 +511,8 @@ def main(cmdargs):
         mbx.close()
         if cmdargs.outdir == '-':
             logger.info('---')
-            with open(threadmbox, 'r') as fh:
-                shutil.copyfileobj(fh, sys.stdout)
+            with open(threadmbox, 'rb') as fh:
+                shutil.copyfileobj(fh, sys.stdout.buffer)
             os.unlink(threadmbox)
             return
 
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 09/10] Don't crash when no valid patches are found
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (7 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 08/10] Use bytes when dumping to stdout Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  2020-10-02 23:29 ` [PATCH 10/10] Fix some cherry-picking corner cases Konstantin Ryabitsev
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Error out when we don't find any patches in an mbox when trying to diff
series.

Reported-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/diff.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/b4/diff.py b/b4/diff.py
index 81e015a..10f3159 100644
--- a/b4/diff.py
+++ b/b4/diff.py
@@ -101,6 +101,9 @@ def diff_mboxes(cmdargs):
         lmbx = b4.LoreMailbox()
         for key, msg in mbx.items():
             lmbx.add_message(msg)
+        if len(lmbx.series) < 1:
+            logger.critical('No valid patches found in %s', mboxfile)
+            sys.exit(1)
         if len(lmbx.series) > 1:
             logger.critical('More than one series version in %s, will use latest', mboxfile)
 
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 10/10] Fix some cherry-picking corner cases
  2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
                   ` (8 preceding siblings ...)
  2020-10-02 23:29 ` [PATCH 09/10] Don't crash when no valid patches are found Konstantin Ryabitsev
@ 2020-10-02 23:29 ` Konstantin Ryabitsev
  9 siblings, 0 replies; 11+ messages in thread
From: Konstantin Ryabitsev @ 2020-10-02 23:29 UTC (permalink / raw)
  To: signatures

Handle several corner cases when trying to cherrypick from incomplete
series.

Reported-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
---
 b4/__init__.py | 12 ++++++++----
 b4/mbox.py     | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/b4/__init__.py b/b4/__init__.py
index f0d5d24..dc393a9 100644
--- a/b4/__init__.py
+++ b/b4/__init__.py
@@ -504,10 +504,14 @@ class LoreSeries:
         at = 1
         atterrors = list()
         for lmsg in self.patches[1:]:
-            if cherrypick is not None and at not in cherrypick:
-                at += 1
-                logger.debug('  skipped: [%s/%s] (not in cherrypick)', at, self.expected)
-                continue
+            if cherrypick is not None:
+                if at not in cherrypick:
+                    at += 1
+                    logger.debug('  skipped: [%s/%s] (not in cherrypick)', at, self.expected)
+                    continue
+                if lmsg is None:
+                    logger.critical('CRITICAL: [%s/%s] is missing, cannot cherrypick', at, self.expected)
+                    raise KeyError('Cherrypick not in series')
             if lmsg is not None:
                 if self.has_cover and covertrailers and self.patches[0].followup_trailers:
                     lmsg.followup_trailers += self.patches[0].followup_trailers
diff --git a/b4/mbox.py b/b4/mbox.py
index 754d0f8..2ec74c9 100644
--- a/b4/mbox.py
+++ b/b4/mbox.py
@@ -86,12 +86,12 @@ def mbox_to_am(mboxfile, cmdargs):
             at = 0
             for lmsg in lser.patches[1:]:
                 at += 1
-                if lmsg.msgid == msgid:
+                if lmsg and lmsg.msgid == msgid:
                     cherrypick = [at]
-                    cmdargs.cherrypick = '5'
+                    cmdargs.cherrypick = f'<{msgid}>'
                     break
             if not len(cherrypick):
-                logger.critical('Specified msgid is not present in the series, cannot cherry-pick')
+                logger.critical('Specified msgid is not present in the series, cannot cherrypick')
                 sys.exit(1)
         elif cmdargs.cherrypick.find('*') >= 0:
             # Globbing on subject
@@ -110,10 +110,14 @@ def mbox_to_am(mboxfile, cmdargs):
 
     logger.critical('Writing %s', am_filename)
     mbx = mailbox.mbox(am_filename)
-    am_mbx = lser.save_am_mbox(mbx, noaddtrailers=cmdargs.noaddtrailers,
-                               covertrailers=covertrailers, trailer_order=config['trailer-order'],
-                               addmysob=cmdargs.addmysob, addlink=cmdargs.addlink,
-                               linkmask=config['linkmask'], cherrypick=cherrypick)
+    try:
+        am_mbx = lser.save_am_mbox(mbx, noaddtrailers=cmdargs.noaddtrailers,
+                                   covertrailers=covertrailers, trailer_order=config['trailer-order'],
+                                   addmysob=cmdargs.addmysob, addlink=cmdargs.addlink,
+                                   linkmask=config['linkmask'], cherrypick=cherrypick)
+    except KeyError:
+        sys.exit(1)
+
     logger.info('---')
 
     if cherrypick is None:
@@ -153,7 +157,7 @@ def mbox_to_am(mboxfile, cmdargs):
                 logger.info('Prepared a fake commit range for 3-way merge (%.12s..%.12s)', rstart, rend)
 
     logger.critical('---')
-    if not lser.complete:
+    if not lser.complete and not cmdargs.cherrypick:
         logger.critical('WARNING: Thread incomplete!')
 
     if lser.has_cover and not cmdargs.nocover:
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2020-10-02 23:29 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-02 23:29 [PATCH 00/10] Entirely fake patch set Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 01/10] Use shorter cache file names Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 02/10] Preserve trailer order by default Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 03/10] Don't force trailers into a set Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 04/10] Initial go at supporting [extra trailer data] Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 05/10] Tighten follow-up header parsing Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 06/10] Use a more precise regex for email trailers Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 07/10] Set charset in order to generate MIME headers Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 08/10] Use bytes when dumping to stdout Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 09/10] Don't crash when no valid patches are found Konstantin Ryabitsev
2020-10-02 23:29 ` [PATCH 10/10] Fix some cherry-picking corner cases Konstantin Ryabitsev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).