"""Walk a git history backwards and collect the distinct ``From`` headers.

Starting at ``master``, the ``m`` blob of every commit is parsed as an email
message. The walk ends as soon as one of these holds:

* more than 1000 distinct ``From`` values have been collected,
* the message whose ``Message-ID`` equals ``our_last_id`` is reached,
* the current commit does not have exactly one parent.

The collected ``From`` values are pretty-printed when the walk ends.
"""
from email.message import EmailMessage
from email.parser import BytesParser
from email.policy import default
import pprint

from git import Repo

# Message-ID at which the walk should stop.  Example of a real value:
#   '<20180711142744.GN3593@linux.vnet.ibm.com>'
# NOTE(review): with the empty string this stop condition is presumably never
# hit, since a parsed Message-ID is not expected to be '' -- confirm intent.
our_last_id = ''

repo = Repo('/Users/spaans/xsrc/lkml/lkml/git/6.git')
commit = repo.commit("master")

# Only consumed by the disabled dump-to-file code at the bottom of the loop.
counter = 5000

# Distinct From header values seen so far (None is added if a message has no
# From header, because a missing header is looked up as None).
froms = set()

# The parser holds no per-message state, so one instance serves every commit.
parser = BytesParser(policy=default)

while True:
    # NOTE(review): the item lookup raises KeyError when the commit's tree has
    # no entry named 'm' -- confirm every commit in this history has one.
    raw = commit.tree['m'].data_stream.read()
    parsed = parser.parsebytes(raw)

    message_id = parsed['Message-ID']
    froms.add(parsed['From'])
    print(message_id)

    # Debugging hook, disabled:
    # import pdb; pdb.set_trace()

    if len(froms) > 1000:
        print("HAVE LOTS OF FRIENDS NOW")
        break

    if message_id == our_last_id:
        print("LADIES & GENTLEMEN, WE'VE GOT HIM")
        break

    # Only a strictly linear history is followed: a root commit (no parents)
    # or a merge (several parents) ends the walk.
    ancestors = commit.parents
    if len(ancestors) != 1:
        print("WUH")
        break
    commit = ancestors[0]

    # Disabled: dump every visited message to a numbered .eml file.
    # with open("output/%04d.eml" % counter, "bw") as f:
    #     f.write(raw)
    counter -= 1

pprint.pprint(froms)