All of lore.kernel.org
 help / color / mirror / Atom feed
* darcs2git.py - convert darcs repository using gfi
@ 2007-02-11 23:56 Han-Wen Nienhuys
  2007-02-12  1:14 ` Shawn O. Pearce
  0 siblings, 1 reply; 5+ messages in thread
From: Han-Wen Nienhuys @ 2007-02-11 23:56 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 1081 bytes --]

The attached Python script is an attempt at providing a sane
conversion from Darcs to Git.  It tries to map darcs conflict
resolutions onto git branch merges.

Regarding GFI, it's a breeze to work with; my compliments to its
author. My only gripe is the need to specify a branch for each commit.
Darcs uses changeset-based storage. It doesn't really have branches,
but it does record divergent changes and merges of the resulting
conflicts.  Hence, it's not clear which refs/heads/BRANCH should be
used when creating a commit object.

I found it easiest to write each commit to a

  refs/heads/darcs-tmp-COUNT

branch, then use the reset command at the end to specify which commits
are the tips of branches, and delete the temporary branches.

So, my feature request: please make the "commit" command always accept
a "from" command, and make the "refs" argument optional.  This will
clean up my converter, and separate out two logical functions of the
gfi "commit" command: creating a commit object, and advancing the head
ref.


-- 
 Han-Wen Nienhuys - hanwen@xs4all.nl - http://www.xs4all.nl/~hanwen

[-- Attachment #2: darcs2git.py --]
[-- Type: text/x-python, Size: 12160 bytes --]

import os  
import sys
import time
import xml.dom.minidom
import re
import gdbm as dbmodule
import gzip
import optparse

################################################################
# globals

# When true, read_pipe () and system () do not echo the commands they run.
# get_cli_options () sets it to the inverse of --verbose.
silent=False
# Author e-mail -> full name.  Filled from the --authors file and consulted
# by DarcsPatch.extract_author () when the author field has no "Name <mail>"
# form.
mail_to_name_dict = {}
# Patch number -> DarcsPatch for exported commits that no later commit has
# yet used as its parent or merged; maintained by export_commit () and
# turned into branch heads by export_pending ().
pending_patches = {}
# NOTE(review): not referenced anywhere else in the visible code.
used_tags = {}

################################################################
# utils

class PullConflict(Exception):
    """Raised by DarcsConversionRepo.go_from_to when a pull does not apply cleanly."""
class CommandFailed(Exception):
    """Raised by read_pipe and system when a shell command exits with a failure status."""

def progress(s):
    """Write the message S, followed by a newline, to standard error."""
    line = s + '\n'
    sys.stderr.write(line)
    
def get_cli_options():
    """Parse the command line and return the tuple (options, args).

    Side effects:
      - sets the module-level ``silent`` flag to the inverse of --verbose;
      - each --authors FILE adds its EMAIL=NAME entries to
        ``mail_to_name_dict``.

    When no --destination is given, ``options.target_git_repo`` defaults
    to the base name of the darcs repo with '.darcs' removed and '.git'
    appended.  Prints the help text and exits with status 2 when no
    DARCS-REPO argument is present.
    """
    global silent

    opt_parser = optparse.OptionParser()

    opt_parser.usage = '''darcs2git [OPTIONS] DARCS-REPO'''
    opt_parser.description = '''Convert darcs repo to git.

This tool is a one shot conversion utility for Darcs repositories.  It
requires Git version that has git-fast-import.  It does not support
incremental updating.

This tool will import the patches in chronological order, and only creates
merges when a resolved conflict is detected.

TODO:

- correct time zone handling
- 

'''

    def update_map(option, opt, value, parser):
        """optparse callback for --authors: load EMAIL=NAME lines from VALUE."""
        author_file = open(value)
        try:
            for line in author_file:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no entry.
                    continue
                if '=' not in line:
                    raise optparse.OptionValueError(
                        '%s: expected EMAIL=NAME, got %r' % (opt, line))
                # Split on the first '=' only, so a name may contain '='.
                (mail, name) = line.split('=', 1)
                mail_to_name_dict[mail.strip()] = name.strip()
        finally:
            author_file.close()

    opt_parser.add_option('-a', '--authors', action='callback',
                          callback=update_map,
                          type='string',
                          nargs=1,
                          help='read a text file, containing EMAIL=NAME lines')

    opt_parser.add_option('-d', '--destination', action='store',
                          type='string',
                          default='',
                          dest='target_git_repo',
                          help='where to put the resulting Git repo.')

    opt_parser.add_option('--verbose', action='store_true',
                          dest='verbose',
                          default=False,
                          help='show commands as they are invoked')

    options, args = opt_parser.parse_args()
    if not args:
        opt_parser.print_help()
        sys.exit(2)

    silent = not options.verbose

    if not options.target_git_repo:
        repo_path = os.path.abspath(args[0])
        base = os.path.basename(repo_path).replace('.darcs', '')
        options.target_git_repo = base + '.git'

    return (options, args)

def read_pipe(cmd, ignore_errors=False):
    """Run CMD through the shell and return everything it wrote to stdout.

    The command is echoed via progress () unless the global ``silent``
    flag is set.  Raises CommandFailed when the command reports a failure
    status, unless IGNORE_ERRORS is true.
    """
    if not silent:
        progress('pipe %s' % cmd)

    child = os.popen(cmd)
    output = child.read()
    exit_status = child.close()

    if ignore_errors or not exit_status:
        return output
    raise CommandFailed("Pipe failed: %s" % cmd)

def system(c, ignore_error=0):
    """Run shell command C, echoing it first unless ``silent`` is set.

    Raises CommandFailed when os.system reports a non-zero status, unless
    IGNORE_ERROR is true.
    """
    if not silent:
        progress(c)

    status = os.system(c)
    if status and not ignore_error:
        raise CommandFailed("Command failed: %s" % c)

def darcs_date_to_git(x):
    """Convert a darcs patch date to seconds since the Unix epoch, as a string.

    X is the ``date`` attribute reported by ``darcs changes --xml``, in
    YYYYMMDDHHMMSS form.  darcs records that field in UTC, so it is
    converted with calendar.timegm; time.mktime would interpret it as
    local time and shift every commit by the converting machine's UTC
    offset.

    Raises ValueError when X does not match the expected format.
    """
    # Imported here because the file's import block does not provide it.
    import calendar

    t = time.strptime(x, '%Y%m%d%H%M%S')
    return '%d' % calendar.timegm(t)

def darcs_timezone(x):
    """Return the git timezone offset to use for darcs ``local_date`` X.

    X is parsed, so a value that does not match the expected layout raises
    ValueError, but the zone is not yet derived from it: the result is
    always "+0100".
    """
    local_date_format = '%a %b %d %H:%M:%S %Z %Y'
    time.strptime(x, local_date_format)

    # todo: derive the offset from X instead of hard-coding it.
    return "+0100"

################################################################
# darcs

class DarcsConversionRepo:
    """Scratch darcs repository that is rebuilt for every patch to export.

    dir     -- path of the scratch repository; go_from_to () recreates it
               and clean () deletes it.
    patches -- list of all DarcsPatch objects, indexed by patch number.
    """

    def __init__(self, dir, patches):
        self.dir = dir
        self.patches = patches

    def clean(self):
        """Delete the scratch repository from disk."""
        system('rm -rf %s' % self.dir)

    def pull(self, patch):
        """Pull PATCH, selected by its hash, from its source repo into the scratch repo."""
        fields = {
            'id': patch.attributes['hash'],
            'source_repo': patch.dir,
            'dir': self.dir,
        }
        system('cd %(dir)s && darcs pull --quiet --all --match "hash %(id)s" %(source_repo)s ' % fields)

    def go_from_to(self, from_patch, to_patch):
        """Move the repo to FROM_PATCH, then go to TO_PATCH.  Raise
        PullConflict if a conflict is detected.

        This uses the fishy technique of writing the inventory and
        constructing the pristine tree with 'darcs repair'.

        It might be quicker and/or more correct to wind/rewind the
        repo with pull and unpull.
        """
        scratch = os.path.abspath(self.dir)
        fields = {'dir': scratch}
        system('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo  %(dir)s'
               % fields)

        if from_patch:
            # Hard-link every patch up to and including FROM_PATCH into the
            # fresh repo and list them in its inventory.
            patch_dir = scratch + '/_darcs/patches/'
            inventory = open(scratch + '/_darcs/inventory', 'w')
            for earlier in self.patches[:from_patch.number + 1]:
                patch_file = earlier.filename()
                os.link(patch_file, patch_dir + os.path.basename(patch_file))
                inventory.write(earlier.header())

            inventory.close()

            progress('Go to patch %d' % from_patch.number)
            system('cd %(dir)s && darcs repair --quiet' % fields)
            system('rsync -a  %(dir)s/_darcs/pristine/ %(dir)s/' % fields)

        try:
            self.pull(to_patch)
            resolve_output = read_pipe('cd %(dir)s && echo y|darcs resolve' % fields)
        except CommandFailed:
            raise PullConflict()

        if 'No conflicts to resolve' not in resolve_output:
            raise PullConflict()

    def has_patch(self, p):
        """Return whether the file for patch P exists in the scratch repo."""
        return os.path.exists(self.dir + '/_darcs/patches/' + p.attributes['hash'])

    def pristine_tree(self):
        """Return the path of the scratch repo's pristine tree."""
        return self.dir + '/_darcs/pristine'
    
class DarcsPatch:
    """One patch of the source darcs repository.

    xml -- the minidom <patch> element from ``darcs changes --xml``.
    dir -- path of the darcs repository the patch lives in.

    The constructor copies the element's XML attributes into
    ``self.attributes`` and fills in ``author_name``, ``author_mail``,
    ``message`` and ``date``.  ``number`` starts at -1 and is assigned by
    get_darcs_patches ().
    """

    def __init__(self, xml, dir):
        self.xml = xml
        self.dir = dir
        self.number = -1
        self.attributes = dict(xml.attributes.items())

        # fixme: ugh attributes vs. methods.
        self.extract_author()
        self.extract_message()
        self.extract_time()

    def _utf8(self, text):
        """Return TEXT as a plain ``str``, UTF-8 encoding it if necessary.

        On Python 2, minidom hands back unicode objects.  The export
        functions pass these values to len () and write () for
        git-fast-import, which needs byte counts and byte strings;
        non-ASCII unicode would give a wrong length or fail to write.
        A value that already is a ``str`` is returned unchanged.
        """
        if isinstance(text, str):
            return text
        return text.encode('utf-8')

    def filename(self):
        """Return the path of this patch's file inside the source repo."""
        return self.dir + '/_darcs/patches/' + self.attributes['hash']

    def contents(self):
        """Return the decompressed contents of the patch file."""
        f = gzip.open(self.filename())
        try:
            return f.read()
        finally:
            # Close explicitly; the conversion reads the same patches many
            # times and should not rely on garbage collection.
            f.close()

    def header(self):
        """Return the patch header as it must appear in a darcs inventory.

        The header is the name line, the committer line, and, when the
        committer line does not already end the header, the comment lines
        up to and including the closing ']'.
        """
        lines = self.contents().split('\n')

        name = lines[0]
        committer = lines[1] + '\n'
        committer = re.sub('] {\n$', ']\n', committer)
        committer = re.sub('] *\n$', ']\n', committer)
        comment_lines = []
        if not committer.endswith(']\n'):
            for l in lines[2:]:
                # startswith, not l[0]: an empty line must not raise
                # IndexError.
                if l.startswith(']'):
                    comment_lines.append(']\n')
                    break
                comment_lines.append(l + '\n')

        return name + '\n' + committer + ''.join(comment_lines)

    def extract_author(self):
        """Set ``author_name`` and ``author_mail`` from the author attribute.

        A "Name <mail>" value is split.  Otherwise the whole value is the
        mail address and the name comes from ``mail_to_name_dict`` or,
        failing that, from the part of the address before '@'.
        """
        mail = self.attributes['author']
        name = ''
        m = re.search("^(.*) <(.*)>$", mail)

        if m:
            name = m.group(1)
            mail = m.group(2)
        else:
            try:
                name = mail_to_name_dict[mail]
            except KeyError:
                name = mail.split('@')[0]

        self.author_name = self._utf8(name)
        self.author_mail = self._utf8(mail)

    def extract_time(self):
        """Set ``date`` to '<epoch seconds> <timezone offset>'."""
        self.date = darcs_date_to_git(self.attributes['date']) + ' ' + darcs_timezone(self.attributes['local_date'])

    def name(self):
        """Return the text of the <name> element, or '(no comment)' if absent or empty."""
        patch_name = '(no comment)'
        try:
            name_elt = self.xml.getElementsByTagName('name')[0]
            patch_name = name_elt.childNodes[0].data
        except IndexError:
            pass
        return patch_name

    def extract_message(self):
        """Set ``message`` to the patch name, a blank line, and the comment.

        The name is prefixed with 'UNDO: ' for inverted patches.  A missing
        or empty <comment> element yields an empty comment.
        """
        patch_name = self.name()
        comment = ''
        comment_elts = self.xml.getElementsByTagName('comment')
        # An empty <comment/> element has no child nodes to read from.
        if comment_elts and comment_elts[0].childNodes:
            comment = comment_elts[0].childNodes[0].data

        if self.attributes.get('inverted') == 'True':
            patch_name = 'UNDO: ' + patch_name

        self.message = self._utf8('%s\n\n%s' % (patch_name, comment))

    def tag_name(self):
        """Return the tag name for a 'TAG ...' patch, or '' for other patches.

        Surrounding whitespace is dropped first; remaining whitespace and
        ':' characters are replaced with '_'.
        """
        patch_name = self.name()
        if not patch_name.startswith("TAG "):
            return ''

        # Strip before substituting: afterwards the whitespace has already
        # become underscores and strip () no longer removes anything.
        tag = patch_name[4:].strip()
        return self._utf8(re.sub(r'[\s:]', '_', tag))

def get_darcs_patches(darcs_repo):
    """Return the patches of DARCS_REPO as a list of numbered DarcsPatch objects.

    The patches are taken from ``darcs changes --xml --reverse`` and
    numbered from 0 in the order darcs lists them.
    """
    progress('reading patches.')

    changes_xml = read_pipe('darcs changes --xml --reverse --repo ' + darcs_repo)
    document = xml.dom.minidom.parseString(changes_xml)
    patch_elements = document.documentElement.getElementsByTagName('patch')

    patches = [DarcsPatch(element, darcs_repo) for element in patch_elements]
    for number, patch in enumerate(patches):
        patch.number = number

    return patches

################################################################
# GIT export

def export_tree(tree, gfi):
    """Write a full snapshot of directory TREE to the fast-import stream GFI.

    Emits 'deleteall', then one inline 'M 644' entry per file found under
    TREE (with its path relative to TREE), then a terminating blank line.
    """
    tree = os.path.normpath(tree)
    prefix = tree + '/'
    gfi.write('deleteall\n')
    for (root, dirs, files) in os.walk(tree):
        for f in files:
            path = os.path.normpath(os.path.join(root, f))
            source = open(path)
            try:
                s = source.read()
            finally:
                source.close()

            # Remove only the leading TREE component.  str.replace would
            # also delete any later occurrence of the same text and so
            # corrupt the path stored in git.
            if path.startswith(prefix):
                path = path[len(prefix):]

            gfi.write('M 644 inline %s\n' % path)
            gfi.write('data %d\n%s\n' % (len(s), s))
    gfi.write('\n')

    
def export_commit(repo, patch, last_patch, gfi):
    """Write PATCH as a commit on its own temporary branch to stream GFI.

    LAST_PATCH, if given, becomes the commit's parent.  Every pending
    patch that REPO already contains is added as a merge parent.  Parents
    are removed from ``pending_patches`` and PATCH is added to it.  The
    commit's tree is REPO's pristine tree.
    """
    gfi.write('commit refs/heads/darcstmp%d\n' % patch.number)
    gfi.write('mark :%d\n' % (patch.number + 1))
    identity = (patch.author_name, patch.author_mail, patch.date)
    gfi.write('committer %s <%s> %s\n' % identity)
    gfi.write('data %d\n%s\n' % (len(patch.message), patch.message))

    if last_patch:
        gfi.write('from :%d\n' % (last_patch.number + 1))
        # The parent is no longer a branch tip.
        pending_patches.pop(last_patch.number, None)

    # Iterate over a copy: entries are deleted while looping.
    for (number, pending) in list(pending_patches.items()):
        if repo.has_patch(pending):
            gfi.write('merge :%d\n' % (number + 1))
            del pending_patches[number]

    pending_patches[patch.number] = patch
    export_tree(repo.pristine_tree(), gfi)

def export_pending(gfi):
    """Turn the patches left in ``pending_patches`` into branch heads on GFI.

    - Nothing pending: nothing is written.
    - One pending patch: refs/heads/master is reset to it.
    - Several: each gets a refs/heads/master<N> branch, and a
      'tie together' merge commit on refs/heads/master joins them.

    Patches are handled in ascending patch-number order so the output does
    not depend on dictionary ordering.  The lowest-numbered pending patch
    supplies the committer line and the first parent of the merge.
    """
    pending = sorted(pending_patches.items(), key=lambda item: item[0])
    if not pending:
        # No patches were exported, so there is no commit to point at.
        return

    if len(pending) == 1:
        gfi.write('reset refs/heads/master\n')
        gfi.write('from :%d\n\n' % (pending[0][1].number + 1))
        return

    for (n, p) in pending:
        gfi.write('reset refs/heads/master%d\n' % n)
        gfi.write('from :%d\n\n' % (n + 1))

    patches = [p for (n, p) in pending]
    patch = patches[0]
    gfi.write('commit refs/heads/master\n')
    gfi.write('committer %s <%s> %s\n' % (patch.author_name,
                                           patch.author_mail,
                                           patch.date))
    msg = 'tie together'
    gfi.write('data %d\n%s\n' % (len(msg), msg))
    gfi.write('from :%d\n' % (patch.number + 1))
    for p in patches[1:]:
        gfi.write('merge :%d\n' % (p.number + 1))
    gfi.write('\n')
        
    
def export_tag(patch, gfi):
    """Write an annotated tag for PATCH to the fast-import stream GFI.

    The tag is named by patch.tag_name (), points at the mark of PATCH's
    commit, and carries the patch's author, date and message.
    """
    gfi.write('tag %s\n' % patch.tag_name())
    gfi.write('from :%d\n' % (patch.number + 1))

    tagger = (patch.author_name, patch.author_mail, patch.date)
    gfi.write('tagger %s <%s> %s\n' % tagger)

    message = patch.message
    gfi.write('data %d\n%s\n' % (len(message), message))
    
################################################################
# main.
def main():
    """Convert the darcs repo named on the command line into a new bare git repo.

    Each patch is pulled into a scratch darcs repo on top of its
    predecessor; on a conflict an earlier base patch is tried.  The
    resulting tree is streamed to git-fast-import as a commit.
    """
    (options, args) = get_cli_options()

    darcs_repo = os.path.abspath(args[0])
    git_repo = os.path.abspath(options.target_git_repo)
    paths = {'git_repo': git_repo}

    system('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % paths)
    os.environ['GIT_DIR'] = git_repo

    gfi = os.popen('git-fast-import', 'w')

    patches = get_darcs_patches(darcs_repo)

    conv_repo = DarcsConversionRepo("darcs2git.tmpdarcs", patches)
    for patch in patches:
        parent = patch.number - 1

        base = None
        while True:
            if parent >= 0:
                base = patches[parent]

            try:
                conv_repo.go_from_to(base, patch)
            except PullConflict:
                ## simplistic, may not be enough.
                progress('conflict, going one back')
                parent -= 1

                if parent < 0:
                    raise Exception('urg')
            else:
                break

        progress('Export %d -> %d (total %d)' % (parent,
                                                 patch.number, len(patches)))
        export_commit(conv_repo, patch, base, gfi)
        if patch.tag_name():
            export_tag(patch, gfi)

    export_pending(gfi)
    gfi.close()

    # The per-commit temporary branches are only needed while importing.
    system('rm %(git_repo)s/refs/heads/darcstmp*' % paths)
    conv_repo.clean()
    
# Run the conversion only when executed as a script, so that importing this
# module (e.g. to reuse or test its helpers) has no side effects.
if __name__ == '__main__':
    main()

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-02-19  8:25 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-02-11 23:56 darcs2git.py - convert darcs repository using gfi Han-Wen Nienhuys
2007-02-12  1:14 ` Shawn O. Pearce
2007-02-13 22:42   ` Han-Wen Nienhuys
2007-02-18 12:45   ` git-fast-export ? Han-Wen Nienhuys
2007-02-19  8:25     ` Shawn O. Pearce

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.