#!/usr/bin/env python
"""Copy one user's Cyrus IMAP mailbox tree into a Maildir++ directory.

The script reads ``configdirectory`` and ``partition-default`` from the
Cyrus ``imapd.conf``, loads the user's ``.seen`` skiplist database, walks
the user's spool directory and copies every message file into the Maildir
target.  Messages recorded as seen get the Maildir ``S`` flag, and the
modification time of each copy is set to the time the message was received.

The code deliberately sticks to constructs that are valid on both
Python 2.6+ and Python 3.
"""
import sys
import os
import time
import getopt
import getpass
import socket
import struct
import mmap
import stat
import email
import email.utils
import shutil
import pprint


def _default_mailbox():
    """Return the login name of the current user, or '' if it is unknown.

    ``getpass.getuser()`` raises when the uid has no passwd entry and no
    LOGNAME/USER style variable is set; that must not break importing.
    """
    try:
        return getpass.getuser()
    except (KeyError, OSError, ImportError):
        return ''


# Settings; the command line options handled in main() override them.
verbose = 0
imapdconf = '/etc/imapd.conf'
maildir = os.path.expanduser('~/Maildir.cyrus')
mailbox = _default_mailbox()

# Filled in by migrate().
configdir = None    # value of "configdirectory" in imapd.conf
partition = None    # spool directory of the mailbox being migrated
seen = None         # Skiplist: folder uniqueid -> seen record


def usage(status=0):
    """Print the help text with the current settings and exit.

    status -- process exit status (0 when help was asked for explicitly).
    """
    print("""%s [-h] [-v]* [--maildir <dir>] [--imapdconf <file>] [--user <name>]
    -h: produce this help text
    -v: increase verbose level (can be repeated)
    --maildir <dir>: use <dir> as the Maildir target
        currently: %s
    --imapdconf <file>: use <file> as the Cyrus imapd.conf
        currently: %s
    --user <name>: process mailbox <name>
        currently: %s
""" % (os.path.basename(sys.argv[0]), maildir, imapdconf, mailbox))
    sys.exit(status)


# Number of messages copied so far; makes the Maildir file names unique.
counter = 0

# Cyrus databases
# deliver.db            berkeley
# annotations.db        skiplist
# user/m/mailbox.seen   skiplist
# user/m/mailbox.sub    flat

# Record type codes used in a skiplist file.
rectypes = {
    1: 'INORDER',
    2: 'ADD',
    4: 'DELETE',
    255: 'COMMIT',
    257: 'DUMMY',
}

_SKIPLIST_MAGIC = b"\241\002\213\015skiplist file\0\0\0"
# magic (20 bytes) followed by seven 32-bit header fields
_SKIPLIST_HEADER_SIZE = len(_SKIPLIST_MAGIC) + 28
# value that terminates the list of skip pointers of a record
_SKIPLIST_END = 0xFFFFFFFF
_U32 = struct.Struct('!I')


def _native(raw):
    """Return raw file bytes as the native ``str`` type.

    On Python 2 bytes already are ``str``.  On Python 3 they are decoded as
    latin-1, which maps every byte to one character and so cannot fail.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def _read_u32(buf, pos):
    """Return (big-endian 32-bit value at buf[pos], position after it)."""
    return _U32.unpack_from(buf, pos)[0], pos + 4


def _padded(length):
    """Round length up to a multiple of four (on-disk field alignment)."""
    return (length + 3) & ~3


class Skiplist(dict):
    """Read an entire Cyrus skiplist database into memory.

    The instance is a plain dict mapping each key to its data, both as
    native strings.  Records are replayed in file order: INORDER and ADD
    records set a key, a DELETE record removes the key stored by the record
    it points at, COMMIT records are skipped, and the DUMMY head node (which
    carries no key) is not stored.

    For the skiplist db format, see Cyrus imapd's source file
    src/cyrusdb_skiplist.c and also this Python implementation:
    http://oss.netfarm.it/download/skiplist.py

    Raises ValueError if the file is not a version 1 skiplist or contains an
    unknown record type, and struct.error if the file is truncated in the
    middle of a record.
    """

    def __init__(self, name):
        dict.__init__(self)
        # The format is binary: read it as bytes, and let the context
        # manager close the file even if parsing fails later.
        with open(name, 'rb') as f:
            raw = f.read()

        if raw[:len(_SKIPLIST_MAGIC)] != _SKIPLIST_MAGIC:
            raise ValueError('%s: not a Cyrus skiplist file' % name)
        if len(raw) < _SKIPLIST_HEADER_SIZE:
            raise ValueError('%s: truncated skiplist header' % name)
        (major, minor, maxlevel, curlevel, listsize, log_start,
         last_recovery) = struct.unpack_from('!7I', raw, len(_SKIPLIST_MAGIC))
        # skiplist DB format version 1
        if major != 1:
            raise ValueError('%s: unsupported skiplist version %d'
                             % (name, major))

        key_at = {}     # record offset -> key stored by that record
        home = {}       # key -> offset of the record currently holding it
        pos = _SKIPLIST_HEADER_SIZE
        size = len(raw)
        while pos < size:
            start = pos
            code, pos = _read_u32(raw, pos)
            try:
                rectype = rectypes[code]
            except KeyError:
                raise ValueError('%s: unknown record type %d at offset %d'
                                 % (name, code, start))
            if rectype == 'COMMIT':
                continue
            if rectype == 'DELETE':
                # A DELETE record holds the offset of the record it removes.
                # Only honour it while that record is still the one that
                # provides the key, so a newer ADD is never undone.
                target, pos = _read_u32(raw, pos)
                victim = key_at.get(target)
                if victim is not None and home.get(victim) == target:
                    del self[victim]
                    del home[victim]
                continue

            # INORDER, ADD and DUMMY share one layout: key, data (each
            # padded to four bytes), then skip pointers ended by 0xFFFFFFFF.
            keysize, pos = _read_u32(raw, pos)
            key = raw[pos:pos + keysize]
            pos += _padded(keysize)
            datasize, pos = _read_u32(raw, pos)
            data = raw[pos:pos + datasize]
            pos += _padded(datasize)
            pointer = 0
            while pointer != _SKIPLIST_END:
                pointer, pos = _read_u32(raw, pos)

            if rectype == 'DUMMY':
                continue
            key = _native(key)
            self[key] = _native(data)
            key_at[start] = key
            home[key] = start


# Signature line that opens every cyrus.header file.  The file continues
# with a fixed quotation; _read_uniqueid() skips it up to its attribution
# line instead of comparing it byte-for-byte, so the way the quotation is
# wrapped over lines does not matter.
header_magic = b"\241\002\213\015Cyrus mailbox header\n"
_HEADER_ATTRIBUTION = b"\t--Jim Morris on Andrew\n"

# Template for the destination file name; the remaining fields are
# (maildir, folder, unix time, counter, flags).
rand_base = '%%s/%%s/cur/%%d.%dFMIc%%d.%s:2,%%s' % \
            (os.getpid(), socket.gethostname())


def _make_maildir_folder(folder):
    """Create folder and its tmp/new/cur subdirectories where missing."""
    for path in (folder,
                 os.path.join(folder, 'tmp'),
                 os.path.join(folder, 'new'),
                 os.path.join(folder, 'cur')):
        if not os.path.isdir(path):
            os.mkdir(path)


def _read_uniqueid(header_path):
    """Return the folder uniqueid stored in a cyrus.header file.

    The line after the header preamble has the form "quotaroot<TAB>uniqueid"
    (see mailbox_read_header in Cyrus src/mailbox.c).  Returns None when
    that line has no second field.  Raises ValueError if the file does not
    start with the expected preamble.
    """
    with open(header_path, 'rb') as f:
        if f.readline() != header_magic:
            raise ValueError('%s: not a Cyrus mailbox header' % header_path)
        line = f.readline()
        while line and line != _HEADER_ATTRIBUTION:
            line = f.readline()
        if not line:
            raise ValueError('%s: truncated Cyrus mailbox header'
                             % header_path)
        fields = f.readline().split(b'\t')
    if len(fields) < 2:
        return None
    return _native(fields[1].strip())


def _message_uid(name):
    """Return the UID encoded in a message file name ("<uid>."), else None."""
    if not name.endswith('.'):
        return None
    try:
        return int(name[:-1])
    except ValueError:
        return None


def onelevel(dirpath, dirnames, filenames):
    """Migrate one directory of the Cyrus spool (one os.walk() step).

    dirpath, dirnames, filenames -- a tuple as produced by os.walk().

    Creates the matching Maildir folder (including the top-level one) and
    copies every message file into its cur/ directory.  The copy is flagged
    'S' when the message UID is in the folder's seen set, and its access and
    modification times are set to the message's received time.

    Raises ValueError if the directory contains files but no cyrus.header.
    """
    global counter
    # "/Sub/Folder" below the mailbox root becomes ".Sub.Folder"
    relpath = dirpath[len(partition):].replace('/', '.')
    if verbose:
        print(relpath)
    if relpath in ('', '.'):
        folder = maildir
    else:
        folder = maildir + '/' + relpath
    # The top-level mailbox needs tmp/new/cur just like the sub-folders,
    # otherwise copying its messages into cur/ fails on a fresh target.
    _make_maildir_folder(folder)

    if not filenames:
        return
    if 'cyrus.header' not in filenames:
        raise ValueError('%s: files present but no cyrus.header' % dirpath)

    # find the uniqueid for the folder, then look it up in the per-user
    # seen DB to find out which messages have been seen
    uniqueid = _read_uniqueid(dirpath + '/cyrus.header')
    seenset = decode_seen(uniqueid)

    for name in filenames:
        msgid = _message_uid(name)
        if msgid is None:
            if verbose > 1:
                print('skipping %s' % name)
            continue
        # generate the maildir filename
        counter += 1
        # see http://cr.yp.to/proto/maildir.html for flag meanings
        if msgid in seenset:
            flags = 'S'
        else:
            flags = ''
        source = dirpath + '/' + name
        nf = rand_base % (maildir, relpath, int(time.time()), counter, flags)
        if verbose > 2:
            print('%s -> %s' % (source, nf))
        timestamp = email_received(source)
        shutil.copy(source, nf)
        os.utime(nf, (timestamp, timestamp))


def decode_seen(uniqueid, seendb=None):
    """Return the set of seen message UIDs (ints) for one folder.

    uniqueid -- the folder's uniqueid as found in its cyrus.header.
    seendb   -- mapping of uniqueid to seen record; defaults to the
                module-level ``seen`` database loaded by migrate().

    A seen record consists of five space separated fields, the first being
    the version ('1') and the last a comma separated list of UIDs and
    "first:last" ranges (see seen_readit in Cyrus src/seen_db.c).  A folder
    without a record yields an empty set.

    Raises ValueError for a record with an unexpected version or a
    non-numeric UID.
    """
    if seendb is None:
        seendb = seen
    seenset = set()
    if seendb is None:
        return seenset
    try:
        record = seendb[uniqueid]
    except KeyError:
        # this folder probably has never been seen
        # XXX should probably log a warning
        return seenset

    fields = record.split(' ', 4)
    if fields[0] != '1':
        raise ValueError('unsupported seen record version %r' % fields[0])
    # Only a complete record carries the UID list as its fifth field; in a
    # shorter one the last field is something else.
    if len(fields) < 5 or not fields[4]:
        return seenset
    if verbose:
        print('seen %s' % fields[4])
    for interval in fields[4].split(','):
        interval = interval.strip()
        if not interval:
            continue
        if ':' in interval:
            # a message ID range, both ends included
            begin, end = interval.split(':', 1)
            seenset.update(range(int(begin), int(end) + 1))
        else:
            # individual message ID; stored as int because onelevel()
            # tests membership with integer UIDs
            seenset.add(int(interval))
    return seenset


def _parse_date(text):
    """Return the Unix timestamp for an RFC 2822 date string, or None.

    The UTC offset given in the string is honoured; a date without an
    offset is interpreted as local time.
    """
    parsed = email.utils.parsedate_tz(text)
    if parsed is None:
        return None
    try:
        return email.utils.mktime_tz(parsed)
    except (ValueError, OverflowError):
        return None


def email_received(filename):
    """Find the timestamp to use for a message by trying in order:

    1) first of the Received: headers
    2) Date: header
    3) Unix timestamp (ctime) of the Cyrus mail file

    A header whose date cannot be parsed is skipped in favour of the next
    option.  A warning listing the headers is printed when there is no
    Received: header and the From: or Date: header is missing as well.
    """
    # Python 3 needs the binary parser to cope with arbitrary message
    # bytes; Python 2 only has message_from_file, which accepts the same
    # file object.
    parse = getattr(email, 'message_from_binary_file', None)
    if parse is None:
        parse = email.message_from_file
    with open(filename, 'rb') as f:
        m = parse(f)

    received = m.get_all('received')
    if received:
        # the date follows the last ';' of the header
        ts = _parse_date(str(received[0]).split(';')[-1].strip())
        if ts is not None:
            return ts
    elif 'from' not in m or 'date' not in m:
        print('WARNING: could not determine received date for:')
        pprint.pprint(m.items())
        print('-' * 72)

    if 'date' in m:
        ts = _parse_date(str(m['date']))
        if ts is not None:
            return ts
    return os.stat(filename)[stat.ST_CTIME]


def _read_imapd_conf(path):
    """Return the "option: value" settings of an imapd.conf as a dict.

    Blank lines, '#' comments and lines without a colon are ignored.  Only
    the first colon separates option from value, so values may contain
    colons themselves.  A repeated option keeps its last value.
    """
    settings = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            option, colon, value = line.partition(':')
            if not colon:
                continue
            settings[option.strip()] = value.strip()
    return settings


def migrate():
    """Migrate the mailbox named by the module settings into the Maildir.

    Reads the module-level ``imapdconf``, ``maildir`` and ``mailbox``
    settings and sets ``configdir``, ``partition`` and ``seen``.  Exits with
    an error message when no mailbox name is known or when imapd.conf lacks
    ``configdirectory`` or ``partition-default``.
    """
    global configdir, partition, seen
    if not mailbox:
        sys.exit('no mailbox name known, use --user')
    try:
        os.makedirs(maildir)
    except OSError:
        # an already existing target is fine, anything else is not
        if not os.path.isdir(maildir):
            raise
    else:
        print('created maildir target %s' % maildir)

    # get config information from imapd.conf
    settings = _read_imapd_conf(imapdconf)
    configdir = settings.get('configdirectory', configdir)
    partition = settings.get('partition-default', partition)
    if not (configdir and partition):
        sys.exit('%s: configdirectory and partition-default are required'
                 % imapdconf)
    if verbose:
        print('configdir %s partition %s' % (configdir, partition))

    # process only one mailbox
    partition += '/user/' + mailbox

    # mbox name -> mbox type (4 bytes), partition ID, space, ACLs
    # mboxes = Skiplist('%s/mailboxes.db' % configdir)
    # mailbox UUID -> read list
    seen = Skiplist('%s/user/%s/%s.seen' % (configdir, mailbox[0], mailbox))
    for level in os.walk(partition):
        onelevel(*level)


def main(argv=None):
    """Parse the command line (default: sys.argv[1:]) and run migrate()."""
    global verbose, imapdconf, maildir, mailbox
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(
            argv, 'vh', ['maildir=', 'imapdconf=', 'user='])
    except getopt.GetoptError as err:
        sys.stderr.write('%s\n' % err)
        usage(2)
    for opt, arg in opts:
        if opt == '-v':
            verbose += 1
        elif opt == '-h':
            usage()
        elif opt == '--imapdconf':
            imapdconf = arg
        elif opt == '--maildir':
            maildir = arg
        elif opt == '--user':
            mailbox = arg
    migrate()


if __name__ == '__main__':
    main()