import re
import win32com.client

# MAPI property tags read from every message
PR_SENDER_EMAIL_ADDRESS = 0x0C1F001F
PR_TRANSPORT_MESSAGE_HEADERS = 0x7d001e

# Running tallies filled in by dump_folder():
#   srcs:  sender address           -> number of matches attributed to it
#   dsts:  matched address          -> number of times it was found
#   pairs: (sender, matched) tuple  -> number of times the pair was seen
srcs = {}
dsts = {}
pairs = {}

# regular expression that scans for valid email addresses in the headers
# NOTE(review): the character class also accepts ',' and the local part may
# be empty -- confirm this is intentional before tightening the pattern.
m_re = re.compile(r'[-A-Za-z0-9.,_]*@majid\.fm')
# regular expression that strips out headers that can cause false positives
strip_re = re.compile(r'(Message-Id:.*$|In-Reply-To:.*$|References:.*$)',
                      re.IGNORECASE | re.MULTILINE)


def dump_folder(folder):
    """Tally sender/recipient address pairs in a folder and its subfolders.

    Prints a banner with the folder name, then walks the folder's messages
    using 1-based indices.  For each message the sender address and the
    transport headers are read; the Message-Id, In-Reply-To and References
    header lines are removed, and every address matching ``m_re`` in what is
    left is counted in the module-level ``srcs``, ``dsts`` and ``pairs``
    dictionaries.  A ``sender -> address`` line is printed the first time a
    pair is encountered.  Finally the function recurses into each subfolder.

    Args:
        folder: a folder object exposing ``Name``, ``Messages`` and
            ``Folders`` (here obtained from the MAPI session's stores).
    """
    print('-' * 72)
    print(folder.Name)
    print('-' * 72)
    for i in range(1, folder.Messages.Count + 1):
        try:
            # fetch the message once instead of once per property
            fields = folder.Messages[i].Fields
            _from = fields[PR_SENDER_EMAIL_ADDRESS].Value
            headers = fields[PR_TRANSPORT_MESSAGE_HEADERS].Value
        except Exception:
            # ignore non-email objects like contacts or calendar entries;
            # unlike a bare ``except:`` this still lets Ctrl-C and
            # SystemExit stop the scan
            continue
        stripped_headers = strip_re.sub('', headers)
        for _to in m_re.findall(stripped_headers):
            srcs[_from] = srcs.get(_from, 0) + 1
            dsts[_to] = dsts.get(_to, 0) + 1
            if (_from, _to) not in pairs:
                # single-argument print() gives the same output on
                # Python 2 and Python 3
                print('%s -> %s' % (_from, _to))
            pairs[_from, _to] = pairs.get((_from, _to), 0) + 1
    # recurse
    for i in range(1, folder.Folders.Count + 1):
        dump_folder(folder.Folders[i])


# connect to Outlook via CDO
cdo = win32com.client.Dispatch('MAPI.Session')
cdo.Logon()
try:
    # iterate over all the open PST files
    for i in range(1, cdo.InfoStores.Count + 1):
        store = cdo.InfoStores[i]
        root = store.RootFolder
        # NOTE(review): the next two values are never used; presumably the
        # properties are touched to force the store to load -- confirm
        # before removing.
        m = root.Messages
        store.ID
        print('#' * 72)
        print(store.Name)
        print('#' * 72)
        dump_folder(root)
finally:
    # always release the MAPI session, even if the scan fails part-way
    cdo.Logoff()