Working version of IMAP backup.
Replaced imaplib with imapclient for a friendlier interface.
This commit is contained in:
parent
29efb943f6
commit
a0eabee28a
4 changed files with 126 additions and 187 deletions
1
Pipfile
1
Pipfile
|
@ -7,6 +7,7 @@ name = "pypi"
|
|||
pygit2 = "*"
|
||||
requests = "*"
|
||||
pyyaml = "*"
|
||||
imapclient = "*"
|
||||
|
||||
[dev-packages]
|
||||
|
||||
|
|
41
Pipfile.lock
generated
41
Pipfile.lock
generated
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "b0181521bb016cf1b8bc7de817cc2f903eefd390e75c26e9d19618233e6548fd"
|
||||
"sha256": "578af5999e85ea37262f2fc11f64d6892589b6860b851696c8fd6817f88f69d7"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -77,6 +77,14 @@
|
|||
],
|
||||
"version": "==2.9"
|
||||
},
|
||||
"imapclient": {
|
||||
"hashes": [
|
||||
"sha256:3eeb97b9aa8faab0caa5024d74bfde59408fbd542781246f6960873c7bf0dd01",
|
||||
"sha256:60ba79758cc9f13ec910d7a3df9acaaf2bb6c458720d9a02ec33a41352fd1b99"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.1.0"
|
||||
},
|
||||
"pycparser": {
|
||||
"hashes": [
|
||||
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
|
||||
|
@ -105,20 +113,20 @@
|
|||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6",
|
||||
"sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf",
|
||||
"sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5",
|
||||
"sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e",
|
||||
"sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811",
|
||||
"sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e",
|
||||
"sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d",
|
||||
"sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20",
|
||||
"sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689",
|
||||
"sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994",
|
||||
"sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615"
|
||||
"sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
|
||||
"sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
|
||||
"sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
|
||||
"sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
|
||||
"sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
|
||||
"sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
|
||||
"sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
|
||||
"sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
|
||||
"sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
|
||||
"sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
|
||||
"sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.3"
|
||||
"version": "==5.3.1"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
|
@ -128,6 +136,13 @@
|
|||
"index": "pypi",
|
||||
"version": "==2.23.0"
|
||||
},
|
||||
"six": {
|
||||
"hashes": [
|
||||
"sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
|
||||
"sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
|
||||
],
|
||||
"version": "==1.14.0"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc",
|
||||
|
|
|
@ -24,7 +24,7 @@ class Archivist():
|
|||
|
||||
elif source == "imap":
|
||||
log.info("Backing up IMAP")
|
||||
imap.backup_imap(c.c["imap_server"], c.c["imap_port"], c.c["imap_user"],
|
||||
imap.backup_imap(c.c["imap_server"], c.c["imap_user"],
|
||||
c.c["imap_password"], c.imapdir())
|
||||
|
||||
elif source == "all":
|
||||
|
@ -40,7 +40,7 @@ class Archivist():
|
|||
|
||||
if c.c["imap_enabled"]:
|
||||
log.info("Backing up IMAP")
|
||||
imap.backup_imap(c.c["imap_server"], c.c["imap_port"], c.c["imap_user"],
|
||||
imap.backup_imap(c.c["imap_server"], c.c["imap_user"],
|
||||
c.c["imap_password"], c.imapdir())
|
||||
|
||||
else:
|
||||
|
|
|
@ -4,79 +4,21 @@
|
|||
# https://github.com/rcarmo/imapbackup/blob/master/imapbackup.py
|
||||
###
|
||||
|
||||
import imaplib, logging, re, hashlib, email
|
||||
import logging, email, os
|
||||
from pathlib import Path
|
||||
from imapclient import IMAPClient
|
||||
from archivist.lib import Config
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Matches a "Message-Id: <value>" header line (case-insensitive, at the start
# of any line) and captures everything after the ": " in group 1.
# Raw strings avoid the invalid "\-" / "\:" escapes of the previous literal.
MSGID_RE = re.compile(r"^Message-Id: (.+)", re.IGNORECASE | re.MULTILINE)
# Matches any run of whitespace, including line breaks.
BLANKS_RE = re.compile(r"\s+", re.MULTILINE)
|
||||
|
||||
def imap_connect(imap_server, imap_port, imap_user, imap_password):
    """Open an SSL connection to an IMAP server and log in.

    Args:
        imap_server: Host name passed to ``imaplib.IMAP4_SSL``.
        imap_port: Port passed to ``imaplib.IMAP4_SSL``.
        imap_user: Login name.
        imap_password: Login password.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` object.

    Raises:
        Whatever ``IMAP4_SSL`` or ``login`` raise; the connection is closed
        before a login failure is re-raised.
    """
    log.info("Connecting to %s as %s", imap_server, imap_user)
    server = imaplib.IMAP4_SSL(imap_server, imap_port)
    try:
        server.login(imap_user, imap_password)
    except Exception:
        # Do not leave the socket open when authentication fails.
        server.shutdown()
        raise
    return server
|
||||
|
||||
# One backslash-prefixed name attribute (e.g. \HasNoChildren) with optional
# surrounding whitespace; group 1 is the attribute itself.
# NOTE: RFC3501 doesn't fully define the format of name attributes
_NAME_ATTRIB_RE = re.compile(r"^\s*(\\[a-zA-Z0-9_]+)\s*")


def parse_paren_list(row):
    """Parses the nested list of attributes at the start of a LIST response.

    Args:
        row: Text that starts with ``(``.

    Returns:
        A ``(attributes, rest)`` tuple: ``attributes`` is a list of attribute
        strings (nested parentheses become nested lists) and ``rest`` is the
        text following the matching ``)``.

    Raises:
        ValueError: If ``row`` does not start with ``(``, contains something
            that is not a name attribute, or ends before the closing ``)``.
    """
    if not row.startswith('('):
        raise ValueError("attribute list must start with '(': %r" % (row,))
    row = row[1:]

    result = []
    while not row.startswith(')'):
        if not row:
            # Input ran out before the closing paren was seen.
            raise ValueError("unterminated attribute list")
        if row[0] == '(':
            # Nested list: recurse and continue after its closing paren.
            nested, row = parse_paren_list(row)
            result.append(nested)
        else:
            match = _NAME_ATTRIB_RE.match(row)
            if match is None:
                raise ValueError("invalid name attribute at: %r" % (row,))
            result.append(match.group(1))
            row = row[match.end():]

    # Drop the closing paren.
    return result, row[1:]
|
||||
|
||||
# A double-quoted string (group 1, quotes removed) or a bare run of
# non-whitespace (group 2), each with optional surrounding whitespace.
_STRING_ITEM_RE = re.compile(r'\s*(?:"([^"]+)")\s*|\s*(\S+)\s*')


def parse_string_list(row):
    """Parses the quoted and unquoted strings at the end of a LIST response.

    Args:
        row: Text holding whitespace-separated items, each optionally wrapped
            in double quotes.

    Returns:
        The items in order, with surrounding quotes removed.
    """
    # re.split yields '' between matches and None for the group that did not
    # take part in a match; both are dropped by the truthiness filter.
    return [piece for piece in _STRING_ITEM_RE.split(row) if piece]
|
||||
|
||||
def parse_list(row):
    """Parses the response of a LIST command into a list.

    Args:
        row: One decoded LIST response line.

    Returns:
        ``[attributes, first_string, second_string]`` where ``attributes`` is
        the list produced by ``parse_paren_list`` and the two strings are the
        items produced by ``parse_string_list`` from the remainder.

    Raises:
        ValueError: If the remainder does not contain exactly two strings.
    """
    paren_list, remainder = parse_paren_list(row.strip())
    string_list = parse_string_list(remainder)
    # Explicit check instead of assert so validation survives ``python -O``.
    if len(string_list) != 2:
        raise ValueError(
            "expected 2 strings after the attribute list, got %d" % len(string_list)
        )
    return [paren_list] + string_list
|
||||
|
||||
def get_remote_folders(client):
    """Gets the list of folder names from the server.

    Args:
        client: Logged-in IMAPClient; only ``list_folders()`` is called.

    Returns:
        A list of folder names as ``str``. Each entry returned by
        ``list_folders()`` is read at index 2 (the folder name, per the
        imapclient documentation).
    """
    log.info("Getting remote folders")
    return [str(entry[2]) for entry in client.list_folders()]
|
||||
|
||||
def create_folder_structure(localroot, folders):
|
||||
|
@ -86,7 +28,7 @@ def create_folder_structure(localroot, folders):
|
|||
else:
|
||||
log.info("Updating local folder structure")
|
||||
for f in folders:
|
||||
lf = localroot / f
|
||||
lf = localroot / f
|
||||
if not lf.exists():
|
||||
log.info("Creating "+str(lf))
|
||||
lf.mkdir(parents=True)
|
||||
|
@ -97,117 +39,98 @@ def create_folder_structure(localroot, folders):
|
|||
tmp = lf / "tmp"
|
||||
tmp.mkdir()
|
||||
|
||||
|
||||
|
||||
def scan_remote_folder(client, folder):
    """Scans a remote folder and reports its UIDVALIDITY and highest UID.

    Args:
        client: Logged-in IMAPClient.
        folder: Name of the folder to inspect.

    Returns:
        A ``(uid_validity, uid_newest)`` tuple. ``uid_newest`` is the largest
        value returned by ``client.search()`` with its default criteria, or 0
        when the search returns nothing.
    """
    # Read-only select so that scanning never alters the mailbox.
    client.select_folder(folder, readonly=True)
    uid_newest = max(client.search(), default=0)
    status = client.folder_status(folder, what=u'UIDVALIDITY')
    return status[b'UIDVALIDITY'], uid_newest
|
||||
|
||||
def scan_local_folder(localroot, folder):
    """Get the UID validity and last UID stored in the folder.

    The values are read from ``<localroot>/<folder>/.uid``, which holds two
    whitespace-separated integers.

    Args:
        localroot: ``pathlib.Path`` of the local backup root.
        folder: Folder name relative to ``localroot``.

    Returns:
        ``(uid_validity, last_uid)`` from the file, or ``(-1, 0)`` when the
        file is missing or cannot be parsed.
    """
    uid_file = localroot / folder / '.uid'
    try:
        fields = uid_file.read_text().split()
    except FileNotFoundError:
        return -1, 0

    try:
        return int(fields[0]), int(fields[1])
    except (IndexError, ValueError):
        # An interrupted write can leave the file empty or truncated; treat
        # that like a missing file rather than failing on every later run.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable UID file %s", uid_file)
        return -1, 0
|
||||
|
||||
def download_messages(server, new_messages):
    """Placeholder for message download; only prints a notice.

    Both arguments are accepted and ignored.
    """
    notice = "Not implemented"
    print(notice)
|
||||
|
||||
|
||||
|
||||
def backup_imap(imap_server, imap_port, imap_user, imap_password, imap_localroot):
    """Connect to the server, mirror its folder tree locally and scan each folder.

    Args:
        imap_server: IMAP host name.
        imap_port: IMAP port.
        imap_user: Login name.
        imap_password: Login password.
        imap_localroot: Local root directory handed to
            ``create_folder_structure``.
    """
    server = imap_connect(imap_server, imap_port, imap_user, imap_password)
    try:
        folders = get_remote_folders(server)
        create_folder_structure(imap_localroot, folders)

        for folder in folders:
            # TODO: compare the scan result with scan_local_folder() and pass
            # the messages missing locally to download_messages().
            scan_remote_folder(server, folder)
    finally:
        # Always end the session, even when a step above raises.
        server.logout()
|
||||
def get_messages(client, folder, uid_local, uid_newest):
    """Get all messages in a folder between two UIDs (both inclusive).

    Args:
        client: Logged-in IMAPClient.
        folder: Folder to select (read-only) and search.
        uid_local: Lower UID bound; values below 1 are raised to 1 because 0
            is not a valid number in an IMAP sequence set (RFC 3501).
        uid_newest: Upper UID bound.

    Returns:
        The list returned by ``client.search`` for ``UID <low>:<high>``, or an
        empty list when the lower bound is above the upper bound.
    """
    client.select_folder(folder, readonly=True)

    first = max(uid_local, 1)
    if first > uid_newest:
        # IMAP treats "10:5" the same as "5:10", so a reversed range would
        # return messages below the requested lower bound.
        return []

    return client.search('UID ' + str(first) + ":" + str(uid_newest))
|
||||
|
||||
def store_email(client, localroot, folder, uid_validity, uids):
    """Fetch messages and store each one in the folder's ``cur`` directory.

    Every message is written to
    ``<localroot>/<folder>/cur/<uid_validity>-<uid zero-padded to 9 digits>``.

    Args:
        client: Logged-in IMAPClient with the folder already selected.
        localroot: ``pathlib.Path`` of the local backup root.
        folder: Folder name relative to ``localroot``.
        uid_validity: UIDVALIDITY value used as the file name prefix.
        uids: Message UID(s) passed straight to ``client.fetch``.

    Returns:
        True if at least one message was written, False if the fetch response
        contained no message body.
    """
    response = client.fetch(uids, 'RFC822')
    target_dir = localroot / folder / "cur"

    stored = 0
    for uid, data in response.items():
        body = data.get(b'RFC822')
        if body is None:
            # Response entry without a message body; nothing to store.
            continue
        filename = str(uid_validity) + '-' + str(uid).zfill(9)
        (target_dir / filename).write_bytes(body)
        stored += 1

    return stored > 0
|
||||
|
||||
def update_folder_uid(localroot, folder, uid_validity, uid):
    """Record the most recently stored UID for a folder and verify the write.

    Writes ``"<uid_validity> <uid>"`` to ``<localroot>/<folder>/.uid``, then
    reads it back through ``scan_local_folder``.

    Returns:
        True when the values read back equal the ones written, else False.
    """
    marker = localroot / folder / '.uid'
    record = "{} {}".format(uid_validity, uid)

    with open(marker, 'w') as handle:
        handle.write(record)

    # Re-read the marker to confirm the write.
    stored_validity, stored_uid = scan_local_folder(localroot, folder)
    return stored_validity == uid_validity and stored_uid == uid
|
||||
|
||||
|
||||
def backup_imap(imap_server, imap_user, imap_password, imap_localroot):
    """Back up every folder of an IMAP account into a local directory tree.

    For each remote folder the locally recorded (UIDVALIDITY, last UID) pair
    is compared with the server's; messages with a UID above the recorded one
    are fetched and stored, and the record is advanced after each message.

    Args:
        imap_server: IMAP host name passed to ``IMAPClient``.
        imap_user: Login name.
        imap_password: Login password.
        imap_localroot: ``pathlib.Path`` of the local backup root.
    """
    with IMAPClient(host=imap_server) as client:
        client.login(imap_user, imap_password)

        folders = get_remote_folders(client)
        create_folder_structure(imap_localroot, folders)

        for folder in folders:
            uid_local_validity, uid_local = scan_local_folder(imap_localroot, folder)
            uid_remote_validity, uid_newest = scan_remote_folder(client, folder)

            # if the folder does not have a recorded validity, accept the server's
            if uid_local_validity < 0:
                uid_local_validity = uid_remote_validity

            # Check to make sure the server has not reset UIDs
            if uid_local_validity != uid_remote_validity:
                log.error("The server has reset UID validity, for folder %s. "
                          "Backup must be repaired manually", folder)
                continue

            if uid_local >= uid_newest:
                log.info("No new messages in %s", folder)
                continue

            # uid_local is already stored, so start at the next UID instead
            # of fetching that message again on every run.
            messages = get_messages(client, folder, uid_local + 1, uid_newest)
            log.info("Downloading %s to %s", len(messages), folder)

            # Ascending order keeps the persisted "last UID" monotonic.
            for uid in sorted(messages):
                if not store_email(client, imap_localroot, folder, uid_remote_validity, uid):
                    log.error("Message %s failed to save in %s", uid, folder)
                elif not update_folder_uid(imap_localroot, folder, uid_remote_validity, uid):
                    log.error("UID %s failed to update in %s", uid, folder)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue