Working version of imap backup.

Replaced imaplib with imapclient for a friendlier interface.
This commit is contained in:
Andrew Davidson 2020-04-02 16:29:52 -04:00
parent 29efb943f6
commit a0eabee28a
Signed by: amd
GPG key ID: 17AF8F2A49CF25C6
4 changed files with 126 additions and 187 deletions

View file

@ -7,6 +7,7 @@ name = "pypi"
pygit2 = "*"
requests = "*"
pyyaml = "*"
imapclient = "*"
[dev-packages]

41
Pipfile.lock generated
View file

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "b0181521bb016cf1b8bc7de817cc2f903eefd390e75c26e9d19618233e6548fd"
"sha256": "578af5999e85ea37262f2fc11f64d6892589b6860b851696c8fd6817f88f69d7"
},
"pipfile-spec": 6,
"requires": {
@ -77,6 +77,14 @@
],
"version": "==2.9"
},
"imapclient": {
"hashes": [
"sha256:3eeb97b9aa8faab0caa5024d74bfde59408fbd542781246f6960873c7bf0dd01",
"sha256:60ba79758cc9f13ec910d7a3df9acaaf2bb6c458720d9a02ec33a41352fd1b99"
],
"index": "pypi",
"version": "==2.1.0"
},
"pycparser": {
"hashes": [
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0",
@ -105,20 +113,20 @@
},
"pyyaml": {
"hashes": [
"sha256:059b2ee3194d718896c0ad077dd8c043e5e909d9180f387ce42012662a4946d6",
"sha256:1cf708e2ac57f3aabc87405f04b86354f66799c8e62c28c5fc5f88b5521b2dbf",
"sha256:24521fa2890642614558b492b473bee0ac1f8057a7263156b02e8b14c88ce6f5",
"sha256:4fee71aa5bc6ed9d5f116327c04273e25ae31a3020386916905767ec4fc5317e",
"sha256:70024e02197337533eef7b85b068212420f950319cc8c580261963aefc75f811",
"sha256:74782fbd4d4f87ff04159e986886931456a1894c61229be9eaf4de6f6e44b99e",
"sha256:940532b111b1952befd7db542c370887a8611660d2b9becff75d39355303d82d",
"sha256:cb1f2f5e426dc9f07a7681419fe39cee823bb74f723f36f70399123f439e9b20",
"sha256:dbbb2379c19ed6042e8f11f2a2c66d39cceb8aeace421bfc29d085d93eda3689",
"sha256:e3a057b7a64f1222b56e47bcff5e4b94c4f61faac04c7c4ecb1985e18caa3994",
"sha256:e9f45bd5b92c7974e59bcd2dcc8631a6b6cc380a904725fce7bc08872e691615"
"sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
"sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
"sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
"sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
"sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
"sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
"sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
"sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
"sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
"sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
"sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
],
"index": "pypi",
"version": "==5.3"
"version": "==5.3.1"
},
"requests": {
"hashes": [
@ -128,6 +136,13 @@
"index": "pypi",
"version": "==2.23.0"
},
"six": {
"hashes": [
"sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
"sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
],
"version": "==1.14.0"
},
"urllib3": {
"hashes": [
"sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc",

View file

@ -24,7 +24,7 @@ class Archivist():
elif source == "imap":
log.info("Backing up IMAP")
imap.backup_imap(c.c["imap_server"], c.c["imap_port"], c.c["imap_user"],
imap.backup_imap(c.c["imap_server"], c.c["imap_user"],
c.c["imap_password"], c.imapdir())
elif source == "all":
@ -40,7 +40,7 @@ class Archivist():
if c.c["imap_enabled"]:
log.info("Backing up IMAP")
imap.backup_imap(c.c["imap_server"], c.c["imap_port"], c.c["imap_user"],
imap.backup_imap(c.c["imap_server"], c.c["imap_user"],
c.c["imap_password"], c.imapdir())
else:

View file

@ -4,79 +4,21 @@
# https://github.com/rcarmo/imapbackup/blob/master/imapbackup.py
###
import imaplib, logging, re, hashlib, email
import logging, email, os
from pathlib import Path
from imapclient import IMAPClient
from archivist.lib import Config
log = logging.getLogger(__name__)
MSGID_RE = re.compile("^Message\-Id\: (.+)", re.IGNORECASE + re.MULTILINE)
BLANKS_RE = re.compile(r'\s+', re.MULTILINE)
def imap_connect(imap_server, imap_port, imap_user, imap_password):
log.info("Connecting to "+imap_server+" as "+imap_user)
server = imaplib.IMAP4_SSL(imap_server, imap_port)
server.login(imap_user, imap_password)
return server
def parse_paren_list(row):
"""Parses the nested list of attributes at the start of a LIST response"""
# eat starting paren
assert(row[0] == '(')
row = row[1:]
result = []
# NOTE: RFC3501 doesn't fully define the format of name attributes
name_attrib_re = re.compile("^\s*(\\\\[a-zA-Z0-9_]+)\s*")
# eat name attributes until ending paren
while row[0] != ')':
# recurse
if row[0] == '(':
paren_list, row = parse_paren_list(row)
result.append(paren_list)
# consume name attribute
else:
match = name_attrib_re.search(row)
assert(match != None)
name_attrib = row[match.start():match.end()]
row = row[match.end():]
#print "MATCHED '%s' '%s'" % (name_attrib, row)
name_attrib = name_attrib.strip()
result.append(name_attrib)
# eat ending paren
assert(')' == row[0])
row = row[1:]
# done!
return result, row
def parse_string_list(row):
"""Parses the quoted and unquoted strings at the end of a LIST response"""
slist = re.compile('\s*(?:"([^"]+)")\s*|\s*(\S+)\s*').split(row)
return [s for s in slist if s]
def parse_list(row):
"""Prases response of LIST command into a list"""
row = row.strip()
paren_list, row = parse_paren_list(row)
string_list = parse_string_list(row)
assert(len(string_list) == 2)
return [paren_list] + string_list
def get_remote_folders(client):
    """Return the names of every folder the server lists.

    Args:
        client: A logged-in ``IMAPClient`` connection.

    Returns:
        A list of folder names as ``str``, in the order the server
        reported them.
    """
    log.info("Getting remote folders")
    # Each list_folders() entry is indexed like the original code did:
    # element 2 is the folder name; the earlier elements are not needed.
    return [str(entry[2]) for entry in client.list_folders()]
def create_folder_structure(localroot, folders):
@ -86,7 +28,7 @@ def create_folder_structure(localroot, folders):
else:
log.info("Updating local folder structure")
for f in folders:
lf = localroot / f
lf = localroot / f
if not lf.exists():
log.info("Creating "+str(lf))
lf.mkdir(parents=True)
@ -97,117 +39,98 @@ def create_folder_structure(localroot, folders):
tmp = lf / "tmp"
tmp.mkdir()
def scan_remote_folder(client, folder):
    """Report the UID state of a remote folder.

    Selects ``folder`` read-only, asks the server for every message UID
    and for the folder's UIDVALIDITY.

    Args:
        client: A logged-in ``IMAPClient`` connection.
        folder: Folder name as returned by ``get_remote_folders``. It is
            passed through unquoted (presumably the client library quotes
            names itself -- confirm against the IMAPClient docs).

    Returns:
        A ``(uid_validity, uid_newest)`` tuple. ``uid_newest`` is the
        highest UID currently in the folder, or 0 when the folder is empty.
    """
    log.info("Scanning "+folder)
    client.select_folder(folder, readonly=True)
    uids = client.search()
    # An empty folder has no UIDs; 0 is the "nothing stored yet" marker
    # that scan_local_folder also uses.
    uid_newest = max(uids, default=0)
    status = client.folder_status(folder, what=u'UIDVALIDITY')
    return status[b'UIDVALIDITY'], uid_newest
def scan_local_folder(localroot, folder):
    """Read the UID bookmark stored for a local folder.

    The bookmark lives in ``<localroot>/<folder>/.uid`` and holds two
    whitespace-separated integers: the UIDVALIDITY followed by the last
    UID that was stored.

    Args:
        localroot: ``pathlib.Path`` of the local backup root.
        folder: Folder name relative to ``localroot``.

    Returns:
        A ``(uid_validity, last_uid)`` tuple, or ``(-1, 0)`` when no usable
        bookmark exists (file missing, empty, or not two integers).
    """
    uid_file = localroot / folder / '.uid'
    try:
        fields = uid_file.read_text().split()
        return int(fields[0]), int(fields[1])
    except (FileNotFoundError, IndexError, ValueError):
        # A missing or damaged bookmark (e.g. an interrupted write) is
        # treated the same as "never backed up" instead of crashing the run.
        return -1, 0
def download_messages(server, new_messages):
print("Not implemented")
def backup_imap(imap_server, imap_port, imap_user, imap_password, imap_localroot):
server = imap_connect(imap_server, imap_port, imap_user, imap_password)
folders = get_remote_folders(server)
create_folder_structure(imap_localroot, folders)
for folder in folders:
remote_messages = scan_remote_folder(server, folder)
# current_messages = scan_local_folder(imap_localroot, folder)
#
# new_messages = {}
#
# for msg_id in remote_messages:
# if msg_id not in current_messages:
# new_messages[msg_id] = remote_messages[msg_id]
#
# download_messages(server, new_messages)
server.logout()
def get_messages(client, folder, uid_local, uid_newest):
    """Find the UIDs in a folder that have not been stored locally yet.

    Args:
        client: A logged-in ``IMAPClient`` connection.
        folder: Name of the remote folder to search.
        uid_local: Last UID already stored locally (0 or less if none).
        uid_newest: Highest UID currently present on the server.

    Returns:
        The UIDs in the range ``uid_local + 1 .. uid_newest`` as returned
        by the server, or an empty list when there is nothing new.
    """
    # Always select the folder: later fetches rely on it being selected.
    client.select_folder(folder, readonly=True)

    # Start after the stored UID so the last saved message is not fetched
    # again, and never below 1 because 0 is not a valid IMAP UID.
    first_new = max(uid_local, 0) + 1
    if first_new > uid_newest:
        # IMAP treats "n:m" with n > m as "m:n", so a reversed range would
        # still match messages; skip the round trip entirely.
        return []

    return client.search('UID ' + str(first_new) + ':' + str(uid_newest))
def store_email(client, localroot, folder, uid_validity, uids):
    """Fetch messages from the selected folder and write them to disk.

    Each message is written to ``<localroot>/<folder>/cur/`` under the name
    ``<uid_validity>-<uid zero-padded to 9 digits>``.

    Args:
        client: ``IMAPClient`` connection with the folder already selected.
        localroot: ``pathlib.Path`` of the local backup root.
        folder: Folder name relative to ``localroot``.
        uid_validity: UIDVALIDITY of the folder, used as filename prefix.
        uids: A single UID or a collection of UIDs to fetch.

    Returns:
        True when the server returned at least one message and every
        returned message had a body that was written; False otherwise.
    """
    response = client.fetch(uids, 'RFC822')
    target_dir = localroot / folder / "cur"

    # An empty response means nothing was fetched (e.g. the message
    # vanished); report that instead of claiming success.
    stored_all = bool(response)
    for uid, data in response.items():
        body = data.get(b'RFC822')
        if body is None:
            stored_all = False
            continue
        filename = str(uid_validity) + '-' + str(uid).zfill(9)
        (target_dir / filename).write_bytes(body)
    return stored_all
def update_folder_uid(localroot, folder, uid_validity, uid):
    """Record the most recently stored UID for a folder and verify it.

    Writes ``"<uid_validity> <uid>"`` to ``<localroot>/<folder>/.uid`` and
    then reads the bookmark back through ``scan_local_folder``.

    Returns:
        True when the values read back equal the ones written, else False.
    """
    bookmark = localroot / folder / '.uid'
    with open(bookmark, 'w') as handle:
        handle.write(str(uid_validity) + " " + str(uid))

    # Read the bookmark back to confirm the write actually landed.
    stored_validity, stored_uid = scan_local_folder(localroot, folder)
    return stored_validity == uid_validity and stored_uid == uid
def backup_imap(imap_server, imap_user, imap_password, imap_localroot):
    """Back up every folder of an IMAP account into a local directory tree.

    For each remote folder the local UID bookmark is compared with the
    server's state, and only messages newer than the bookmark are fetched.
    The bookmark is advanced after each message that is stored.

    Args:
        imap_server: Host name of the IMAP server.
        imap_user: Login name.
        imap_password: Login password.
        imap_localroot: ``pathlib.Path`` of the local backup root.
    """
    with IMAPClient(host=imap_server) as client:
        client.login(imap_user, imap_password)
        folders = get_remote_folders(client)
        create_folder_structure(imap_localroot, folders)

        for folder in folders:
            uid_local_validity, uid_local = scan_local_folder(imap_localroot, folder)
            uid_remote_validity, uid_newest = scan_remote_folder(client, folder)

            # If the folder does not have a recorded validity, accept the server's.
            if uid_local_validity < 0:
                uid_local_validity = uid_remote_validity

            # A changed UIDVALIDITY means the server has reset its UIDs, so
            # the stored bookmark can no longer be trusted.
            if uid_local_validity != uid_remote_validity:
                log.error("The server has reset UID validity, for folder " + folder + ". Backup must be repaired manually")
                continue

            messages = get_messages(client, folder, uid_local, uid_newest)
            log.info("Downloading "+str(len(messages))+" to "+folder)

            # Ascending order keeps the bookmark a true high-water mark.
            for uid in sorted(messages):
                if not store_email(client, imap_localroot, folder, uid_remote_validity, uid):
                    log.error("Message " + str(uid) + " failed to save in " + folder)
                    # Stop here so a later success cannot move the bookmark
                    # past this message; the next run will retry it.
                    break
                if not update_folder_uid(imap_localroot, folder, uid_remote_validity, uid):
                    log.error("UID " + str(uid) + " failed to update in " + folder)