bookie-python/csv_import.py

83 lines
2.3 KiB
Python
Executable file

#!/usr/bin/env python3
import os, re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bookie import *
# Bind MongoEngine to the Flask app using the "bookie" database.
# (`app` is presumably provided by `from bookie import *` -- confirm.)
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

# Handle on the CSV export; consumed by the import loop further down.
file = open('import.csv')

# Timestamp taken at start-up and a counter, both initialised before the
# import loop runs.
date_marker = datetime.datetime.now()
count = 0

# Destructive: wipe all existing documents of every model before importing.
for _model in (Bookmark, Tag, ArchivedText, ArchivedImage):
    _model.objects.all().delete()
###
# Fixed field order:
# URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE
###
# Notes on processing assumptions:
# - UNREAD is an integer flag: any value greater than 0 marks the bookmark unread.
# - Escape literal commas as ^comma^ in raw fields before exporting the CSV.
# - Watch for newline/CRLF issues: each input line must hold exactly one record.
# - All tags are converted to lowercase; tag duplication is prevented.
# - Date importing assumes that bookmarks are in chronological order: a row
#   without a date reuses the last recorded date until a new date is found.
# Import loop: turn each CSV row into a Bookmark, skipping URLs that are
# already stored. Uses `file`, `date_marker` and `count` defined earlier in
# this script. Fields are separated by plain commas; literal commas inside a
# field are encoded as ^comma^ and decoded here.

# Raw string: '\d' inside a plain string literal is an invalid escape sequence.
_NON_DIGIT = re.compile(r'[^\d]')
# URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE
_FIELD_COUNT = 7

try:
    # Iterate the file lazily instead of loading every line with readlines().
    for line_number, line in enumerate(file, start=1):
        # Strip the line terminator so it never ends up inside the last
        # field, and ignore blank lines (typically a trailing newline at the
        # end of the file) instead of failing to unpack them.
        line = line.rstrip("\r\n")
        if not line.strip():
            continue

        fields = line.split(",")
        if len(fields) != _FIELD_COUNT:
            # Most likely an unescaped comma. Report the row and carry on
            # rather than aborting the import after the collections were wiped.
            print("Skipping malformed line {}: expected {} fields, got {}".format(
                line_number, _FIELD_COUNT, len(fields)))
            continue
        URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE = fields  # SOURCE is not used

        filtered_url = URL.replace('^comma^', ',')
        if Bookmark.objects(url=filtered_url).count() != 0:
            print("Duplicate URL: " + URL)
            continue
        print("URL: " + URL)

        b = Bookmark()
        b.url = filtered_url

        print("Title: " + TITLE)
        b.title = TITLE.replace('^comma^', ',')[:255]  # title capped at 255 chars

        print("Note: " + NOTE)
        b.note = NOTE.replace('^comma^', ',')

        if DATE != "":
            # Split on every non-digit character, drop the final piece, and
            # pass the remaining numbers positionally to datetime
            # (year, month, day, ...).
            date_parts = _NON_DIGIT.split(DATE)[:-1]
            b.created_at = datetime.datetime(*map(int, date_parts))
            # NOTE(review): the marker only ever moves to an EARLIER date, so
            # undated rows inherit the earliest date seen so far. Confirm this
            # matches the intended "last recorded date" behaviour.
            if b.created_at < date_marker:
                date_marker = b.created_at
        else:
            # Undated row: fall back to the current marker.
            b.created_at = date_marker
        print("Date: " + b.created_at.strftime("%Y/%m/%d"))

        print("Tags: " + TAGS)
        if TAGS != "":
            tag_list = []
            seen_names = set()
            # split() without an argument ignores repeated spaces, so no
            # empty-named tags are created.
            for rawtag in TAGS.split():
                filtered = rawtag.replace('^comma^', ',').lower()[:25]
                if filtered in seen_names:
                    # Same tag listed twice (possibly after lowercasing or
                    # truncation): attach it to the bookmark only once.
                    continue
                seen_names.add(filtered)
                tag = Tag.objects.get_or_create(name=filtered)[0]
                # Keep the document itself; do not depend on save()'s return value.
                tag.save()
                tag_list.append(tag)
            b.tags = tag_list

        is_unread = int(UNREAD) > 0
        b.unread = is_unread
        print("Unread: " + str(is_unread))  # prints "Unread: True" / "Unread: False"
        print(UNREAD)

        # Flag the bookmark as still needing its text and image archives.
        b.archived_text_needed = True
        b.archived_image_needed = True
        b.deleted = False
        # Derived values supplied by the Bookmark model's own helpers.
        b.short = b.get_short()
        b.factor = b.get_factor()
        b.save()
        count += 1
finally:
    # Always release the input file, even if a row raises part-way through.
    file.close()

# Total number of bookmarks imported.
print(count)