84 lines
2.3 KiB
Python
84 lines
2.3 KiB
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
import os, re
|
||
|
from flask.ext.mongoengine import MongoEngine
|
||
|
import datetime
|
||
|
import base64
|
||
|
|
||
|
from bookie import *
|
||
|
|
||
|
# Select the "bookie" database and bind MongoEngine to the app.
# NOTE(review): `app` is presumably provided by `from bookie import *` -- confirm.
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

# Open the CSV export first: if it is missing, the script stops here and the
# existing collections below are left untouched.
file = open('bookmarks.csv')

# Starting value for the created_at fallback date, and the import counter.
date_marker = datetime.datetime.now()
count = 0

# Wipe the existing documents so the import starts from empty collections.
for model in (Bookmark, Tag, ArchivedText, ArchivedImage):
    model.objects.all().delete()
|
||
|
|
||
|
###
# Fixed field order:
# URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE
###
# Notes on processing assumptions:
# UNREAD field is a 1/0 integer; any value greater than 0 means unread
# Escape commas as ^comma^ in raw fields before CSV export
# Watch for NEWLINE/CRLF issues
# All tags are converted to lowercase; tag duplication is prevented
# Date importing assumes that bookmarks are in chronological order and will
# ... continue to use the last recorded date until it finds a new date
|
||
|
|
||
|
# Import each CSV row as a Bookmark document, skipping URLs already stored.
#
# NOTE(review): the reviewed copy of this script had lost its indentation, so
# the nesting below was reconstructed from statement order. Please confirm:
#   * the date fallback `else` pairs with the `date != ""` test, and
#   * the running count is printed once per imported row.
with file:  # closes the CSV handle when the import finishes or raises
    for line in file:
        # Drop the line terminator so it does not leak into the last field.
        line = line.rstrip("\r\n")
        if not line:
            # A blank line (e.g. at end of file) has no fields to unpack.
            continue

        # Fixed field order; raises ValueError if a row does not have exactly
        # seven comma-separated fields. The SOURCE column is not used.
        url, title, note, date, tags, unread, _source = line.split(",")

        filtered_url = url.replace('^comma^', ',')
        if Bookmark.objects(url=filtered_url).count() != 0:
            print("Duplicate URL: " + url)
            continue

        bookmark = Bookmark()
        print("URL: " + url)
        bookmark.url = filtered_url

        print("Title: " + title)
        bookmark.title = title.replace('^comma^', ',')[:255]

        print("Note: " + note)
        bookmark.note = note.replace('^comma^', ',')

        if date != "":
            # Split on every non-digit and feed the numeric pieces to
            # datetime(); the last piece is discarded (presumably the empty
            # string left after a trailing non-digit -- confirm against data).
            bookmark.created_at = datetime.datetime(
                *map(int, re.split(r'[^\d]', date)[:-1]))
            if bookmark.created_at < date_marker:
                date_marker = bookmark.created_at
        else:
            # No date on this row: reuse the last recorded date.
            bookmark.created_at = date_marker

        print("Date: " + bookmark.created_at.strftime("%Y/%m/%d"))

        print("Tags: " + tags)
        if tags != "":
            tag_list = []
            for rawtag in tags.split(" "):
                if not rawtag:
                    # Consecutive spaces would otherwise create an empty tag.
                    continue
                filtered = rawtag.replace('^comma^', ',').lower()[:25]
                # Keep the document itself rather than save()'s return value.
                tag = Tag.objects.get_or_create(name=filtered)[0]
                tag.save()
                tag_list.append(tag)
            bookmark.tags = tag_list

        # UNREAD is an integer flag: anything greater than 0 means unread.
        is_unread = int(unread) > 0
        bookmark.unread = is_unread
        print("Unread: " + str(is_unread))
        print(unread)

        bookmark.archived_text_needed = True
        bookmark.archived_image_needed = True
        bookmark.deleted = False
        bookmark.short = bookmark.get_short()
        bookmark.factor = bookmark.get_factor()
        bookmark.save()

        count += 1
        print(count)
|