#!/usr/bin/env python3.4
"""One-shot importer: load bookmarks from import.csv into the bookie MongoDB.

Fixed CSV field order (comma-separated):
    URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE

Processing assumptions (inherited from the export side):
  * Literal commas inside raw fields were escaped as ^comma^ before export.
  * UNREAD is a 1/0 integer flag.
  * TAGS is a space-separated list; tags are lowercased and truncated to
    25 characters, and duplication is prevented via get_or_create.
  * Bookmarks are assumed to be in chronological order: rows with an empty
    DATE reuse the last recorded date (date_marker) until a new one appears.

WARNING: this script deletes ALL existing Bookmark/Tag/ArchivedText/
ArchivedImage documents before importing.
"""
import os, re
# NOTE(review): the flask.ext.* hook was deprecated and removed in Flask 1.0;
# it only delegated to the real package, so import that directly.
from flask_mongoengine import MongoEngine
import datetime
import base64
from bookie import *

app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

# The import fully replaces existing data: wipe every collection first.
Bookmark.objects.all().delete()
Tag.objects.all().delete()
ArchivedText.objects.all().delete()
ArchivedImage.objects.all().delete()

# Splits a date string into its numeric components on any non-digit run.
# Raw string (r'...') — '\d' in a plain literal is an invalid escape on
# modern Pythons — and compiled once instead of per row.
DATE_SPLIT = re.compile(r'[^\d]')

date_marker = datetime.datetime.now()
count = 0

# `with` guarantees the handle is closed, and `csv_file` avoids shadowing
# the builtin `file`. Iterating the handle streams line by line instead of
# materialising readlines().
with open('import.csv') as csv_file:
    for line in csv_file:
        # Strip NEWLINE/CRLF before splitting so the last field (SOURCE)
        # does not silently carry a trailing newline.
        line = line.rstrip('\r\n')
        if not line:
            continue  # tolerate blank/trailing lines in the export

        b = Bookmark()
        URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE = line.split(",")

        # Un-escape commas, then de-duplicate on the restored URL.
        filtered_url = URL.replace('^comma^', ',')
        if Bookmark.objects(url=filtered_url).count() == 0:
            print("URL: " + URL)
            b.url = filtered_url
        else:
            print("Duplicate URL: " + URL)
            continue

        print("Title: " + TITLE)
        b.title = TITLE.replace('^comma^', ',')[:255]  # schema caps title at 255 chars

        print("Note: " + NOTE)
        b.note = NOTE.replace('^comma^', ',')

        # Date handling: an empty DATE reuses the last seen date so the
        # assumed chronological ordering is preserved. The [:-1] drops the
        # final split component, matching the exporter's date format.
        if DATE != "":
            b.created_at = datetime.datetime(
                *map(int, DATE_SPLIT.split(DATE)[:-1]))
            if b.created_at < date_marker:
                date_marker = b.created_at
        else:
            b.created_at = date_marker
        print("Date: " + b.created_at.strftime("%Y/%m/%d"))

        print("Tags: " + TAGS)
        if TAGS != "":
            tag_list = []
            for rawtag in TAGS.split(" "):
                # Un-escape, lowercase, truncate to the 25-char tag limit.
                filtered = rawtag.replace('^comma^', ',').lower()[:25]
                # get_or_create prevents duplicate Tag documents.
                t = Tag.objects.get_or_create(name=filtered)[0].save()
                tag_list.append(t)
            b.tags = tag_list

        # UNREAD is a 1/0 integer flag; printed output matches the original
        # ("Unread: True"/"Unread: False" followed by the raw field).
        b.unread = int(UNREAD) > 0
        print("Unread: " + str(b.unread))
        print(UNREAD)

        b.archived_text_needed = True
        b.archived_image_needed = True
        b.deleted = False
        b.short = b.get_short()
        b.factor = b.get_factor()
        b.save()
        count += 1

print(count)