importing updates

This commit is contained in:
Andrew Davidson 2014-11-05 20:39:33 -05:00
parent 650722a079
commit bd644032bc
6 changed files with 71 additions and 2412 deletions

3
.gitignore vendored
View file

@ -8,3 +8,6 @@ logs/*
#* #*
Gemfile.lock Gemfile.lock
temp.py temp.py
import.xml
import.json
import.csv

View file

@ -1,5 +1,5 @@
import os import os
from flask import Flask, render_template, redirect, url_for, request from flask import Flask, render_template, redirect, url_for, request, Response
from flask.ext.mongoengine import MongoEngine from flask.ext.mongoengine import MongoEngine
from flask.ext.security import Security, UserMixin, RoleMixin, login_required, MongoEngineUserDatastore from flask.ext.security import Security, UserMixin, RoleMixin, login_required, MongoEngineUserDatastore
from flask.ext.login import current_user from flask.ext.login import current_user
@ -8,6 +8,7 @@ import base64
import urllib import urllib
from subprocess import call from subprocess import call
from bs4 import BeautifulSoup as BS from bs4 import BeautifulSoup as BS
from xml.sax.saxutils import escape
app = Flask(__name__) app = Flask(__name__)
@ -314,14 +315,16 @@ def list(count=100, format="HTML"):
out = out + b.url + ',' + b.title + ',' + b.note + ',' + b.created_at.isoformat() + ',' + tags + ',' + str(b.unread) + ',bookie\n' out = out + b.url + ',' + b.title + ',' + b.note + ',' + b.created_at.isoformat() + ',' + tags + ',' + str(b.unread) + ',bookie\n'
return out return out
elif format == "xml": elif format == "xml":
c = 0
blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count) blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count)
out = "<xml>\n" out = "<xml>\n"
out += "\t<title>Bookmark Export "+datetime.datetime.now().strftime("%Y-%m-%d")+"</title>\n" out += "\t<title>Bookmark Export "+datetime.datetime.now().strftime("%Y-%m-%d")+"</title>\n"
out += "\t<created_at>"+datetime.datetime.now().isoformat()+"</created_at>\n" out += "\t<created_at>"+datetime.datetime.now().isoformat()+"</created_at>\n"
for b in blist: for b in blist:
out += "\t<bookmark>\n" out += "\t<bookmark>\n"
out += "\t\t<title>"+b.title+"</title>\n" out += "\t\t<index>"+str(c)+"</index>\n"
out += "\t\t<short>"+b.short+"</short>\n" out += "\t\t<title>"+escape(b.title)+"</title>\n"
out += "\t\t<short>"+escape(b.short)+"</short>\n"
out += "\t\t<created_at>"+b.created_at.isoformat()+"</created_at>\n" out += "\t\t<created_at>"+b.created_at.isoformat()+"</created_at>\n"
out += "\t\t<unread>" out += "\t\t<unread>"
if b.unread: if b.unread:
@ -329,17 +332,22 @@ def list(count=100, format="HTML"):
else: else:
out += "False" out += "False"
out += "</unread>\n" out += "</unread>\n"
out += "\t\t<url>"+b.url+"</url>\n" out += "\t\t<url>"+escape(b.url)+"</url>\n"
out += "\t\t<tags>" out += "\t\t<tags>"
for t in b.tags: for t in b.tags:
out += t.name + " " out += escape(t.name) + " "
out += "</tags>\n" out += "</tags>\n"
out += "\t\t<note>\n" out += "\t\t<note><![CDATA[\n"
out += "\t\t\t"+b.note+"\n" out += "\t\t\t"+b.note+"\n"
out += "\t\t</note>\n" out += "\t\t]]></note>\n"
out += "\t</bookmark>\n" out += "\t</bookmark>\n"
c += 1
out += "</xml>\n" out += "</xml>\n"
return out
return Response(out, mimetype='application/xml')
elif format == "json": elif format == "json":
blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count) blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count)
out = "" out = ""

File diff suppressed because it is too large Load diff

View file

@ -10,7 +10,7 @@ from bookie import *
app.config["MONGODB_DB"] = "bookie" app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app) db = MongoEngine(app)
file = open('bookmarks.csv') file = open('import.csv')
date_marker = datetime.datetime.now() date_marker = datetime.datetime.now()
count = 0 count = 0

View file

@ -14,7 +14,7 @@ db = MongoEngine(app)
### ###
# File name is hard coded # File name is hard coded
### ###
file = open('bookmarks.json') file = open('import.json')
### ###
# Kill everything in the db # Kill everything in the db

49
xml_import.py Executable file
View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
"""Rebuild the bookie database from an XML export stored in ``import.xml``.

WARNING: this script is destructive.  It deletes every Bookmark, Tag,
ArchivedText and ArchivedImage document before reading the import file.

The expected input is a sequence of ``<bookmark>`` elements, each holding
``<title>``, ``<short>``, ``<url>``, ``<note>``, ``<created_at>``,
``<unread>`` and a space-separated ``<tags>`` element.
"""
import os
import re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bs4 import BeautifulSoup as BS
from xml.sax.saxutils import unescape
from bookie import *

app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

date_marker = datetime.datetime.now()

# Kill everything in the db before importing.
Bookmark.objects.all().delete()
Tag.objects.all().delete()
ArchivedText.objects.all().delete()
ArchivedImage.objects.all().delete()

# File name is hard coded.
with open("import.xml", "r") as importfile:
    data = importfile.read()

soup = BS(data)
count = 0
for i in soup(["bookmark"]):
    b = Bookmark()
    # NOTE(review): BeautifulSoup normally decodes entities in text nodes
    # already, so these unescape() calls may double-decode values that
    # contain a literal "&amp;" - confirm before relying on them.
    b.title = unescape(i.find("title").get_text())
    b.short = unescape(i.find("short").get_text())
    b.url = unescape(i.find("url").get_text())
    b.note = i.find("note").get_text()

    # Split the ISO timestamp on every non-digit and keep at most the first
    # six fields (year .. second).  Slicing with [:6] instead of [:-1] drops
    # a microsecond field when present, and no longer discards the seconds
    # when the timestamp carries no fractional part.
    stamp = unescape(i.find("created_at").get_text()).strip()
    b.created_at = datetime.datetime(*map(int, re.split(r"\D", stamp)[:6]))

    b.unread = i.find("unread").get_text() == "True"

    b.tags = []
    # split() with no argument discards empty fields; the exporter writes a
    # trailing space after every tag, which split(" ") turned into a Tag
    # with an empty name.
    for t in i.find("tags").get_text().split():
        tag = Tag.objects.get_or_create(name=t)[0].save()
        b.tags.append(tag)

    # NOTE(review): nothing in the visible part of this script calls
    # b.save(); confirm that the bookmark is persisted further down.
    count += 1
    print(str(count)+" - "+b.short)
    print(b.title)
    print(b.url)