importing updates

This commit is contained in:
Andrew Davidson 2014-11-05 20:39:33 -05:00
parent 650722a079
commit bd644032bc
6 changed files with 71 additions and 2412 deletions

5
.gitignore vendored
View file

@ -7,4 +7,7 @@ logs/*
*~
#*
Gemfile.lock
temp.py
temp.py
import.xml
import.json
import.csv

View file

@ -1,5 +1,5 @@
import os
from flask import Flask, render_template, redirect, url_for, request
from flask import Flask, render_template, redirect, url_for, request, Response
from flask.ext.mongoengine import MongoEngine
from flask.ext.security import Security, UserMixin, RoleMixin, login_required, MongoEngineUserDatastore
from flask.ext.login import current_user
@ -8,6 +8,7 @@ import base64
import urllib
from subprocess import call
from bs4 import BeautifulSoup as BS
from xml.sax.saxutils import escape
app = Flask(__name__)
@ -314,14 +315,16 @@ def list(count=100, format="HTML"):
out = out + b.url + ',' + b.title + ',' + b.note + ',' + b.created_at.isoformat() + ',' + tags + ',' + str(b.unread) + ',bookie\n'
return out
elif format == "xml":
c = 0
blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count)
out = "<xml>\n"
out += "\t<title>Bookmark Export "+datetime.datetime.now().strftime("%Y-%m-%d")+"</title>\n"
out += "\t<created_at>"+datetime.datetime.now().isoformat()+"</created_at>\n"
for b in blist:
out += "\t<bookmark>\n"
out += "\t\t<title>"+b.title+"</title>\n"
out += "\t\t<short>"+b.short+"</short>\n"
out += "\t\t<index>"+str(c)+"</index>\n"
out += "\t\t<title>"+escape(b.title)+"</title>\n"
out += "\t\t<short>"+escape(b.short)+"</short>\n"
out += "\t\t<created_at>"+b.created_at.isoformat()+"</created_at>\n"
out += "\t\t<unread>"
if b.unread:
@ -329,17 +332,22 @@ def list(count=100, format="HTML"):
else:
out += "False"
out += "</unread>\n"
out += "\t\t<url>"+b.url+"</url>\n"
out += "\t\t<url>"+escape(b.url)+"</url>\n"
out += "\t\t<tags>"
for t in b.tags:
out += t.name + " "
out += escape(t.name) + " "
out += "</tags>\n"
out += "\t\t<note>\n"
out += "\t\t<note><![CDATA[\n"
out += "\t\t\t"+b.note+"\n"
out += "\t\t</note>\n"
out += "\t\t]]></note>\n"
out += "\t</bookmark>\n"
c += 1
out += "</xml>\n"
return out
return Response(out, mimetype='application/xml')
elif format == "json":
blist = Bookmark.objects(deleted=False).order_by("-created_at").only("url","title","short","note","created_at","tags","unread").limit(count)
out = ""

File diff suppressed because it is too large Load diff

View file

@ -10,7 +10,7 @@ from bookie import *
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)
file = open('bookmarks.csv')
file = open('import.csv')
date_marker = datetime.datetime.now()
count = 0

View file

@ -14,7 +14,7 @@ db = MongoEngine(app)
###
# File name is hard coded
###
file = open('bookmarks.json')
file = open('import.json')
###
# Kill everything in the db

49
xml_import.py Executable file
View file

@ -0,0 +1,49 @@
#!/usr/bin/env python3
import os, re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bs4 import BeautifulSoup as BS
from xml.sax.saxutils import unescape
from bookie import *
# Import bookmarks from ``import.xml`` (the XML export format written by the
# app's list view) into the "bookie" MongoDB database.
#
# WARNING: this script is destructive. It wipes every existing bookmark, tag
# and archive document before reading the import file.
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)
date_marker = datetime.datetime.now()

# Start from an empty database so the import cannot create duplicates.
Bookmark.objects.all().delete()
Tag.objects.all().delete()
ArchivedText.objects.all().delete()
ArchivedImage.objects.all().delete()

with open("import.xml", "r") as importfile:
    data = importfile.read()

# NOTE(review): no parser is named here, so BeautifulSoup picks whichever
# one is installed; results may differ between machines - confirm.
soup = BS(data)

count = 0
for i in soup(["bookmark"]):
    b = Bookmark()
    b.title = unescape(i.find("title").get_text())
    b.short = unescape(i.find("short").get_text())
    b.url = unescape(i.find("url").get_text())
    b.note = i.find("note").get_text()

    # The export stores created_at via datetime.isoformat(), which omits the
    # microsecond field when it is zero. Keep at most the first six numeric
    # fields (year..second) so that microseconds are dropped when present
    # and seconds are never dropped when they are not.
    stamp = unescape(i.find("created_at").get_text())
    fields = [int(part) for part in re.split(r"\D", stamp) if part]
    b.created_at = datetime.datetime(*fields[:6])

    b.unread = i.find("unread").get_text() == "True"

    # The exporter writes every tag followed by a space, so splitting on a
    # literal " " would yield a trailing empty string and create an
    # empty-named Tag. split() with no argument discards empty tokens.
    b.tags = []
    for t in i.find("tags").get_text().split():
        tag = Tag.objects.get_or_create(name=t)[0].save()
        b.tags.append(tag)

    count += 1
    # NOTE(review): nothing in the visible part of this script calls
    # b.save(); confirm the bookmark is persisted somewhere, otherwise the
    # import only deletes data.
    print(str(count)+" - "+b.short)
    print(b.title)
    print(b.url)