initial commit

This commit is contained in:
Andrew Davidson 2014-10-24 18:34:28 -04:00
commit 7bcc1b5ef0
22 changed files with 3663 additions and 0 deletions

10
.gitignore vendored Normal file
View file

@ -0,0 +1,10 @@
.DS_Store
tmp/*
venv
benv3
*.pyc
logs/*
*~
#*
Gemfile.lock
temp.py

5
__init__.py Normal file
View file

@ -0,0 +1,5 @@
from flask import Flask
# Package-level Flask application object.
app = Flask(__name__)

# Start Flask's built-in server only when this file is executed directly.
if __name__ == '__main__':
    app.run()

632
bookie.py Normal file
View file

@ -0,0 +1,632 @@
import base64
import datetime
import os
import urllib
import urllib.parse
import urllib.request
from subprocess import call

from bs4 import BeautifulSoup as BS
from flask import Flask, render_template, redirect, url_for, request
from flask.ext.login import current_user
from flask.ext.mongoengine import MongoEngine
from flask.ext.security import Security, UserMixin, RoleMixin, login_required, MongoEngineUserDatastore
app = Flask(__name__)

#####
# Config Values
#####
app.config["MONGODB_DB"] = "bookie"
# The secret key signs session cookies, so a deployment should supply its own
# through the BOOKIE_SECRET_KEY environment variable instead of relying on
# the value committed to source control.  The literal is kept only as the
# development fallback so existing setups keep working.
app.config['SECRET_KEY'] = os.environ.get('BOOKIE_SECRET_KEY', 'bobloblawlawblog')
# Relative directory that receives uploaded files; also used to build the
# URL of an uploaded bookmark.
app.config['UPLOAD_FOLDER'] = "static/uploads"
app.config['SITE_URL'] = "http://localhost:5000"

#####
# MongoDB Setup
#####
db = MongoEngine(app)
#####
# Classes
#####
class Role(db.Document, RoleMixin):
    """Role document handed to the Flask-Security user datastore."""

    # Unique role name, at most 80 characters.
    name = db.StringField(unique=True, max_length=80)
    # Free-form explanation of the role, at most 255 characters.
    description = db.StringField(max_length=255)
class User(db.Document, UserMixin):
    """User account document handed to the Flask-Security user datastore."""

    email = db.StringField(max_length=255)
    password = db.StringField(max_length=255)
    # Accounts are active unless explicitly disabled.
    active = db.BooleanField(default=True)
    confirmed_at = db.DateTimeField()
    # A callable default gives every user its own list instead of one
    # list literal shared by the field definition.
    roles = db.ListField(db.ReferenceField(Role), default=list)
class Tag(db.Document):
    """A label that can be attached to bookmarks."""

    # Unique tag text, at most 25 characters.
    name = db.StringField(required=True, max_length=25, unique=True)
    # Optional description of the tag, at most 100 characters.
    note = db.StringField(required=False, max_length=100)

    def __repr__(self):
        # Include the name so tags can be told apart in logs and debuggers.
        return "Tag(name=%r)" % (self.name,)

    def __str__(self):
        # Templates print tags through str(), so this is the display form.
        return str(self.name)
class ArchivedText(db.Document):
    """Snapshot of a page's HTML together with the text extracted from it."""

    # Address the snapshot was taken from.
    url = db.StringField(max_length=1000, required=True)
    # Defaults to the moment the document object is created.
    created_at = db.DateTimeField(default=datetime.datetime.now, required=True)
    # Parsed/cleaned version of raw_html.
    text = db.StringField(required=True, default="")
    # Page source as returned by get_html().
    raw_html = db.StringField(required=True, default="")

    def get_html(self):
        """Download ``self.url`` and return the response body as a string.

        The body is decoded with the charset announced in the response
        headers, falling back to UTF-8.  Bytes that cannot be decoded are
        replaced rather than turning the whole page into a ``b'...'`` repr.
        Errors raised while opening or reading the URL propagate to the
        caller.
        """
        app.logger.debug("Brewing an opener")
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor())
        app.logger.debug("Getting HTML")
        response = opener.open(self.url)
        try:
            raw_html = response.read()
            charset = response.headers.get_content_charset() or "utf-8"
        finally:
            # Always release the connection, even when read() fails.
            response.close()
        if raw_html:
            app.logger.debug("HTML retrieved")
        try:
            return raw_html.decode(charset, errors="replace")
        except LookupError:
            # The server announced a charset Python does not know.
            return raw_html.decode("utf-8", errors="replace")
class ArchivedImage(db.Document):
    """Record of a rendered image of a page."""

    # Address the image was rendered from.
    url = db.StringField(required=True, max_length=1000)
    # Defaults to the moment the document object is created.
    created_at = db.DateTimeField(required=True, default=datetime.datetime.now)
    # Location of the image file, at most 150 characters.
    path = db.StringField(max_length=150, required=True)
class Bookmark(db.Document):
    """A saved link (or uploaded file) addressable through a short code."""

    # --- Meta ---
    created_at = db.DateTimeField(default=datetime.datetime.now, required=True)
    url = db.StringField(max_length=1000, required=True)
    # Unique short code used in the site's URLs.
    short = db.StringField(max_length=25, required=True, unique=True)
    title = db.StringField(max_length=255, required=True)
    note = db.StringField(required=False)
    tags = db.ListField(db.ReferenceField(Tag))
    # When set, the short link redirects to the embed page instead of the URL.
    image_embed = db.BooleanField(required=True, default=False)
    # archived_* records that an archive exists, *_needed that one is
    # pending, and *_ref points at the most recently linked archive.
    archived_text = db.BooleanField()
    archived_text_needed = db.BooleanField()
    archived_text_ref = db.ReferenceField(ArchivedText)
    archived_image = db.BooleanField()
    archived_image_needed = db.BooleanField()
    archived_image_ref = db.ReferenceField(ArchivedImage)
    unread = db.BooleanField()
    private = db.BooleanField(required=True, default=False)
    # Soft-delete flag; deleted bookmarks are hidden from the main lists.
    deleted = db.BooleanField(required=True, default=False)
    source = db.StringField(max_length=255)

    # --- Metrics ---
    hits = db.IntField(required=True, default=0)
    factor = db.FloatField(required=True)

    meta = {
        'allow_inheritance': True,
        'indexes': ['-created_at', 'short'],
        'ordering': ['-created_at']
    }

    def get_factor(self):
        """Return the ratio of the short link's length to the URL's length.

        The constant 14 is presumably the length of the site prefix
        ("http://amd.im/") -- TODO confirm.  An empty or missing URL yields
        0.0 instead of raising ZeroDivisionError.
        """
        if not self.url:
            return 0.0
        return (len(self.short) + 14) / len(self.url)

    def get_short(self):
        """Return a random five-character code no other bookmark uses."""
        while True:
            s = base64.urlsafe_b64encode(os.urandom(5))[0:5].decode('Latin-1')
            if Bookmark.objects(short=s).first() is None:
                return s

    def __repr__(self):
        # Include the short code so instances can be told apart when debugging.
        return "Bookmark(short=%r)" % (self.short,)

    def __str__(self):
        # %-formatting keeps this usable before a short code is assigned.
        return "Bookmark %s" % (self.short,)
#####
# Security
#####
# Wire Flask-Security to the MongoEngine-backed User and Role documents.
user_datastore = MongoEngineUserDatastore(db, User, Role)
security = Security(app, user_datastore)
######
# Helper Functions
######
# Function to update the archived text of a bookmark
# Inputs: Bookmark()
# Output: True / False
def update_archived_text(b, force=False, update_note=True):
    """Attach an archived text snapshot to bookmark ``b``.

    Without ``force`` the newest existing snapshot for the bookmark's URL is
    re-used (and re-parsed); a download only happens when there is no
    snapshot or its HTML is empty.  With ``force`` a fresh snapshot is
    always downloaded.  When ``update_note`` is set and the bookmark has no
    note, the first 250 characters of the plain text become the note.

    Returns True.  Download or parse errors propagate to the caller; the
    snapshot is only saved after the download succeeded, so a failure does
    not leave an empty snapshot behind.
    """
    snapshot = None
    if not force:
        snapshot = ArchivedText.objects(url=b.url).order_by("-created_at").first()
    if snapshot is None:
        snapshot = ArchivedText(url=b.url)
    if not snapshot.raw_html:
        snapshot.raw_html = snapshot.get_html()
    snapshot.text = html_parse(snapshot.raw_html, b.url, True)
    snapshot.save()

    b.archived_text_ref = snapshot
    # "not b.note" also covers a note that was never set (None).
    if update_note and not b.note:
        b.note = html_parse(snapshot.raw_html, b.url, False)[:250]
    b.archived_text_needed = False
    b.archived_text = True
    b.save()
    return True
# Function to update the archived image of a bookmark
# Inputs: Bookmark()
# Output: True / False
def update_archived_image(b):
    """Render bookmark ``b``'s URL to a JPEG with wkhtmltoimage and link it.

    Returns True when an image file was produced and recorded.  Returns
    False, leaving the bookmark untouched, when the renderer could not be
    started or produced no file.
    """
    a = ArchivedImage()
    a.url = b.url
    directory = 'static/archive/images/'
    ref = directory + b.short + '_' + a.created_at.strftime("%Y-%m-%d_%H%M%S") + '.jpg'
    app.logger.debug(ref)
    try:
        # The renderer does not create missing directories itself.
        os.makedirs(directory, exist_ok=True)
        call(['/usr/bin/env', 'wkhtmltoimage', b.url, ref])
    except OSError:
        app.logger.exception("Could not run wkhtmltoimage for %s", b.url)
        return False
    # Judge success by the output file so that a failed render is never
    # recorded as an existing archive.
    if not os.path.isfile(ref) or os.path.getsize(ref) == 0:
        app.logger.error("wkhtmltoimage produced no image for %s", b.url)
        return False
    a.path = '/' + ref
    a.save()
    b.archived_image_ref = a
    b.archived_image_needed = False
    b.archived_image = True
    b.save()
    return True
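# A custom function to extract the key text from the raw html
# Inputs: raw HTML string, the page URL (used to resolve relative image
#         links), and whether to return HTML (True) or plain text (False)
# Outputs: the extracted content as a string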
# Containers tried, in priority order, when looking for the main content:
# (tag name, attribute filter, label used in the debug log).
_CONTENT_CANDIDATES = (
    ("div", {"class": "story-text"}, '<div class="story-text">'),
    ("div", {"id": "article"}, '<div id="article">'),
    ("div", {"id": "articleBody"}, '<div id="articleBody">'),
    ("div", {"class": "articleBody"}, '<div class="articleBody">'),
    ("div", {"class": "post"}, '<div class="post">'),
    ("div", {"class": "post-content"}, '<div class="post-content">'),
    ("div", {"class": "article-content"}, '<div class="article-content">'),
    ("div", {"class": "story-content"}, '<div class="story-content">'),
    ("div", {"class": "content"}, '<div class="content">'),
    ("article", {}, '<article>'),
    ("div", {"id": "page"}, '<div id="page">'),
)


def _find_content(soup):
    """Return ``(node, is_fallback)`` for the best content container in ``soup``."""
    for tag, attrs, label in _CONTENT_CANDIDATES:
        node = soup.find(tag, attrs=attrs)
        if node is not None:
            app.logger.debug('Text import from ' + label)
            return node, False
    app.logger.debug('Text import from <body>')
    body = soup.find("body")
    # Fragments without a <body> fall back to the whole document.
    return (body if body is not None else soup), True


def _compact(text):
    """Split ``text`` into stripped, non-empty chunks (by line and double space)."""
    chunks = []
    for line in text.splitlines():
        for phrase in line.strip().split("  "):
            phrase = phrase.strip()
            if phrase:
                chunks.append(phrase)
    return chunks


def html_parse(raw_html, url, paragraphs=True):
    """Extract the main content of a page.

    raw_html   -- page source to parse
    url        -- address of the page, used to make image links absolute
    paragraphs -- True returns cleaned-up HTML, False returns plain text

    Scripts, styles, navigation and every element listed by kill_list() are
    removed first.  When no known content container is found, the whole
    body is used and, in HTML mode, its text is wrapped in <p> elements.
    """
    soup = BS(raw_html)
    for t in soup(["script", "style", "nav", "header", "aside", "select",
                   "form", "link", "meta", "svg"]):
        t.decompose()
    for tag, attr in kill_list():
        for t in soup.findAll(tag, attr):
            t.decompose()

    text, strip_tags = _find_content(soup)

    if not paragraphs:
        return '\n'.join(_compact(text.get_text()))

    for t in text('img'):
        t['style'] = "max-width:600px;max-height:600px;"
        src = t.get('src')
        if src is not None:
            t['src'] = urllib.parse.urljoin(url, src)
    for t in text("div"):
        del t['class']
        del t['style']
    for t in text("iframe"):
        del t['height']
        del t['width']
        t['style'] = "max-width:600px;max-height:600px;margin:0em auto;display:block;"

    if strip_tags:
        # Close one paragraph and open the next between chunks.
        return '<p>' + '</p><p>'.join(_compact(text.get_text())) + '</p>'
    return '\n'.join(_compact(text.prettify()))
# A function defining key banned HTML tags
# Input: none
# Output: List of beautiful soup .decompose() compatible banned tags
def kill_list():
    """Return the [tag, attribute filter] pairs html_parse() strips from a page.

    A new list is built on every call.
    """
    return [
        ["div", {"id": "comments"}],
        ["div", {"class": "video"}],
        ["div", {"class": "m-linkset"}],
        ["div", {"class": "m-feature__intro"}],
        ["div", {"class": "m-share-buttons"}],
        ["p", {"class": "m-entry__byline"}],
        ["div", {"class": "social"}],
        ["div", {"id": "follow-bar"}],
        ["section", {"class": "m-rail-component"}],
    ]
# Encoding Function to enable JSON export
# Lifted from: http://goo.gl/SkWzpn
# Inputs: mongoengine object or query
# Outputs: Prepared instance for JSON dump
def encode_model(self, obj):
    """Convert ``obj`` into something the json module can serialise.

    Documents become a dict of their data with ObjectIds turned into
    strings, query sets and groupby results become lists, modules become
    None.  Any other type raises TypeError.  ``self`` is unused; the
    parameter is kept so the existing call signature stays valid.
    """
    # Imported here because the module header does not provide these names.
    import types
    from itertools import groupby

    import mongoengine
    from bson import ObjectId

    # NOTE: the builtin list() is deliberately not used below -- at module
    # level the name "list" refers to the /all/ view function.
    if isinstance(obj, (mongoengine.Document, mongoengine.EmbeddedDocument)):
        out = {
            k: (str(v) if isinstance(v, ObjectId) else v)
            for k, v in obj._data.items()
        }
    elif isinstance(obj, mongoengine.queryset.QuerySet):
        out = [item for item in obj]
    elif isinstance(obj, types.ModuleType):
        out = None
    elif isinstance(obj, groupby):
        out = [(g, [item for item in l]) for g, l in obj]
    else:
        raise TypeError("Could not JSON-encode type '%s': %s" % (type(obj), str(obj)))
    return out
#####
# Routes
#####
# List all bookmarks
@app.route('/all/<int:count>/<format>')
@app.route('/all/<int:count>/')
@app.route('/all/')
@app.route('/a/')
@login_required
def list(count=100, format="HTML"):
    """List the newest ``count`` non-deleted bookmarks.

    ``format`` selects the representation (matched case-insensitively):
    "JSON", "CSV", or anything else for the HTML list page.  CSV rows use
    the column order URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE expected
    by csv_import.py: commas inside a field are written as ^comma^ and the
    unread flag as 1/0.
    """
    loc = '/all/' + str(count) + '/'
    wanted = format.upper()
    newest = Bookmark.objects(deleted=False).order_by("-created_at")

    if wanted == "JSON":
        blist = newest.only("url", "title", "note", "created_at", "tags", "unread").limit(count)
        # Declare the payload as JSON so browsers never render it as HTML.
        return app.response_class(blist.to_json(), mimetype="application/json")

    if wanted == "CSV":
        rows = []
        for b in newest.limit(count):
            fields = [
                b.url,
                b.title,
                b.note or "",
                b.created_at.isoformat(),
                # Tags are rebuilt per bookmark, separated by single spaces.
                ' '.join(t.name for t in b.tags),
                '1' if b.unread else '0',
                'bookie',
            ]
            cleaned = []
            for value in fields:
                # One bookmark per line: flatten line breaks, escape commas.
                value = value.replace('\r', ' ').replace('\n', ' ')
                cleaned.append(value.replace(',', '^comma^'))
            rows.append(','.join(cleaned) + '\n')
        return app.response_class(''.join(rows), mimetype="text/csv")

    blist = newest.limit(count)
    return render_template("list.html", blist=blist, loc=loc)
# View deleted bookmarks
@app.route('/deleted/<int:count>/<format>')
@app.route('/deleted/<int:count>/')
@app.route('/deleted/')
@app.route('/d/')
@login_required
def deleted(count=100, format="HTML"):
    """List the newest ``count`` deleted bookmarks as HTML or, on request, JSON."""
    loc = '/deleted/' + str(count) + '/'
    blist = Bookmark.objects(deleted=True).order_by("-created_at").limit(count)
    if format.upper() == "JSON":
        # Declare the payload as JSON so browsers never render it as HTML.
        return app.response_class(blist.to_json(), mimetype="application/json")
    return render_template("list.html", blist=blist, loc=loc)
# List unread bookmarks
@app.route('/unread/<int:count>/<format>')
@app.route('/unread/<int:count>/')
@app.route('/unread/')
@app.route('/u/')
@login_required
def unread(count=100, format="HTML"):
    """List the newest ``count`` unread, non-deleted bookmarks (HTML or JSON)."""
    loc = '/unread/' + str(count) + '/'
    blist = Bookmark.objects(unread=True, deleted=False).order_by("-created_at").limit(count)
    if format.upper() == "JSON":
        # Declare the payload as JSON so browsers never render it as HTML.
        return app.response_class(blist.to_json(), mimetype="application/json")
    return render_template('list.html', blist=blist, loc=loc)
# New bookmark
@app.route('/new', methods=["GET", "POST"])
@login_required
def new():
    """Show the bookmark form (GET) or create a bookmark from it (POST).

    GET:  ``title``, ``url`` and ``note`` query arguments pre-fill the form.
    POST: creates the bookmark from the submitted URL or, when the URL is
          empty, from the uploaded file, then shows its detail page.  When
          neither is supplied the form is shown again.
    """
    if request.method != "POST":
        if not any(k in request.args for k in ('title', 'url', 'note')):
            return render_template("form.html", action="/new", b=False)
        # Unsaved bookmark that only carries the pre-fill values.
        b = Bookmark()
        b.title = request.args.get('title', "")
        b.url = request.args.get('url', "")
        b.note = request.args.get('note', "")
        return render_template("form.html", action="/new", b=b)

    form = request.form
    b = Bookmark()
    b.title = form["title"]
    b.short = str(b.get_short())
    b.note = form["note"]
    # Unticked checkboxes are simply absent from the submitted form.
    b.image_embed = bool(form.get("image_embed"))
    b.unread = bool(form.get("unread"))
    # "archive" is the older single switch; the form sends one field per kind.
    archive_all = bool(form.get("archive"))
    b.archived_text_needed = archive_all or bool(form.get("archive_text_needed"))
    b.archived_image_needed = archive_all or bool(form.get("archive_image_needed"))
    # split() without arguments drops empty entries; names are lower-cased
    # because the tag search looks them up in lower case, and cut to the
    # 25 characters a tag name may hold.
    b.tags = [
        Tag.objects.get_or_create(name=name[:25])[0].save()
        for name in form.get('tags', '').lower().split()
    ]

    url = form.get("url", "")
    if url:
        b.url = url
    else:
        upload = request.files.get('file_upload')
        if upload is None or not upload.filename:
            # Neither a URL nor a file: let the user try again.
            b.url = ""
            return render_template("form.html", action="/new", b=b)
        # Keep only letters and digits of the extension so the client
        # supplied name can never add path separators to the target path.
        pieces = upload.filename.rsplit('.', 1)
        ext = ''.join(ch for ch in pieces[1] if ch.isalnum()) if len(pieces) == 2 else ''
        filename = b.short + ('.' + ext if ext else '')
        upload.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
        b.url = '/' + app.config['UPLOAD_FOLDER'] + '/' + filename

    b.factor = b.get_factor()
    b.save()
    return render_template("detail.html", b=b)
@app.route('/tag/<rawtag>')
@login_required
def tagsearch(rawtag):
    """List the bookmarks carrying the tag ``rawtag`` (looked up in lower case).

    Responds with 404 for an unknown tag and redirects to the home page
    when no bookmark uses the tag.
    """
    t = Tag.objects.get_or_404(name=rawtag.lower())
    blist = Bookmark.objects(tags__in=[t])
    if blist.count() == 0:
        return redirect("/", code=302)
    # list.html builds the "redirect" argument of its action links from loc.
    return render_template('list.html', blist=blist, loc='/tag/' + t.name)
@app.route('/<id>/update/<action>')
@app.route('/<id>/u/<action>')
@login_required
def update(id, action):
    """Apply ``action`` to the bookmark with short code ``id``, then redirect.

    Actions: text, text_force, image (archiving) and archive, unread,
    private, public, restore, delete (flag changes).  Unknown actions and
    unknown short codes change nothing.  The redirect target comes from the
    ``redirect`` query argument (default "/") plus an optional ``anchor``;
    targets that name another scheme or host are replaced by "/".
    """
    loc = request.args.get('redirect') or '/'
    target = urllib.parse.urlparse(loc)
    if target.scheme or target.netloc or loc.startswith(('//', '/\\', '\\')):
        # Only redirect within this site.
        loc = '/'
    anchor = request.args.get('anchor')
    if anchor is not None:
        app.logger.debug(anchor)
        loc = loc + "#" + anchor

    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect(loc, code=302)

    # action -> (field, new value) for the simple flag switches.
    flag_changes = {
        "archive": ("unread", False),
        "unread": ("unread", True),
        "private": ("private", True),
        "public": ("private", False),
        "restore": ("deleted", False),
        "delete": ("deleted", True),
    }
    if action == "text":
        update_archived_text(b)
    elif action == "text_force":
        update_archived_text(b, force=True)
    elif action == "image":
        update_archived_image(b)
    elif action in flag_changes:
        field, value = flag_changes[action]
        setattr(b, field, value)
        b.save()
    return redirect(loc, code=302)
@app.route('/<id>/details')
@app.route('/<id>/d')
@login_required
def details(id):
    """Show the detail page of the bookmark with short code ``id``.

    Unknown short codes redirect to the home page, like the other
    per-bookmark views.
    """
    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect("/", code=302)
    return render_template("detail.html", b=b)
@app.route('/<id>/edit', methods=["GET", "POST"])
@app.route('/<id>/e', methods=["GET", "POST"])
@login_required
def edit(id):
    """Show (GET) or apply (POST) the edit form of bookmark ``id``.

    Unknown short codes redirect to the home page.  On POST, title, note,
    tags and URL are only changed when the field was submitted; an empty
    URL is ignored.  The four checkboxes are always applied, because an
    unticked checkbox is not submitted at all.
    """
    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect("/", code=302)

    if request.method == "POST":
        form = request.form
        if "title" in form:
            b.title = form["title"]
        if "note" in form:
            b.note = form["note"]
        # A ticked checkbox arrives with a non-empty value ("on" unless the
        # markup sets one); an unticked one is absent.
        b.image_embed = bool(form.get("image_embed"))
        b.unread = bool(form.get("unread"))
        b.archived_text_needed = bool(form.get("archive_text_needed"))
        b.archived_image_needed = bool(form.get("archive_image_needed"))
        if "tags" in form:
            # split() drops empty entries; names are lower-cased because the
            # tag search looks them up in lower case, and cut to 25 chars.
            b.tags = [
                Tag.objects.get_or_create(name=name[:25])[0].save()
                for name in form["tags"].lower().split()
            ]
        if form.get("url"):
            b.url = form["url"]
            b.factor = b.get_factor()
        b.save()

    # Also expose the pending-archive flags under the un-prefixed
    # "archive_*" names for templates that read those.
    b.archive_text_needed = b.archived_text_needed
    b.archive_image_needed = b.archived_image_needed
    return render_template("form.html", action="/" + b.short + "/edit", b=b)
# Pull up an archived and parsed text view of the Bookmark
# The first line of defense in preventing link rot...
@app.route('/<id>/text/<version>')
@app.route('/<id>/text/')
@app.route('/<id>/t/')
@login_required
def text(id, version=False):
    """Show the archived, parsed text of bookmark ``id``.

    ``version`` selects a snapshot by its creation time; without it the
    snapshot linked to the bookmark is shown.  Each view counts as a hit.
    Redirects to the home page when the bookmark or the requested snapshot
    does not exist.
    """
    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect("/", code=302)
    if version:
        snapshot = ArchivedText.objects(url=b.url, created_at=version).first()
    else:
        snapshot = b.archived_text_ref
    if snapshot is None:
        return redirect("/", code=302)
    # All snapshots of this URL, offered as alternative versions.
    tlist = ArchivedText.objects(url=b.url)
    b.hits += 1
    b.save()
    return render_template("text.html", b=b, text=snapshot.text, tlist=tlist)
# Display the raw html scraped from the website
# The second line of defense against link rot...
@app.route('/<id>/raw/<version>')
@app.route('/<id>/raw/')
@app.route('/<id>/r/')
@login_required
def raw(id, version=False):
    """Return the raw HTML scraped for bookmark ``id``.

    ``version`` selects a snapshot by its creation time; without it the
    snapshot linked to the bookmark is used.  Redirects to the home page
    when the bookmark or the requested snapshot does not exist.
    """
    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect("/", code=302)
    if version:
        snapshot = ArchivedText.objects(url=b.url, created_at=version).first()
    else:
        snapshot = b.archived_text_ref
    if snapshot is None:
        return redirect("/", code=302)
    return snapshot.raw_html
# An archived image scraped from the website
# The third line of defense against link rot...
@app.route('/<id>/image/<version>')
@app.route('/<id>/image/')
@app.route('/<id>/i/')
@login_required
def image(id, version=False):
    """Redirect to the archived image of bookmark ``id``.

    ``version`` selects an image by its creation time; without it the image
    linked to the bookmark is used.  Each view counts as a hit.  Redirects
    to the home page when the bookmark or the requested image does not
    exist.
    """
    b = Bookmark.objects(short=id).first()
    if b is None:
        return redirect("/", code=302)
    if version:
        archived = ArchivedImage.objects(url=b.url, created_at=version).first()
    else:
        archived = b.archived_image_ref
    if archived is None:
        return redirect("/", code=302)
    b.hits += 1
    b.save()
    return redirect(archived.path, code=302)
# Embed url as an image in a formatted page. Does not require login.
@app.route('/<id>/embed')
def embed(id):
    """Show bookmark ``id`` embedded as an image; counts the view as a hit.

    No login is needed for public bookmarks.  Unknown short codes, and
    private bookmarks requested by anonymous visitors, redirect to "/".
    """
    b = Bookmark.objects(short=id).first()
    if not b:
        return redirect("/", code=302)
    if b.private == True and not current_user.is_authenticated():
        return redirect("/", code=302)
    b.hits += 1
    b.save()
    return render_template("image.html", b=b)
# Short code redirects directly to bookmark target, does not require auth to use
# bookie as a URL shortener app
@app.route('/<id>')
def short(id):
    """Resolve short code ``id``: count a hit and redirect to the target.

    No login is needed for public bookmarks.  Bookmarks flagged for image
    embedding go to their embed page, all others to the stored URL.
    Unknown short codes, and private bookmarks requested by anonymous
    visitors, redirect to "/".
    """
    b = Bookmark.objects(short=id).first()
    if not b:
        return redirect("/", code=302)
    if b.private == True and not current_user.is_authenticated():
        return redirect("/", code=302)
    b.hits += 1
    b.save()
    destination = "/" + b.short + "/embed" if b.image_embed else b.url
    return redirect(destination, code=302)
# Anonymous home page
@app.route('/')
def index():
    """Serve the anonymous landing page."""
    return render_template("index.html")

2401
bookmarks.csv Normal file

File diff suppressed because it is too large Load diff

83
csv_import.py Executable file
View file

@ -0,0 +1,83 @@
#!/usr/bin/env python3
import os, re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bookie import *
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

# Read the whole file first, so a missing file aborts the run before any
# existing data is removed, and the handle is closed again right away.
with open('bookmarks.csv') as file:
    lines = file.readlines()

date_marker = datetime.datetime.now()
count = 0

# WARNING: the import replaces the complete contents of the database.
Bookmark.objects.all().delete()
Tag.objects.all().delete()
ArchivedText.objects.all().delete()
ArchivedImage.objects.all().delete()

###
# Fixed field order:
# URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE
###
# Notes on processing assumptions:
# UNREAD field is a 1/0 integer ("True"/"False" is accepted as well)
# Escape commas with ^comma^ in raw fields before CSV export
# Watch for NEWLINE/CRLF issues
# All tags converted to lowercase, tag duplication prevented
# Date importing assumes that bookmarks are in chronological order and will
# continue to use the last recorded date until it finds a new date
for line in lines:
    line = line.rstrip('\r\n')
    if not line:
        continue
    fields = line.split(",")
    if len(fields) != 7:
        # Report and skip instead of aborting the whole import.
        print("Skipping malformed line: " + line)
        continue
    URL, TITLE, NOTE, DATE, TAGS, UNREAD, SOURCE = fields

    b = Bookmark()
    filtered_url = URL.replace('^comma^', ',')
    if Bookmark.objects(url=filtered_url).count() == 0:
        print("URL: " + URL)
        b.url = filtered_url
    else:
        print("Duplicate URL: " + URL)
        continue
    print("Title: " + TITLE)
    b.title = TITLE.replace('^comma^', ',')[:255]
    print("Note: " + NOTE)
    b.note = NOTE.replace('^comma^', ',')
    if DATE != "":
        # Split on every non-digit; the last piece is dropped.
        b.created_at = datetime.datetime(*map(int, re.split(r'[^\d]', DATE)[:-1]))
        if b.created_at < date_marker:
            date_marker = b.created_at
    else:
        b.created_at = date_marker
    print("Date: " + b.created_at.strftime("%Y/%m/%d"))
    print("Tags: " + TAGS)
    if TAGS != "":
        tag_list = []
        # split() without arguments ignores repeated and trailing spaces.
        for rawtag in TAGS.split():
            filtered = rawtag.replace('^comma^', ',').lower()[:25]
            t = Tag.objects.get_or_create(name=filtered)[0].save()
            tag_list.append(t)
        b.tags = tag_list
    try:
        is_unread = int(UNREAD) > 0
    except ValueError:
        is_unread = UNREAD.strip().lower() == "true"
    b.unread = is_unread
    print("Unread: True" if is_unread else "Unread: False")
    print(UNREAD)
    b.source = SOURCE.replace('^comma^', ',')[:255]
    b.archived_text_needed = True
    b.archived_image_needed = True
    b.deleted = False
    b.short = b.get_short()
    b.factor = b.get_factor()
    b.save()
    count += 1
    print(count)

94
html_parse.py Normal file
View file

@ -0,0 +1,94 @@
from bs4 import BeautifulSoup as BS
from urllib.parse import urljoin
from bookie import app
# Containers tried, in priority order, when looking for the main content:
# (tag name, attribute filter, label used in the debug log).
_CONTENT_CANDIDATES = (
    ("div", {"class": "story-text"}, '<div class="story-text">'),
    ("div", {"id": "article"}, '<div id="article">'),
    ("div", {"id": "articleBody"}, '<div id="articleBody">'),
    ("div", {"class": "articleBody"}, '<div class="articleBody">'),
    ("div", {"class": "post"}, '<div class="post">'),
    ("div", {"class": "post-content"}, '<div class="post-content">'),
    ("div", {"class": "article-content"}, '<div class="article-content">'),
    ("div", {"class": "story-content"}, '<div class="story-content">'),
    ("div", {"class": "content"}, '<div class="content">'),
    ("article", {}, '<article>'),
    ("div", {"id": "page"}, '<div id="page">'),
)


def _find_content(soup):
    """Return ``(node, is_fallback)`` for the best content container in ``soup``."""
    for tag, attrs, label in _CONTENT_CANDIDATES:
        node = soup.find(tag, attrs=attrs)
        if node is not None:
            # Every branch reports through the app logger instead of print().
            app.logger.debug('Text import from ' + label)
            return node, False
    app.logger.debug('Text import from <body>')
    body = soup.find("body")
    # Fragments without a <body> fall back to the whole document.
    return (body if body is not None else soup), True


def _compact(text):
    """Split ``text`` into stripped, non-empty chunks (by line and double space)."""
    chunks = []
    for line in text.splitlines():
        for phrase in line.strip().split("  "):
            phrase = phrase.strip()
            if phrase:
                chunks.append(phrase)
    return chunks


def html_parse(raw_html, url, paragraphs=True):
    """Extract the main content of a page.

    raw_html   -- page source to parse
    url        -- address of the page, used to make image links absolute
    paragraphs -- True returns cleaned-up HTML, False returns plain text

    Scripts, styles, navigation and every element listed by kill_list() are
    removed first.  When no known content container is found, the whole
    body is used and, in HTML mode, its text is wrapped in <p> elements.
    """
    soup = BS(raw_html)
    for t in soup(["script", "style", "nav", "header", "aside", "select",
                   "form", "link", "meta", "svg"]):
        t.decompose()
    for tag, attr in kill_list():
        for t in soup.findAll(tag, attr):
            t.decompose()

    text, strip_tags = _find_content(soup)

    if not paragraphs:
        return '\n'.join(_compact(text.get_text()))

    for t in text('img'):
        t['style'] = "max-width:600px;max-height:600px;"
        src = t.get('src')
        if src is not None:
            t['src'] = urljoin(url, src)
    for t in text("div"):
        del t['class']
        del t['style']
    for t in text("iframe"):
        del t['height']
        del t['width']
        t['style'] = "max-width:600px;max-height:600px;margin:0em auto;display:block;"

    if strip_tags:
        # Close one paragraph and open the next between chunks.
        return '<p>' + '</p><p>'.join(_compact(text.get_text())) + '</p>'
    return '\n'.join(_compact(text.prettify()))
def kill_list():
    """Return the [tag, attribute filter] pairs html_parse() strips from a page.

    A new list is built on every call.
    """
    return [
        ["div", {"id": "comments"}],
        ["div", {"class": "video"}],
        ["div", {"class": "m-linkset"}],
        ["div", {"class": "m-feature__intro"}],
        ["div", {"class": "m-share-buttons"}],
        ["p", {"class": "m-entry__byline"}],
        ["div", {"class": "social"}],
        ["div", {"id": "follow-bar"}],
        ["section", {"class": "m-rail-component"}],
    ]

26
json_import.py Executable file
View file

@ -0,0 +1,26 @@
#!/usr/bin/env python3
import os, re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bookie import *
# Point the shared app at the "bookie" database and bind MongoEngine to it.
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)
###
# File name is hard coded
###
# NOTE(review): the handle is opened but nothing visible here reads or
# closes it yet -- the import itself still has to be written.
file = open('bookmarks.json')
###
# Kill everything in the db
###
# (left disabled: running these lines would empty every collection)
# Bookmark.objects.all().delete()
# Tag.objects.all().delete()
# ArchivedText.objects.all().delete()
# ArchivedImage.objects.all().delete()
# ArchivedImage.objects.all().delete()

18
manage.py Normal file
View file

@ -0,0 +1,18 @@
# Set the path
import os, sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from flask.ext.script import Manager, Server
from bookie import app
manager = Manager(app)

# Turn on debugger by default and reloader.
# The interactive debugger lets anyone who can reach the server run Python
# code, so the development server listens on the loopback interface unless
# BOOKIE_HOST explicitly asks for another address (e.g. 0.0.0.0).
manager.add_command("runserver", Server(
    use_debugger=True,
    use_reloader=True,
    host=os.environ.get('BOOKIE_HOST', '127.0.0.1'))
)

if __name__ == "__main__":
    manager.run()

64
requirements.txt Normal file
View file

@ -0,0 +1,64 @@
Flask-Script==2.0.5
Flask-WTF==0.10.2
Jinja2==2.7.3
MarkupSafe==0.23
Twisted==13.2.0
WTForms==2.0.1
Werkzeug==0.9.6
altgraph==0.10.2
bdist-mpkg==0.5.0
bonjour-py==0.3
gunicorn==19.1.0
itsdangerous==0.24
macholib==1.5.1
matplotlib==1.3.1
modulegraph==0.10.4
mongoengine==0.8.7
numpy==1.8.0rc1
py2app==0.7.3
pyOpenSSL==0.13.1
pymongo==2.7.2
pyobjc-core==2.5.1
pyobjc-framework-Accounts==2.5.1
pyobjc-framework-AddressBook==2.5.1
pyobjc-framework-AppleScriptKit==2.5.1
pyobjc-framework-AppleScriptObjC==2.5.1
pyobjc-framework-Automator==2.5.1
pyobjc-framework-CFNetwork==2.5.1
pyobjc-framework-Cocoa==2.5.1
pyobjc-framework-Collaboration==2.5.1
pyobjc-framework-CoreData==2.5.1
pyobjc-framework-CoreLocation==2.5.1
pyobjc-framework-CoreText==2.5.1
pyobjc-framework-DictionaryServices==2.5.1
pyobjc-framework-EventKit==2.5.1
pyobjc-framework-ExceptionHandling==2.5.1
pyobjc-framework-FSEvents==2.5.1
pyobjc-framework-InputMethodKit==2.5.1
pyobjc-framework-InstallerPlugins==2.5.1
pyobjc-framework-InstantMessage==2.5.1
pyobjc-framework-LatentSemanticMapping==2.5.1
pyobjc-framework-LaunchServices==2.5.1
pyobjc-framework-Message==2.5.1
pyobjc-framework-OpenDirectory==2.5.1
pyobjc-framework-PreferencePanes==2.5.1
pyobjc-framework-PubSub==2.5.1
pyobjc-framework-QTKit==2.5.1
pyobjc-framework-Quartz==2.5.1
pyobjc-framework-ScreenSaver==2.5.1
pyobjc-framework-ScriptingBridge==2.5.1
pyobjc-framework-SearchKit==2.5.1
pyobjc-framework-ServiceManagement==2.5.1
pyobjc-framework-Social==2.5.1
pyobjc-framework-SyncServices==2.5.1
pyobjc-framework-SystemConfiguration==2.5.1
pyobjc-framework-WebKit==2.5.1
pyparsing==2.0.1
python-dateutil==1.5
pytz==2013.7
scipy==0.13.0b1
six==1.4.1
virtualenv==1.11.6
wsgiref==0.1.2
xattr==0.6.4
zope.interface==4.1.1

27
run_jobs.py Executable file
View file

@ -0,0 +1,27 @@
#!/usr/bin/env python3
import os, re
from flask.ext.mongoengine import MongoEngine
import datetime
import base64
from bookie import *
app.config["MONGODB_DB"] = "bookie"
db = MongoEngine(app)

# Text archives: one failing bookmark must not stop the remaining jobs, but
# the reason for the failure is reported instead of being swallowed.
for b in Bookmark.objects(archived_text_needed=True):
    print("Archiving text for " + b.short)
    try:
        update_archived_text(b)
    except Exception as exc:
        print("Text archive failed: %s" % (exc,))
    else:
        print("Text archived.")

# Image archives: a falsy result from the helper counts as a failure too.
for b in Bookmark.objects(archived_image_needed=True):
    print("Archiving image for " + b.short)
    try:
        archived = update_archived_image(b)
    except Exception as exc:
        print("image archive failed: %s" % (exc,))
    else:
        print("image archived." if archived else "image archive failed.")

4
static/archive/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

BIN
static/images/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 41 KiB

99
static/site.css Normal file
View file

@ -0,0 +1,99 @@
/* Site CSS File */
body {
font-family:Helvetica, Helvetica-Neue, Arial, sans-serif;
font-size: 14pt;
}
div.wrapper{
width:750px;
margin:0em auto;
}
div.header {
text-align: center;
}
div.image_embed {
text-align:center;
margin: 0em auto;
}
div.image_caption {
text-align:center;
margin: 0em auto;
max-width: 700px;
}
div.form {
margin: 0em auto;
text-align:center;
}
div.form input.text {
border: 1px #000 solid;
width: 350px;
}
div.form input.button {
font-size: 13pt;
}
div.form textarea {
border: 1px #000 solid;
width: 350px;
height: 100px;
}
div.list {
margin: 0em auto;
}
div.text {
}
div.text p {
margin-bottom: 10px;
}
img.image_embed {
max-width: 600px;
max-height: 600px;
}
ul.bookmarks {
list-style-type: none;
}
li.bookmark {
border-top: 1px #999 solid;
border-left: 1px #999 solid;
padding-left: 25px;
word-wrap: break-word;
}
p.date {
font-size: 0.7em;
}
p.menu {
font-size: 0.7em;
}
p.tags {
font-size: 0.7em;
}
div.nav {
width: 100%;
text-align: center;
}
ul.nav {
list-style-type: none;
}
ul.nav li {
display: inline;
}

4
static/uploads/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

12
templates/detail.html Normal file
View file

@ -0,0 +1,12 @@
{% extends "layout.html" %}
{% block title %} | {{b.short}} Details{% endblock %}
{% block body %}
<p>Title: {{b.title}}</p>
<p>Short: {{b.short}}</p>
<p>URL: {{b.url}}</p>
<p>Note: {{b.note}}</p>
<p>Tags: {% for t in b.tags %}{{t}}, {% endfor %}</p>
<p>Created At {{b.created_at}}</p>
<p>Hits: {{b.hits}}</p>
<p>Factor: {{b.factor}}</p>
{% endblock %}

39
templates/form.html Normal file
View file

@ -0,0 +1,39 @@
{% extends "layout.html" %}
{# Shared form for creating (action "/new") and editing (action "/<short>/edit") a bookmark.
   "b" is the bookmark whose values pre-fill the form, or a false value for an empty form. #}
{% block title %} | {% if action == "/new" %}New{% else %}Edit{% endif %} Bookmark{% endblock %}
{% block body %}
<div class="form new">
  <h3>Add/Update Bookmark</h3>
  {# Only a bookmark being edited has a detail page to link to. #}
  {% if b and action != "/new" %}
  <a href="/{{b.short}}/details">{{b.short}} info</a>
  {% endif %}
  <form action="{{action}}" method="POST" enctype="multipart/form-data">
    <p><label for="title">Title</label><br/>
    <input type="text" id="title" name="title" class="text" value="{%if b%}{{b.title}}{%endif%}"/></p>
    <p><label for="note">Note</label><br/>
    <textarea id="note" name="note" class="textarea">{%if b%}{{b.note}}{%endif%}</textarea></p>
    <p><label for="tags">Tags</label><br/>
    <input type="text" id="tags" name="tags" class="text" value="{%if b%}{%for t in b.tags%}{{t.name}} {%endfor%}{%endif%}"/></p>
    {# Ticked checkboxes submit the value "checked"; unticked ones are not submitted. #}
    <p><label for="image_embed">Embed as image?</label>
    <input type="checkbox" id="image_embed" name="image_embed" value="checked" class="checkbox" {%if b and b.image_embed%}checked="checked"{%endif%}/></p>
    <p><label for="archive_text_needed">Archive page (text)?</label>
    <input type="checkbox" id="archive_text_needed" name="archive_text_needed" value="checked" class="checkbox" {%if b and b.archived_text_needed%}checked="checked"{%endif%}/></p>
    <p><label for="archive_image_needed">Archive page (image)?</label>
    <input type="checkbox" id="archive_image_needed" name="archive_image_needed" value="checked" class="checkbox" {%if b and b.archived_image_needed%}checked="checked"{%endif%}/></p>
    <p><label for="unread">Mark as unread?</label>
    <input type="checkbox" id="unread" name="unread" value="checked" class="checkbox" {%if b and b.unread%}checked="checked"{%endif%}/></p>
    <hr/>
    <p><label for="url">Bookmark URL</label><br/>
    <input type="text" id="url" name="url" class="text" value="{%if b%}{{b.url}}{%endif%}"/></p>
    <p><input type="submit" name="linker" value="Link" class="button"/></p>
    <hr/>
    <p><label for="file_upload">Upload File</label><br/>
    <input type="file" id="file_upload" name="file_upload" class="file"/></p>
    <p><input type="submit" name="uploader" value="Upload" class="button"/></p>
  </form>
</div>
{% endblock %}

11
templates/image.html Normal file
View file

@ -0,0 +1,11 @@
{% extends "layout.html" %}
{% block title %} | {{b.title}}{% endblock %}
{% block body %}
<div class="image_embed">
<img src="{{b.url}}" alt="Image for {{b.short}}" class="image_embed" />
</div>
<div class="image_caption">
<h3>{{b.title}}</h3>
<p>{{b.note}}</p>
</div>
{% endblock %}

13
templates/index.html Normal file
View file

@ -0,0 +1,13 @@
<!doctype html>
<html>
<head>
<title>amd.im</title>
</head>
<body>
<div class="page">
<div style="text-align:center;margin:0em auto;">
<img src="/static/images/logo.png" style="width:90%;margin-top:-10%;" />
</div>
</div>
</body>
</html>

24
templates/layout.html Normal file
View file

@ -0,0 +1,24 @@
<!doctype html>
<html>
<head>
<title>amd.im{% block title %}{% endblock %}</title>
<link rel="stylesheet" type="text/css" href="/static/site.css" />
{% block includes %}{% endblock %}
</head>
<body>
<div class="wrapper">
<div class="header">
<img src="/static/images/logo.png" style="width:250px;" />
</div>
<div class="nav">
<ul class="nav">
<li><a href="/unread">Unread</a></li> |
<li><a href="/all">All</a></li> |
<li><a href="/deleted">Deleted</a></li> |
<li><a href="/new">New</a></li>
</ul>
</div>
{% block body %}{% endblock %}
</div>
</body>
</html>

49
templates/list.html Normal file
View file

@ -0,0 +1,49 @@
{% extends "layout.html" %}
{% block title %} | Bookmark List{% endblock %}
{% block body %}
<div class="list">
<ul class="bookmarks">
{% for b in blist %}
<li class="bookmark" id="{{b.short}}" name="{{b.short}}">
<p class="title"><a href="{{b.url}}">{{b.title}}</a></p>
<p class="date">{{b.created_at.strftime('%Y-%m-%d')}}</p>
<p class="menu">
<a href="/{{b.short}}/edit">Edit</a>
{% if b.deleted == False %}
<a href="/{{b.short}}/u/delete?redirect={{loc}}">Delete</a>
{% else %}
<a href="/{{b.short}}/u/restore?redirect={{loc}}">Restore</a>
{% endif %}
{% if b.private == False %}
<a href="/{{b.short}}/u/private?redirect={{loc}}&anchor={{b.short}}">Make Private</a>
{% else %}
<a href="/{{b.short}}/u/public?redirect={{loc}}&anchor={{b.short}}">Make Public</a>
{% endif %}
{% if not b.archived_image %}
<a href="/{{b.short}}/u/image?redirect={{loc}}&anchor={{b.short}}">Get Image</a>
{% else %}
<a href="/{{b.short}}/image">View Image</a>
{% endif %}
{% if b.archived_text %}
<a href="/{{b.short}}/text">View Text</a>
{% else %}
<a href="/{{b.short}}/u/text?redirect={{loc}}&anchor={{b.short}}">Get Text</a>
{% endif %}
</p>
<p class="note">{{b.note}}</p>
{% if b.tags %}
<p class="tags">Tags:
{% for t in b.tags %}
<a href="/tag/{{t}}">{{t}}</a>
{% endfor %}</p>
{% endif %}
<p class="menu">[ <a href="http://amd.im/{{b.short}}">{{b.short}}</a>,
Short Factor: {{b.factor|round(4)}}, Views: {{b.hits}},
]</p>
</li>
{% endfor %}
</ul>
</div>
{% endblock %}

View file

@ -0,0 +1,16 @@
{% extends "layout.html" %}
{% block title %} | Login {% endblock %}
{% block body %}
<div style="text-align:center">
{% from "security/_macros.html" import render_field_with_errors, render_field %}
{% include "security/_messages.html" %}
<form action="{{ url_for_security('login') }}" method="POST" name="login_user_form">
{{ login_user_form.hidden_tag() }}
{{ render_field_with_errors(login_user_form.email) }}
{{ render_field_with_errors(login_user_form.password) }}
{{ render_field_with_errors(login_user_form.remember) }}
{{ render_field(login_user_form.next) }}
{{ render_field(login_user_form.submit) }}
</form>
</div>
{% endblock %}

32
templates/text.html Normal file
View file

@ -0,0 +1,32 @@
{% extends "layout.html" %}
{% block title %} | {{b.title}} Text View{% endblock %}
{% block body %}
<h3>{{b.title}}</h3>
<p class="date">{{b.created_at.strftime('%Y-%m-%d')}}</p>
<p class="menu">
{% if b.unread %}
<a href="/unread/#{{b.short}}">&lt;&lt; Back to Unread</a> |
<a href="/{{b.short}}/u/archive?redirect={{b.short}}/text">Archive</a> |
{% else %}
<a href="/all/#{{b.short}}">&lt;&lt; Back to Bookmarks</a> |
<a href="/{{b.short}}/u/unread?redirect={{b.short}}/text">Mark Unread</a> |
{% endif %}
<a href="{{b.url}}">View Original</a> |
<a href="/{{b.short}}/u/text?redirect={{b.short}}/text">Update Text</a> |
<a href="/{{b.short}}/u/text_force?redirect={{b.short}}/text">Re-scrape Text</a>
</p>
<div class="text">
{{text|safe}}
</div>
<hr/>
<div class="text_versions">
<p>Other versions of this text:</p>
<ul>
{% for t in tlist %}
<li><a href="/{{b.short}}/text/{{t.created_at}}">{{t.created_at}}</a></li>
{% endfor %}
</ul>
</div>
{% endblock %}