wip, has working ingest queue
This commit is contained in:
parent
59dda11344
commit
ae57639f98
3 changed files with 113 additions and 34 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1 +1 @@
|
||||||
*.sqlite3
|
*.db
|
||||||
|
|
144
pocketarchive.go
144
pocketarchive.go
|
@ -3,26 +3,127 @@ package main
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
func checkURL(url string, db *sql.DB) bool {
|
// Make up some data structures into which we can put our feed.
|
||||||
row := db.QueryRow(`SELECT COUNT(url) FROM bookmarks where URL="%s"`, url)
|
|
||||||
|
|
||||||
if row != nil {
|
// Bookmark defines the fundamental structure of the items to be archived.
// Each Bookmark is decoded from one <item> element of the feed (see
// Feed.BookmarkList); the struct tags name the child element each field is
// read from.
|
||||||
|
type Bookmark struct {
|
||||||
|
// Required elements: <title>, <link> and <guid>.
|
||||||
|
Title string `xml:"title"`
|
||||||
|
Link string `xml:"link"`
|
||||||
|
// GUID is the value main looks up in the bookmarks table and passes to
// ingestURL, i.e. it is used as the bookmark's URL.
GUID string `xml:"guid"`
|
||||||
|
// Optional elements: <pubDate> and <comments>. PubDate is kept as the raw
// string from the feed; it is not parsed into a time value here.
|
||||||
|
PubDate string `xml:"pubDate"`
|
||||||
|
Comments string `xml:"comments"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feed defines the structure of the RSS feed exported from Pocket.
// It is the target of xml.Unmarshal in main: the root element is <rss>, and
// the remaining fields are read from children of its <channel> element.
|
||||||
|
type Feed struct {
|
||||||
|
// XMLName pins the expected root element name to "rss".
XMLName xml.Name `xml:"rss"`
|
||||||
|
// Version is read from the "version" attribute of the root element.
Version string `xml:"version,attr"`
|
||||||
|
// Required channel elements: <title>, <link> and <description>.
|
||||||
|
Title string `xml:"channel>title"`
|
||||||
|
Link string `xml:"channel>link"`
|
||||||
|
Description string `xml:"channel>description"`
|
||||||
|
// Optional channel elements: <pubDate> and the list of <item> entries.
|
||||||
|
PubDate string `xml:"channel>pubDate"`
|
||||||
|
// BookmarkList holds one Bookmark per <item> element inside <channel>.
BookmarkList []Bookmark `xml:"channel>item"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// getDB opens the SQLite database at path and returns a usable handle.
//
// If no file exists at path yet, the database is treated as new and every
// statement found in ./schema.sql (resolved against the working directory)
// is executed against it. Any other os.Stat failure, such as a permission
// error, is returned to the caller instead of being mistaken for a missing
// database.
//
// Failures are reported through the error return rather than by panicking.
// On error the returned *sql.DB is nil and any handle opened along the way
// has been closed.
func getDB(path string) (*sql.DB, error) {
	fillDB := false
	if _, err := os.Stat(path); err != nil {
		if !os.IsNotExist(err) {
			return nil, fmt.Errorf("checking database %q: %v", path, err)
		}
		fmt.Println("Database does not exist, creating and applying schema")
		fillDB = true
	}

	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, fmt.Errorf("opening database %q: %v", path, err)
	}

	if fillDB {
		if err := applySchema(db, "./schema.sql"); err != nil {
			// Do not leak the handle when the schema could not be applied.
			db.Close()
			return nil, err
		}
	}

	return db, nil
}

// applySchema runs each ";"-separated statement of the file at schemaPath
// against db, skipping fragments that are empty after trimming whitespace
// (such as the one following the final ";"). The split is purely textual, so
// the schema file must not contain ";" inside a statement.
func applySchema(db *sql.DB, schemaPath string) error {
	file, err := ioutil.ReadFile(schemaPath)
	if err != nil {
		return fmt.Errorf("database empty, but could not read schema file %q: %v", schemaPath, err)
	}

	for _, request := range strings.Split(string(file), ";") {
		request = strings.TrimSpace(request)
		if request == "" {
			continue
		}
		if _, err := db.Exec(request); err != nil {
			return fmt.Errorf("could not execute %q: %v", request, err)
		}
	}

	return nil
}
|
||||||
|
|
||||||
|
// ingestJobExists reports whether url is already present in the ingest
// table. It prints a message and panics if the lookup query fails.
func ingestJobExists(url string, db *sql.DB) bool {
	var matches int

	row := db.QueryRow("SELECT count() FROM ingest where URL=?", url)
	if scanErr := row.Scan(&matches); scanErr != nil {
		fmt.Println("Could not check ingest table for URL")
		panic(scanErr)
	}

	return matches > 0
}
|
||||||
|
|
||||||
func addNewURL(url string, db *sql.DB) sql.Row {
|
// bookmarkExists reports whether url is already present in the bookmarks
// table. It prints a message and panics if the lookup query fails.
func bookmarkExists(url string, db *sql.DB) bool {
	var matches int

	row := db.QueryRow("SELECT count() FROM bookmarks where URL=?", url)
	if scanErr := row.Scan(&matches); scanErr != nil {
		fmt.Println("Could not check database for url")
		panic(scanErr)
	}

	return matches > 0
}
|
||||||
|
|
||||||
|
// ingestURL queues url in the ingest table and returns the sql.Result of the
// insert statement.
//
// The insert is a single SQLite "INSERT OR IGNORE": because ingest.url is the
// table's primary key, a URL that is already queued is left untouched and the
// statement reports zero affected rows. This replaces a separate existence
// check followed by a SELECT issued through Exec, which could not return the
// row and left a window between the check and the insert.
//
// It prints a message and panics if the statement fails, since the signature
// offers no error return.
func ingestURL(url string, db *sql.DB) sql.Result {
	res, err := db.Exec("INSERT OR IGNORE INTO ingest(url) VALUES (?)", url)
	if err != nil {
		fmt.Println("Could not execute insert query")
		panic(err)
	}

	// Zero affected rows means the primary key already existed.
	if n, err := res.RowsAffected(); err == nil && n == 0 {
		fmt.Println("URL exists in ingest queue")
	}

	return res
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -42,29 +143,6 @@ func main() {
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
body, err := ioutil.ReadAll(resp.Body)
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
|
||||||
// Make up some data structures into which we can put our feed.
|
|
||||||
type Bookmark struct {
|
|
||||||
// Required
|
|
||||||
Title string `xml:"title"`
|
|
||||||
Link string `xml:"link"`
|
|
||||||
GUID string `xml:"guid"`
|
|
||||||
// Optional
|
|
||||||
PubDate string `xml:"pubDate"`
|
|
||||||
Comments string `xml:"comments"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Feed struct {
|
|
||||||
XMLName xml.Name `xml:"rss"`
|
|
||||||
Version string `xml:"version,attr"`
|
|
||||||
// Required
|
|
||||||
Title string `xml:"channel>title"`
|
|
||||||
Link string `xml:"channel>link"`
|
|
||||||
Description string `xml:"channel>description"`
|
|
||||||
// Optional
|
|
||||||
PubDate string `xml:"channel>pubDate"`
|
|
||||||
BookmarkList []Bookmark `xml:"channel>item"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse the feed
|
// Parse the feed
|
||||||
f := Feed{}
|
f := Feed{}
|
||||||
err = xml.Unmarshal(body, &f)
|
err = xml.Unmarshal(body, &f)
|
||||||
|
@ -73,17 +151,17 @@ func main() {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put data in sqlite3
|
db, err := getDB("./bookmarks.db")
|
||||||
db, err := sql.Open("sqlite3", "./bookmarks.sqlite3")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("Could not open database")
|
fmt.Println("Could not open or create db")
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
defer db.Close()
|
defer db.Close()
|
||||||
|
|
||||||
for _, bookmark := range f.BookmarkList {
|
for _, bookmark := range f.BookmarkList {
|
||||||
if checkURL(bookmark.GUID, db) == true {
|
if bookmarkExists(bookmark.GUID, db) == false {
|
||||||
fmt.Printf("New bookmark url %s\n", bookmark.GUID)
|
fmt.Printf("New bookmark url %s\n", bookmark.GUID)
|
||||||
|
ingestURL(bookmark.GUID, db)
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("Already know about %s\n", bookmark.GUID)
|
fmt.Printf("Already know about %s\n", bookmark.GUID)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
CREATE TABLE bookmarks (url text not null primary key);
|
CREATE TABLE bookmarks (url text not null primary key);
|
||||||
|
CREATE TABLE ingest (url text not null primary key);
|
||||||
|
|
Loading…
Reference in a new issue