From 805b37ed5ab077ee0c82df0bcf7dcb28412d31ca Mon Sep 17 00:00:00 2001 From: Andrew Davidson Date: Fri, 5 Jan 2018 12:43:36 -0500 Subject: [PATCH] much simpler reorganization --- bookmark.go | 22 ++++++ cmd.go | 42 ++++++++++++ db.go | 47 +++++++++++++ ingest.go | 42 ++++++++++++ main.go | 48 +++++++++++++ pocketarchive.go | 172 ----------------------------------------------- 6 files changed, 201 insertions(+), 172 deletions(-) create mode 100644 bookmark.go create mode 100644 cmd.go create mode 100644 db.go create mode 100644 ingest.go create mode 100644 main.go delete mode 100644 pocketarchive.go diff --git a/bookmark.go b/bookmark.go new file mode 100644 index 0000000..ed0a47f --- /dev/null +++ b/bookmark.go @@ -0,0 +1,22 @@ +package main + +import ( + "database/sql" + "fmt" +) + +func bookmarkExists(url string, db *sql.DB) bool { + var count int + + err := db.QueryRow("SELECT count() FROM bookmarks where URL=?", url).Scan(&count) + if err != nil { + fmt.Println("Could not check database for url") + panic(err) + } + + if count > 0 { + return true + } + + return false +} diff --git a/cmd.go b/cmd.go new file mode 100644 index 0000000..cfae017 --- /dev/null +++ b/cmd.go @@ -0,0 +1,42 @@ +package main + +import ( + "database/sql" + "encoding/xml" + "fmt" + "io/ioutil" + "net/http" +) + +func pullPocket(db *sql.DB) { + fmt.Println("Getting archive data from Pocket...") + + // Pull data from RSS feed. + archiveURL := "https://getpocket.com/users/amdavidson/feed/read" + + resp, err := http.Get(archiveURL) + if err != nil { + fmt.Println("Could not get archived urls") + panic(err) + } + + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + + // Parse the feed + f := Feed{} + err = xml.Unmarshal(body, &f) + if err != nil { + fmt.Println("Could not parse feed") + panic(err) + } + + for _, bookmark := range f.BookmarkList { + if bookmarkExists(bookmark.GUID, db) == false { + fmt.Printf("New bookmark url %s\n", bookmark.GUID) + ingestURL(bookmark.GUID, db) + } else { + fmt.Printf("Already know about %s\n", bookmark.GUID) + } + } +} diff --git a/db.go b/db.go new file mode 100644 index 0000000..07ecb49 --- /dev/null +++ b/db.go @@ -0,0 +1,47 @@ +package main + +import ( + "database/sql" + "fmt" + _ "github.com/mattn/go-sqlite3" + "io/ioutil" + "os" + "strings" +) + +// getDB opens a DB object and returns a usable DB instance +func getDB(path string) (*sql.DB, error) { + var fillDB bool + _, err := os.Stat(path) + if err != nil { + fmt.Println("Database does not exist, creating and applying schema") + fillDB = true + } else { + fillDB = false + } + + db, err := sql.Open("sqlite3", path) + if err != nil { + fmt.Println("Could not open database") + panic(err) + } + + if fillDB { + file, err := ioutil.ReadFile("./schema.sql") + if err != nil { + fmt.Println("database empty, but cold not read schema file") + panic(err) + } + requests := strings.Split(string(file), ";") + + for _, request := range requests { + _, err := db.Exec(request) + if err != nil { + fmt.Println("Could not execute:", request) + panic(err) + } + } + } + + return db, nil +} diff --git a/ingest.go b/ingest.go new file mode 100644 index 0000000..7329d54 --- /dev/null +++ b/ingest.go @@ -0,0 +1,42 @@ +package main + +import ( + "database/sql" + "fmt" +) + +func ingestJobExists(url string, db *sql.DB) bool { + var count int + + err := db.QueryRow("SELECT count() FROM ingest where URL=?", url).Scan(&count) + if err != nil { + fmt.Println("Could not check ingest table for URL") + panic(err) + } + + if count > 0 { + return true + } + + return false +} + +func ingestURL(url string, db *sql.DB) sql.Result { + if ingestJobExists(url, db) { + fmt.Println("URL exists in ingest queue") + row, err := db.Exec("SELECT * FROM ingest WHERE URL=?", url) + if err != nil { + fmt.Println("Could not get job from ingest queue") + panic(err) + } + return row + } + + row, err := db.Exec("INSERT INTO ingest(url) VALUES (?)", url) + if err != nil { + fmt.Println("Could not execute insert query") + panic(err) + } + + return row +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..cf27a04 --- /dev/null +++ b/main.go @@ -0,0 +1,48 @@ +package main + +import ( + "encoding/xml" + "fmt" +) + +// Make up some data structures into which we can put our feed. + +// Bookmark defines the fundamental structure of the items to be archived. +type Bookmark struct { + // Required + Title string `xml:"title"` + Link string `xml:"link"` + GUID string `xml:"guid"` + // Optional + PubDate string `xml:"pubDate"` + Comments string `xml:"comments"` +} + +// Feed defines the structure of the RSS feed exported from Pocket +type Feed struct { + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + // Required + Title string `xml:"channel>title"` + Link string `xml:"channel>link"` + Description string `xml:"channel>description"` + // Optional + PubDate string `xml:"channel>pubDate"` + BookmarkList []Bookmark `xml:"channel>item"` +} + +func main() { + fmt.Println("Launching Pocket Archive...") + + db, err := getDB("./bookmarks.db") + if err != nil { + fmt.Println("Could not open or create db") + panic(err) + } + defer db.Close() + + pullPocket(db) + + fmt.Println("Pocket Archive exiting.") + +} diff --git a/pocketarchive.go b/pocketarchive.go deleted file mode 100644 index 43d38ce..0000000 --- a/pocketarchive.go +++ /dev/null @@ -1,172 +0,0 @@ -package main - -import ( - "database/sql" - "encoding/xml" - "fmt" - _ "github.com/mattn/go-sqlite3" - "io/ioutil" - "net/http" - "os" - "strings" -) - -// Make up some data structures into which we can put our feed. - -// Bookmark defines the fundamental structure of the items to be archived. -type Bookmark struct { - // Required - Title string `xml:"title"` - Link string `xml:"link"` - GUID string `xml:"guid"` - // Optional - PubDate string `xml:"pubDate"` - Comments string `xml:"comments"` -} - -// Feed defines the structure of the RSS feed exported from Pocket -type Feed struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - // Required - Title string `xml:"channel>title"` - Link string `xml:"channel>link"` - Description string `xml:"channel>description"` - // Optional - PubDate string `xml:"channel>pubDate"` - BookmarkList []Bookmark `xml:"channel>item"` -} - -// getDB opens a DB object and returns a usable DB instance -func getDB(path string) (*sql.DB, error) { - var fillDB bool - _, err := os.Stat(path) - if err != nil { - fmt.Println("Database does not exist, creating and applying schema") - fillDB = true - } else { - fillDB = false - } - - db, err := sql.Open("sqlite3", path) - if err != nil { - fmt.Println("Could not open database") - panic(err) - } - - if fillDB { - file, err := ioutil.ReadFile("./schema.sql") - if err != nil { - fmt.Println("database empty, but cold not read schema file") - panic(err) - } - requests := strings.Split(string(file), ";") - - for _, request := range requests { - _, err := db.Exec(request) - if err != nil { - fmt.Println("Could not execute:", request) - panic(err) - } - } - } - - return db, nil -} - -func ingestJobExists(url string, db *sql.DB) bool { - var count int - - err := db.QueryRow("SELECT count() FROM ingest where URL=?", url).Scan(&count) - if err != nil { - fmt.Println("Could not check ingest table for URL") - panic(err) - } - - if count > 0 { - return true - } - - return false -} - -func bookmarkExists(url string, db *sql.DB) bool { - var count int - - err := db.QueryRow("SELECT count() FROM bookmarks where URL=?", url).Scan(&count) - if err != nil { - fmt.Println("Could not check database for url") - panic(err) - } - - if count > 0 { - return true - } - - return false -} - -func ingestURL(url string, db *sql.DB) sql.Result { - if ingestJobExists(url, db) { - fmt.Println("URL exists in ingest queue") - row, err := db.Exec("SELECT * FROM ingest WHERE URL=?", url) - if err != nil { - fmt.Println("Could not get job from ingest queue") - panic(err) - } - return row - } - - row, err := db.Exec("INSERT INTO ingest(url) VALUES (?)", url) - if err != nil { - fmt.Println("Could not execute insert query") - panic(err) - } - - return row -} - -func main() { - fmt.Println("Launching Pocket Archive...") - - fmt.Println("Getting archive data from Pocket...") - - // Pull data from RSS feed. - archiveURL := "https://getpocket.com/users/amdavidson/feed/read" - - resp, err := http.Get(archiveURL) - if err != nil { - fmt.Println("Could not get archived urls") - panic(err) - } - - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - - // Parse the feed - f := Feed{} - err = xml.Unmarshal(body, &f) - if err != nil { - fmt.Println("Could not parse feed") - panic(err) - } - - db, err := getDB("./bookmarks.db") - if err != nil { - fmt.Println("Could not open or create db") - panic(err) - } - defer db.Close() - - for _, bookmark := range f.BookmarkList { - if bookmarkExists(bookmark.GUID, db) == false { - fmt.Printf("New bookmark url %s\n", bookmark.GUID) - ingestURL(bookmark.GUID, db) - } else { - fmt.Printf("Already know about %s\n", bookmark.GUID) - } - } - - fmt.Println("Pocket Archive exiting.") - -}