Revert "Revert "pushing stuff to modular structure""
This reverts commit 9e2b61bded.
This commit is contained in:
parent
86e32807c0
commit
59a932292c
5 changed files with 300 additions and 0 deletions
|
@ -0,0 +1,21 @@
|
|||
package bookmark
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// BookmarkExists reports whether the bookmarks table already holds a row
// whose URL column equals url.
//
// This is the exported name that cmd.PullPocket calls; it forwards to
// bookmarkExists, which is kept so existing in-package callers still work.
func BookmarkExists(url string, db *sql.DB) bool {
	return bookmarkExists(url, db)
}

// bookmarkExists counts the rows of the bookmarks table whose URL equals url
// and reports whether there is at least one.
//
// The bool-only signature cannot carry an error, so a failed query or scan is
// printed to stdout and then escalated with panic.
func bookmarkExists(url string, db *sql.DB) bool {
	var count int

	// count(*) is the documented row-count form of the aggregate.
	err := db.QueryRow("SELECT count(*) FROM bookmarks where URL=?", url).Scan(&count)
	if err != nil {
		fmt.Println("Could not check database for url")
		panic(err)
	}

	return count > 0
}
|
65
cmd/cmd.go
Normal file
65
cmd/cmd.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
package cmd
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"gitlab.amd.im/pocketarchive/bookmark"
|
||||
"gitlab.amd.im/pocketarchive/ingest"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func PullPocket(db *sql.DB) {
|
||||
fmt.Println("Getting archive data from Pocket...")
|
||||
|
||||
// Pull data from RSS feed.
|
||||
archiveURL := "https://getpocket.com/users/amdavidson/feed/read"
|
||||
|
||||
resp, err := http.Get(archiveURL)
|
||||
if err != nil {
|
||||
fmt.Println("Could not get archived urls")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
|
||||
// Parse the feed
|
||||
f := Feed{}
|
||||
err = xml.Unmarshal(body, &f)
|
||||
if err != nil {
|
||||
fmt.Println("Could not parse feed")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, bookmark := range f.BookmarkList {
|
||||
if bookmark.BookmarkExists(bookmark.GUID, db) == false {
|
||||
fmt.Printf("New bookmark url %s\n", bookmark.GUID)
|
||||
ingest.CreateIngestJob(bookmark.GUID, db)
|
||||
} else {
|
||||
fmt.Printf("Already know about %s\n", bookmark.GUID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func RunIngest (db *sql.DB) {
|
||||
URLs, err := db.Query("SELECT URL FROM ingest")
|
||||
if err != nil {
|
||||
fmt.Println("Could not get ingest URL list")
|
||||
panic(err)
|
||||
}
|
||||
defer URLs.Close()
|
||||
|
||||
var URL string
|
||||
for URLs.Next() {
|
||||
err := URLs.Scan(&URL)
|
||||
if err != nil {
|
||||
fmt.Println("Could not parse ingest URL record")
|
||||
panic(err)
|
||||
}
|
||||
ingest.IngestURL(URL, db)
|
||||
}
|
||||
err = URLs.Err()
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
117
ingest/ingest.go
117
ingest/ingest.go
|
@ -0,0 +1,117 @@
|
|||
package ingest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/mauidude/go-readability"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func ingestJobExists(url string, db *sql.DB) bool {
|
||||
var count int
|
||||
|
||||
err := db.QueryRow("SELECT count() FROM ingest where URL=?", url).Scan(&count)
|
||||
if err != nil {
|
||||
fmt.Println("Could not check ingest table for URL")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// createIngestJob puts the url into a table to queue for ingesting into the bookmark table.
|
||||
func createIngestJob(url string, db *sql.DB) sql.Result {
|
||||
if ingestJobExists(url, db) {
|
||||
fmt.Println("URL exists in ingest queue")
|
||||
row, err := db.Exec("SELECT * FROM ingest WHERE URL=?", url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not get job from ingest queue")
|
||||
panic(err)
|
||||
}
|
||||
return row
|
||||
}
|
||||
|
||||
row, err := db.Exec("INSERT INTO ingest(url) VALUES (?)", url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not execute insert query")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return row
|
||||
}
|
||||
|
||||
// Ingests a URL into the bookmarks table
|
||||
func ingestURL(url string, db *sql.DB) {
|
||||
fmt.Println("Ingesting:", url)
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not access bookmarked url:", url)
|
||||
panic(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
html, err := ioutil.ReadAll(resp.Body)
|
||||
|
||||
doc, err := readability.NewDocument(string(html))
|
||||
if err != nil {
|
||||
fmt.Println("Could not parse site data for:", url)
|
||||
}
|
||||
fmt.Println("Content:", doc.Content())
|
||||
}
|
||||
func ingestJobExists(url string, db *sql.DB) bool {
|
||||
var count int
|
||||
|
||||
err := db.QueryRow("SELECT count() FROM ingest where URL=?", url).Scan(&count)
|
||||
if err != nil {
|
||||
fmt.Println("Could not check ingest table for URL")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// createIngestJob puts the url into a table to queue for ingesting into the bookmark table.
|
||||
func createIngestJob(url string, db *sql.DB) sql.Result {
|
||||
if ingestJobExists(url, db) {
|
||||
fmt.Println("URL exists in ingest queue")
|
||||
row, err := db.Exec("SELECT * FROM ingest WHERE URL=?", url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not get job from ingest queue")
|
||||
panic(err)
|
||||
}
|
||||
return row
|
||||
}
|
||||
|
||||
row, err := db.Exec("INSERT INTO ingest(url) VALUES (?)", url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not execute insert query")
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return row
|
||||
}
|
||||
|
||||
// Ingests a URL into the bookmarks table
|
||||
func ingestURL(url string, db *sql.DB) {
|
||||
fmt.Println("Ingesting:", url)
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
fmt.Println("Could not access bookmarked url:", url)
|
||||
panic(err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
html, err := ioutil.ReadAll(resp.Body)
|
||||
|
||||
doc, err := readability.NewDocument(string(html))
|
||||
if err != nil {
|
||||
fmt.Println("Could not parse site data for:", url)
|
||||
}
|
||||
fmt.Println("Content:", doc.Content())
|
||||
}
|
50
main.go
Normal file
50
main.go
Normal file
|
@ -0,0 +1,50 @@
|
|||
package pocketarchive
|
||||
t
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitlab.amd.im/pocketarchive/cmd"
|
||||
pasql "gitlab.amd.im/pocketarchive/sql"
|
||||
)
|
||||
|
||||
// Make up some data structures into which we can put our feed.
|
||||
|
||||
// Bookmark is the basic unit that gets archived. Each field is a plain
// string filled from the XML child element named in its struct tag.
type Bookmark struct {
	// Required elements.
	Title string `xml:"title"`
	Link  string `xml:"link"`
	GUID  string `xml:"guid"`

	// Optional elements.
	PubDate  string `xml:"pubDate"`
	Comments string `xml:"comments"`
}
|
||||
|
||||
// Feed defines the structure of the RSS feed exported from Pocket
|
||||
type Feed struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
// Required
|
||||
Title string `xml:"channel>title"`
|
||||
Link string `xml:"channel>link"`
|
||||
Description string `xml:"channel>description"`
|
||||
// Optional
|
||||
PubDate string `xml:"channel>pubDate"`
|
||||
BookmarkList []Bookmark `xml:"channel>item"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
fmt.Println("Launching Pocket Archive...")
|
||||
|
||||
db, err = pasql.GetDB("./bookmark.db")
|
||||
if err != nil {
|
||||
fmt.Println("Could not open database")
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
cmd.PullPocket(db)
|
||||
|
||||
cmd.RunIngest(db)
|
||||
|
||||
fmt.Println("Pocket Archive exiting.")
|
||||
}
|
47
sql/sql.go
47
sql/sql.go
|
@ -0,0 +1,47 @@
|
|||
package sql
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GetDB opens the sqlite3 database at path and returns a usable handle.
//
// When no file exists at path the database is treated as new: a notice is
// printed, ./schema.sql is read, split on ";" and every non-blank statement
// is executed against the database.
//
// Every failure is reported through the error return (wrapped with context)
// rather than by panicking; in that case the returned *sql.DB is nil and any
// handle that was opened has been closed.
func GetDB(path string) (*sql.DB, error) {
	fillDB := false
	if _, err := os.Stat(path); err != nil {
		// Only a missing file means "new database"; any other Stat failure
		// must not trigger schema creation.
		if !os.IsNotExist(err) {
			return nil, fmt.Errorf("checking database %q: %w", path, err)
		}
		fmt.Println("Database does not exist, creating and applying schema")
		fillDB = true
	}

	db, err := sql.Open("sqlite3", path)
	if err != nil {
		return nil, fmt.Errorf("opening database %q: %w", path, err)
	}

	if fillDB {
		if err := applySchema(db, "./schema.sql"); err != nil {
			db.Close() // do not leak the handle on the failure path
			return nil, err
		}
	}

	return db, nil
}

// applySchema runs each ";"-separated statement of the SQL file at
// schemaPath against db, stopping at the first failure.
func applySchema(db *sql.DB, schemaPath string) error {
	file, err := ioutil.ReadFile(schemaPath)
	if err != nil {
		return fmt.Errorf("reading schema %q: %w", schemaPath, err)
	}

	for _, request := range strings.Split(string(file), ";") {
		request = strings.TrimSpace(request)
		if request == "" {
			// Splitting on ";" leaves a blank fragment after the last
			// statement; there is nothing to execute for it.
			continue
		}
		if _, err := db.Exec(request); err != nil {
			return fmt.Errorf("executing schema statement %q: %w", request, err)
		}
	}

	return nil
}
|
Loading…
Reference in a new issue