2018-01-05 17:43:36 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"database/sql"
|
|
|
|
"fmt"
|
|
|
|
)
|
|
|
|
|
2023-01-03 02:13:08 +00:00
|
|
|
func runIngest(db *sql.DB) {
|
|
|
|
URLs, err := db.Query("SELECT URL FROM ingest")
|
2018-01-05 17:43:36 +00:00
|
|
|
if err != nil {
|
2023-01-03 02:13:08 +00:00
|
|
|
fmt.Println("Could not get ingest URL list")
|
2018-01-05 17:43:36 +00:00
|
|
|
panic(err)
|
|
|
|
}
|
2023-01-03 02:13:08 +00:00
|
|
|
defer URLs.Close()
|
2018-01-05 17:43:36 +00:00
|
|
|
|
2023-01-03 02:13:08 +00:00
|
|
|
var URL string
|
|
|
|
for URLs.Next() {
|
|
|
|
err := URLs.Scan(&URL)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println("Could not parse ingest URL record")
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
err = ingestURL(URL, db)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println("Could not ingest url.")
|
|
|
|
fmt.Println(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
err = URLs.Err()
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
2018-01-05 17:43:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-03 02:13:08 +00:00
|
|
|
// createIngestJob puts the url into a table to queue for ingesting into the bookmark table.
|
|
|
|
func createIngestJob(url string, db *sql.DB) sql.Result {
|
2018-01-05 17:43:36 +00:00
|
|
|
if ingestJobExists(url, db) {
|
|
|
|
fmt.Println("URL exists in ingest queue")
|
|
|
|
row, err := db.Exec("SELECT * FROM ingest WHERE URL=?", url)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println("Could not get job from ingest queue")
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return row
|
|
|
|
}
|
|
|
|
|
|
|
|
row, err := db.Exec("INSERT INTO ingest(url) VALUES (?)", url)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println("Could not execute insert query")
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return row
|
|
|
|
}
|
2023-01-03 02:13:08 +00:00
|
|
|
|
|
|
|
// ingestJobExists reports whether the ingest table already holds at least one
// row whose URL column equals url.
//
// It runs a single parameterized count query against db. A query or scan
// failure is treated as fatal: a message is printed and the function panics
// with the underlying error.
func ingestJobExists(url string, db *sql.DB) bool {
	var count int

	err := db.QueryRow("SELECT count() FROM ingest where URL=?", url).Scan(&count)
	if err != nil {
		fmt.Println("Could not check ingest table for URL")
		panic(err)
	}

	// The comparison is already the answer; no branch is needed.
	return count > 0
}
|
|
|
|
|
|
|
|
// Ingests a URL into the bookmarks table
|
|
|
|
func ingestURL(url string, db *sql.DB) error {
|
|
|
|
fmt.Println("Ingesting:", url)
|
|
|
|
|
|
|
|
var in Bookmark
|
|
|
|
|
|
|
|
in.URL = url
|
|
|
|
|
|
|
|
m, err := getMercury(url)
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.Title != "" {
|
|
|
|
in.Title = m.Title
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.Content != "" {
|
|
|
|
in.MercuryContent = m.Content
|
|
|
|
}
|
|
|
|
|
|
|
|
if m.DatePublished != "" {
|
|
|
|
in.PubDate = m.DatePublished
|
|
|
|
}
|
|
|
|
|
|
|
|
b, err := addBookmark(db, in)
|
|
|
|
|
|
|
|
fmt.Println("Ingested:", b.Title)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|