bookie/ingest.go
2023-01-02 18:13:08 -08:00

102 lines
1.8 KiB
Go

package main
import (
"database/sql"
"fmt"
)
// runIngest reads every URL stored in the ingest table and hands each one to
// ingestURL.
//
// Failing to run the query, to scan a row, or to finish iterating the result
// set panics. A failure reported by ingestURL is only printed, and the loop
// moves on to the next row.
func runIngest(db *sql.DB) {
	rows, err := db.Query("SELECT URL FROM ingest")
	if err != nil {
		fmt.Println("Could not get ingest URL list")
		panic(err)
	}
	defer rows.Close()

	for rows.Next() {
		var queued string
		if scanErr := rows.Scan(&queued); scanErr != nil {
			fmt.Println("Could not parse ingest URL record")
			panic(scanErr)
		}

		// One bad URL must not stop the rest of the queue: report and continue.
		if ingestErr := ingestURL(queued, db); ingestErr != nil {
			fmt.Println("Could not ingest url.")
			fmt.Println(ingestErr)
		}
	}

	// Next returning false can hide an iteration error; surface it here.
	if iterErr := rows.Err(); iterErr != nil {
		panic(iterErr)
	}
}
// createIngestJob queues url in the ingest table so it can later be ingested
// into the bookmark table.
//
// When ingestJobExists reports that the URL is already queued, a notice is
// printed and a SELECT for that URL is executed instead of the INSERT. The
// sql.Result of whichever statement ran is returned. Any execution error
// panics after printing a short message.
//
// NOTE(review): the already-queued path runs a SELECT through Exec; what the
// returned Result reports in that case depends on the driver — confirm that
// callers do not rely on it.
func createIngestJob(url string, db *sql.DB) sql.Result {
	// Default to inserting a new job; switch to the lookup when one exists.
	query := "INSERT INTO ingest(url) VALUES (?)"
	failure := "Could not execute insert query"
	if ingestJobExists(url, db) {
		fmt.Println("URL exists in ingest queue")
		query = "SELECT * FROM ingest WHERE URL=?"
		failure = "Could not get job from ingest queue"
	}

	res, err := db.Exec(query, url)
	if err != nil {
		fmt.Println(failure)
		panic(err)
	}
	return res
}
// ingestJobExists reports whether the ingest table contains at least one row
// whose URL column equals url. It panics, after printing a short message, if
// the count query cannot be executed or scanned.
func ingestJobExists(url string, db *sql.DB) bool {
	var matches int
	row := db.QueryRow("SELECT count() FROM ingest where URL=?", url)
	if err := row.Scan(&matches); err != nil {
		fmt.Println("Could not check ingest table for URL")
		panic(err)
	}
	return matches > 0
}
// ingestURL builds a Bookmark for url and stores it with addBookmark.
//
// The bookmark's title, content and publication date are copied from the
// getMercury result whenever the corresponding field is non-empty. A
// getMercury error is printed and ingestion carries on with whatever the
// result holds, so a bookmark can still be stored with only its URL set.
//
// It returns a wrapped error when addBookmark fails, and nil once the
// bookmark has been stored.
func ingestURL(url string, db *sql.DB) error {
	fmt.Println("Ingesting:", url)

	var in Bookmark
	in.URL = url

	m, err := getMercury(url)
	if err != nil {
		// Best effort: metadata is optional, so report the failure and keep going.
		// NOTE(review): this assumes m is still safe to read after an error —
		// confirm against getMercury's return type.
		fmt.Println(err)
	}
	if m.Title != "" {
		in.Title = m.Title
	}
	if m.Content != "" {
		in.MercuryContent = m.Content
	}
	if m.DatePublished != "" {
		in.PubDate = m.DatePublished
	}

	b, err := addBookmark(db, in)
	if err != nil {
		// Previously this error was dropped and success was reported regardless.
		return fmt.Errorf("adding bookmark for %q: %w", url, err)
	}
	fmt.Println("Ingested:", b.Title)
	return nil
}