owl-blogs/webmention.go

132 lines
3.0 KiB
Go
Raw Normal View History

2022-08-27 21:01:14 +00:00
package owl
import (
"bytes"
"errors"
"net/http"
"strings"
2022-09-01 19:53:06 +00:00
"time"
2022-08-27 21:01:14 +00:00
"golang.org/x/net/html"
)
2022-09-04 13:03:16 +00:00
// WebmentionIn is the YAML-serializable record kept for a single webmention.
// NOTE(review): the "In" suffix suggests these are mentions received from
// other sites — confirm against the code that creates them.
type WebmentionIn struct {
	// Source is stored under the YAML key "source"; presumably the URL of the
	// page that contains the mention — verify against caller.
	Source string `yaml:"source"`
	// Title is stored under the YAML key "title"; presumably filled from
	// ParsedHEntry.Title — verify against caller.
	Title string `yaml:"title"`
	// ApprovalStatus is stored under the YAML key "approval_status". The set
	// of valid values is not defined in this file.
	ApprovalStatus string `yaml:"approval_status"`
	// RetrievedAt is stored under the YAML key "retrieved_at".
	RetrievedAt time.Time `yaml:"retrieved_at"`
}
2022-09-04 13:03:16 +00:00
// WebmentionOut is the YAML-serializable record kept for a single link target.
// NOTE(review): the "Out" suffix suggests these track mentions sent to other
// sites — confirm against the code that creates them.
type WebmentionOut struct {
	// Target is stored under the YAML key "target"; presumably the URL being
	// mentioned — verify against caller.
	Target string `yaml:"target"`
	// Supported is stored under the YAML key "supported"; presumably whether
	// the target advertises a webmention endpoint — verify against caller.
	Supported bool `yaml:"supported"`
	// ScannedAt is stored under the YAML key "scanned_at".
	ScannedAt time.Time `yaml:"scanned_at"`
	// LastSentAt is stored under the YAML key "last_sent_at".
	LastSentAt time.Time `yaml:"last_sent_at"`
}
2022-08-27 21:01:14 +00:00
// HttpRetriever abstracts fetching the content behind a URL.
type HttpRetriever interface {
	// Get returns the bytes retrieved for url, or an error if retrieval
	// failed.
	Get(url string) ([]byte, error)
}
2022-09-04 13:32:37 +00:00
type HttpParser interface {
ParseHEntry(data []byte) (ParsedHEntry, error)
2022-09-04 13:32:37 +00:00
ParseLinks(data []byte) ([]string, error)
}
2022-08-27 21:01:14 +00:00
// OwlHttpRetriever is a stateless type whose Get method fetches a URL using
// net/http; its method set matches HttpRetriever.
type OwlHttpRetriever struct{}
// OwlMicroformatParser is a stateless type whose ParseHEntry and ParseLinks
// methods parse HTML with golang.org/x/net/html; its method set matches
// HttpParser.
type OwlMicroformatParser struct{}
2022-08-27 21:01:14 +00:00
// ParsedHEntry holds the data extracted from an h-entry element by
// ParseHEntry.
type ParsedHEntry struct {
	// Title is the text collected from the entry's p-name element; it is
	// empty when the entry has none.
	Title string
}
func (OwlHttpRetriever) Get(url string) ([]byte, error) {
2022-08-27 21:01:14 +00:00
resp, err := http.Get(url)
if err != nil {
return []byte{}, err
}
var data []byte
_, err = resp.Body.Read(data)
// TODO: encoding
return data, err
}
// collectText appends to buf the Data of every text node in the subtree rooted
// at n (n itself included), visiting each node before its children and
// siblings in FirstChild/NextSibling order.
func collectText(n *html.Node, buf *bytes.Buffer) {
	if n.Type == html.TextNode {
		buf.WriteString(n.Data)
	}

	child := n.FirstChild
	for child != nil {
		collectText(child, buf)
		child = child.NextSibling
	}
}
func (OwlMicroformatParser) ParseHEntry(data []byte) (ParsedHEntry, error) {
2022-08-27 21:01:14 +00:00
doc, err := html.Parse(strings.NewReader(string(data)))
if err != nil {
return ParsedHEntry{}, err
}
var interpretHFeed func(*html.Node, *ParsedHEntry, bool) (ParsedHEntry, error)
interpretHFeed = func(n *html.Node, curr *ParsedHEntry, parent bool) (ParsedHEntry, error) {
attrs := n.Attr
for _, attr := range attrs {
if attr.Key == "class" && strings.Contains(attr.Val, "p-name") {
buf := &bytes.Buffer{}
collectText(n, buf)
curr.Title = buf.String()
return *curr, nil
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
interpretHFeed(c, curr, false)
}
return *curr, nil
}
var findHFeed func(*html.Node) (ParsedHEntry, error)
findHFeed = func(n *html.Node) (ParsedHEntry, error) {
attrs := n.Attr
for _, attr := range attrs {
if attr.Key == "class" && strings.Contains(attr.Val, "h-entry") {
return interpretHFeed(n, &ParsedHEntry{}, true)
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
entry, err := findHFeed(c)
if err == nil {
return entry, nil
}
}
return ParsedHEntry{}, errors.New("no h-entry found")
}
return findHFeed(doc)
}
2022-09-04 13:32:37 +00:00
// ParseLinks parses data as HTML and returns the value of every href attribute
// found on <a> elements, in the order the elements are visited (each node
// before its children).
//
// Values are returned exactly as written in the document; they are neither
// resolved against a base URL nor de-duplicated. The returned slice is non-nil
// even when no links are found. An error is returned only when the HTML cannot
// be parsed.
func (OwlMicroformatParser) ParseLinks(data []byte) ([]string, error) {
	doc, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return []string{}, err
	}

	// Collect into one slice shared by the whole traversal instead of
	// allocating and merging a new slice at every node.
	links := []string{}
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key == "href" {
					links = append(links, attr.Val)
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	return links, nil
}