Scanning for webmentions in posts

This commit is contained in:
Niko Abeler 2022-09-04 15:32:37 +02:00
parent da197c7e4d
commit f899184e29
6 changed files with 176 additions and 8 deletions

View File

@ -6,12 +6,16 @@ import (
"time" "time"
) )
type MockMicroformatParser struct{} type MockHttpParser struct{}
func (*MockMicroformatParser) ParseHEntry(data []byte) (owl.ParsedHEntry, error) { func (*MockHttpParser) ParseHEntry(data []byte) (owl.ParsedHEntry, error) {
return owl.ParsedHEntry{Title: "Mock Title"}, nil return owl.ParsedHEntry{Title: "Mock Title"}, nil
} }
// ParseLinks is a test stub: it ignores data and always reports the same
// single link with a nil error.
func (*MockHttpParser) ParseLinks(data []byte) ([]string, error) {
	links := []string{"http://example.com"}
	return links, nil
}
type MockHttpRetriever struct{} type MockHttpRetriever struct{}
func (*MockHttpRetriever) Get(url string) ([]byte, error) { func (*MockHttpRetriever) Get(url string) ([]byte, error) {

83
post.go
View File

@ -44,6 +44,10 @@ func (post Post) Dir() string {
return path.Join(post.user.Dir(), "public", post.id) return path.Join(post.user.Dir(), "public", post.id)
} }
// StatusFile returns the path of the post's `status.yml`, located directly
// inside the post directory.
func (post Post) StatusFile() string {
	postDir := post.Dir()
	return path.Join(postDir, "status.yml")
}
func (post Post) MediaDir() string { func (post Post) MediaDir() string {
return path.Join(post.Dir(), "media") return path.Join(post.Dir(), "media")
} }
@ -85,6 +89,42 @@ func (post Post) Content() []byte {
return data return data
} }
// Status loads the post's status from its status file.
//
// The zero-value PostStatus is returned when the file does not exist, cannot
// be read, or cannot be decoded as YAML; callers cannot distinguish these
// cases from an empty status.
func (post Post) Status() PostStatus {
	statusPath := post.StatusFile()
	if !fileExists(statusPath) {
		return PostStatus{}
	}

	raw, readErr := os.ReadFile(statusPath)
	if readErr != nil {
		return PostStatus{}
	}

	var parsed PostStatus
	if decodeErr := yaml.Unmarshal(raw, &parsed); decodeErr != nil {
		// Drop anything that was partially decoded.
		return PostStatus{}
	}
	return parsed
}
// PersistStatus serializes status as YAML and writes it to the post's status
// file (mode 0644), replacing any previous content.
//
// It returns the marshalling error or the write error, whichever occurs
// first, and nil on success.
func (post Post) PersistStatus(status PostStatus) error {
	encoded, marshalErr := yaml.Marshal(status)
	if marshalErr != nil {
		return marshalErr
	}
	return os.WriteFile(post.StatusFile(), encoded, 0644)
}
func (post Post) RenderedContent() bytes.Buffer { func (post Post) RenderedContent() bytes.Buffer {
data := post.Content() data := post.Content()
@ -211,6 +251,27 @@ func (post *Post) AddWebmention(source string) error {
return nil return nil
} }
// AddOutgoingWebmention records target as an outgoing webmention in the
// post's status and persists the status.
//
// If target is already listed in the status, the function returns nil
// without writing anything. Otherwise it returns the result of
// PersistStatus.
//
// NOTE(review): the target is only appended when post.Webmention(target)
// returns an error; when that lookup succeeds the unchanged status is still
// persisted. The semantics of Webmention are not visible here — confirm this
// guard is intended.
func (post *Post) AddOutgoingWebmention(target string) error {
	status := post.Status()

	if _, lookupErr := post.Webmention(target); lookupErr != nil {
		// Skip targets that are already tracked so repeated scans do not
		// create duplicates.
		for i := range status.Webmentions {
			if status.Webmentions[i].Target == target {
				return nil
			}
		}
		status.Webmentions = append(status.Webmentions, WebmentionOut{Target: target})
	}

	return post.PersistStatus(status)
}
func (post *Post) EnrichWebmention(source string) error { func (post *Post) EnrichWebmention(source string) error {
html, err := post.user.repo.Retriever.Get(source) html, err := post.user.repo.Retriever.Get(source)
if err == nil { if err == nil {
@ -263,3 +324,25 @@ func (post *Post) ApprovedWebmentions() []WebmentionIn {
}) })
return approved return approved
} }
// OutgoingWebmentions returns the webmentions stored in the post's current
// status, as loaded by Status.
func (post *Post) OutgoingWebmentions() []WebmentionOut {
	return post.Status().Webmentions
}
// ScanForLinks scans the post content for links and adds them to the
// `status.yml` file for the post. The links themselves are not scanned by
// this function.
//
// It returns the error from rendering the post or from parsing its links,
// if any. Failures while recording an individual link do not stop the
// remaining links from being processed; the first such failure is returned
// once all links have been attempted.
func (post *Post) ScanForLinks() error {
	// this could be done in markdown parsing, but I don't want to
	// rely on goldmark for this (yet)
	postHtml, err := renderPostContent(post)
	if err != nil {
		return err
	}

	links, err := post.user.repo.Parser.ParseLinks([]byte(postHtml))
	if err != nil {
		return err
	}

	var firstErr error
	for _, link := range links {
		if addErr := post.AddOutgoingWebmention(link); addErr != nil && firstErr == nil {
			firstErr = addErr
		}
	}
	return firstErr
}

View File

@ -193,7 +193,7 @@ func TestPersistWebmention(t *testing.T) {
func TestAddWebmentionCreatesFile(t *testing.T) { func TestAddWebmentionCreatesFile(t *testing.T) {
repo := getTestRepo() repo := getTestRepo()
repo.Retriever = &MockHttpRetriever{} repo.Retriever = &MockHttpRetriever{}
repo.Parser = &MockMicroformatParser{} repo.Parser = &MockHttpParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -211,7 +211,7 @@ func TestAddWebmentionCreatesFile(t *testing.T) {
func TestAddWebmentionNotOverwritingFile(t *testing.T) { func TestAddWebmentionNotOverwritingFile(t *testing.T) {
repo := getTestRepo() repo := getTestRepo()
repo.Retriever = &MockHttpRetriever{} repo.Retriever = &MockHttpRetriever{}
repo.Parser = &MockMicroformatParser{} repo.Parser = &MockHttpParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -241,7 +241,7 @@ func TestAddWebmentionNotOverwritingFile(t *testing.T) {
func TestAddWebmentionAddsParsedTitle(t *testing.T) { func TestAddWebmentionAddsParsedTitle(t *testing.T) {
repo := getTestRepo() repo := getTestRepo()
repo.Retriever = &MockHttpRetriever{} repo.Retriever = &MockHttpRetriever{}
repo.Parser = &MockMicroformatParser{} repo.Parser = &MockHttpParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -303,3 +303,52 @@ func TestApprovedWebmentions(t *testing.T) {
} }
} }
// TestScanningForLinks checks that a link in the post body ends up as the
// single outgoing webmention of the post after ScanForLinks.
func TestScanningForLinks(t *testing.T) {
	repo := getTestRepo()
	user, _ := repo.CreateUser("testuser")
	post, _ := user.CreateNewPost("testpost")

	content := "---\n"
	content += "title: test\n"
	content += "date: Wed, 17 Aug 2022 10:50:02 +0000\n"
	content += "---\n"
	content += "\n"
	content += "[Hello](https://example.com/hello)\n"
	if err := os.WriteFile(post.ContentFile(), []byte(content), 0644); err != nil {
		t.Fatalf("writing post content: %v", err)
	}

	if err := post.ScanForLinks(); err != nil {
		t.Fatalf("ScanForLinks returned error: %v", err)
	}

	webmentions := post.OutgoingWebmentions()
	// Fatalf, not Errorf: indexing webmentions[0] below would panic on an
	// empty slice and hide the real failure.
	if len(webmentions) != 1 {
		t.Fatalf("Expected 1 webmention, got %d", len(webmentions))
	}
	if webmentions[0].Target != "https://example.com/hello" {
		t.Errorf("Expected target: %s, got %s", "https://example.com/hello", webmentions[0].Target)
	}
}
// TestScanningForLinksDoesNotAddDuplicates checks that a link repeated in
// the post body, scanned several times, is recorded only once as an
// outgoing webmention.
func TestScanningForLinksDoesNotAddDuplicates(t *testing.T) {
	repo := getTestRepo()
	user, _ := repo.CreateUser("testuser")
	post, _ := user.CreateNewPost("testpost")

	content := "---\n"
	content += "title: test\n"
	content += "date: Wed, 17 Aug 2022 10:50:02 +0000\n"
	content += "---\n"
	content += "\n"
	content += "[Hello](https://example.com/hello)\n"
	content += "[Hello](https://example.com/hello)\n"
	if err := os.WriteFile(post.ContentFile(), []byte(content), 0644); err != nil {
		t.Fatalf("writing post content: %v", err)
	}

	// Scan repeatedly: neither the duplicate link nor the repeated scans
	// may produce additional entries.
	for i := 0; i < 3; i++ {
		if err := post.ScanForLinks(); err != nil {
			t.Fatalf("ScanForLinks (run %d) returned error: %v", i+1, err)
		}
	}

	webmentions := post.OutgoingWebmentions()
	// Fatalf, not Errorf: indexing webmentions[0] below would panic on an
	// empty slice and hide the real failure.
	if len(webmentions) != 1 {
		t.Fatalf("Expected 1 webmention, got %d", len(webmentions))
	}
	if webmentions[0].Target != "https://example.com/hello" {
		t.Errorf("Expected target: %s, got %s", "https://example.com/hello", webmentions[0].Target)
	}
}

View File

@ -68,13 +68,18 @@ func renderIntoBaseTemplate(user User, data PageContent) (string, error) {
return html.String(), nil return html.String(), nil
} }
func RenderPost(post *Post) (string, error) { func renderPostContent(post *Post) (string, error) {
buf := post.RenderedContent() buf := post.RenderedContent()
postHtml, err := renderEmbedTemplate("embed/post.html", PostRenderData{ postHtml, err := renderEmbedTemplate("embed/post.html", PostRenderData{
Title: post.Title(), Title: post.Title(),
Post: post, Post: post,
Content: template.HTML(buf.String()), Content: template.HTML(buf.String()),
}) })
return postHtml, err
}
func RenderPost(post *Post) (string, error) {
postHtml, err := renderPostContent(post)
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -21,7 +21,7 @@ type Repository struct {
active_user string active_user string
allow_raw_html bool allow_raw_html bool
Retriever HttpRetriever Retriever HttpRetriever
Parser MicroformatParser Parser HttpParser
} }
type RepoConfig struct { type RepoConfig struct {

View File

@ -28,8 +28,9 @@ type HttpRetriever interface {
Get(url string) ([]byte, error) Get(url string) ([]byte, error)
} }
type MicroformatParser interface { type HttpParser interface {
ParseHEntry(data []byte) (ParsedHEntry, error) ParseHEntry(data []byte) (ParsedHEntry, error)
ParseLinks(data []byte) ([]string, error)
} }
type OwlHttpRetriever struct{} type OwlHttpRetriever struct{}
@ -102,3 +103,29 @@ func (OwlMicroformatParser) ParseHEntry(data []byte) (ParsedHEntry, error) {
} }
return findHFeed(doc) return findHFeed(doc)
} }
// ParseLinks parses data as HTML and returns the value of every `href`
// attribute found on `<a>` elements, in document (pre-order) order.
//
// The returned slice is never nil. If the HTML cannot be parsed, an empty
// slice is returned together with the parse error.
func (OwlMicroformatParser) ParseLinks(data []byte) ([]string, error) {
	root, parseErr := html.Parse(strings.NewReader(string(data)))
	if parseErr != nil {
		return make([]string, 0), parseErr
	}

	hrefs := make([]string, 0)

	// collect walks the tree depth-first, visiting a node before its
	// children, and appends each anchor's href values to hrefs.
	var collect func(node *html.Node)
	collect = func(node *html.Node) {
		isAnchor := node.Type == html.ElementNode && node.Data == "a"
		if isAnchor {
			for _, attribute := range node.Attr {
				if attribute.Key == "href" {
					hrefs = append(hrefs, attribute.Val)
				}
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			collect(child)
		}
	}
	collect(root)

	return hrefs, nil
}