From f899184e297e1877707d8fa8275f4866cc32dd01 Mon Sep 17 00:00:00 2001
From: Niko Abeler
Date: Sun, 4 Sep 2022 15:32:37 +0200
Subject: [PATCH] Scanning for webmentions in posts

---
 owl_test.go   |  8 +++--
 post.go       | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++
 post_test.go  | 66 ++++++++++++++++++++++++++++++++++++--
 renderer.go   |  9 +++++-
 repository.go |  2 +-
 webmention.go | 32 ++++++++++++++++++-
 6 files changed, 196 insertions(+), 8 deletions(-)

diff --git a/owl_test.go b/owl_test.go
index 5d3b5f4..dcb1b7f 100644
--- a/owl_test.go
+++ b/owl_test.go
@@ -6,12 +6,16 @@ import (
 	"time"
 )
 
-type MockMicroformatParser struct{}
+type MockHttpParser struct{}
 
-func (*MockMicroformatParser) ParseHEntry(data []byte) (owl.ParsedHEntry, error) {
+func (*MockHttpParser) ParseHEntry(data []byte) (owl.ParsedHEntry, error) {
 	return owl.ParsedHEntry{Title: "Mock Title"}, nil
 }
 
+func (*MockHttpParser) ParseLinks(data []byte) ([]string, error) {
+	return []string{"http://example.com"}, nil
+}
+
 type MockHttpRetriever struct{}
 
 func (*MockHttpRetriever) Get(url string) ([]byte, error) {
diff --git a/post.go b/post.go
index 5722c00..279719c 100644
--- a/post.go
+++ b/post.go
@@ -44,6 +44,12 @@ func (post Post) Dir() string {
 	return path.Join(post.user.Dir(), "public", post.id)
 }
 
+// StatusFile returns the path of the `status.yml` file inside the post's
+// directory.
+func (post Post) StatusFile() string {
+	return path.Join(post.Dir(), "status.yml")
+}
+
 func (post Post) MediaDir() string {
 	return path.Join(post.Dir(), "media")
 }
@@ -85,6 +91,39 @@ func (post Post) Content() []byte {
 	return data
 }
 
+// Status reads and parses the post's status file. An empty PostStatus is
+// returned when the file does not exist, cannot be read or is not valid YAML.
+func (post Post) Status() PostStatus {
+	fileName := post.StatusFile()
+	if !fileExists(fileName) {
+		return PostStatus{}
+	}
+
+	data, err := os.ReadFile(fileName)
+	if err != nil {
+		return PostStatus{}
+	}
+
+	status := PostStatus{}
+	if err := yaml.Unmarshal(data, &status); err != nil {
+		// Do not hand out a partially decoded status.
+		return PostStatus{}
+	}
+
+	return status
+}
+
+// PersistStatus serializes status as YAML and writes it to the post's status
+// file, replacing any previous content.
+func (post Post) PersistStatus(status PostStatus) error {
+	data, err := yaml.Marshal(status)
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(post.StatusFile(), data, 0644)
+}
+
 func (post Post) RenderedContent() bytes.Buffer {
 	data := post.Content()
 
@@ -211,6 +250,24 @@ func (post *Post) AddWebmention(source string) error {
 	return nil
 }
 
+// AddOutgoingWebmention records target as an outgoing webmention in the
+// post's status file. Targets that are already recorded are left untouched,
+// so calling it repeatedly with the same target is a no-op.
+func (post *Post) AddOutgoingWebmention(target string) error {
+	status := post.Status()
+
+	// Deduplicate only against the outgoing targets stored in the status
+	// file; incoming webmentions are unrelated to what this post links to.
+	for _, existing := range status.Webmentions {
+		if existing.Target == target {
+			return nil
+		}
+	}
+
+	status.Webmentions = append(status.Webmentions, WebmentionOut{Target: target})
+	return post.PersistStatus(status)
+}
+
 func (post *Post) EnrichWebmention(source string) error {
 	html, err := post.user.repo.Retriever.Get(source)
 	if err == nil {
@@ -263,3 +320,33 @@ func (post *Post) ApprovedWebmentions() []WebmentionIn {
 	})
 	return approved
 }
+
+// OutgoingWebmentions returns the outgoing webmentions recorded in the
+// post's status file.
+func (post *Post) OutgoingWebmentions() []WebmentionOut {
+	return post.Status().Webmentions
+}
+
+// ScanForLinks scans the rendered post content for links and records each of
+// them as an outgoing webmention in the post's `status.yml` file. It only
+// records the links; no webmention is sent by this function.
+func (post *Post) ScanForLinks() error {
+	// this could be done in markdown parsing, but I don't want to
+	// rely on goldmark for this (yet)
+	postHtml, err := renderPostContent(post)
+	if err != nil {
+		return err
+	}
+
+	links, err := post.user.repo.Parser.ParseLinks([]byte(postHtml))
+	if err != nil {
+		return err
+	}
+
+	for _, link := range links {
+		if err := post.AddOutgoingWebmention(link); err != nil {
+			return err
+		}
+	}
+	return nil
+}
diff --git a/post_test.go b/post_test.go
index 698c956..29e90ea 100644
--- a/post_test.go
+++ b/post_test.go
@@ -193,7 +193,7 @@ func TestPersistWebmention(t *testing.T) {
 func TestAddWebmentionCreatesFile(t *testing.T) {
 	repo := getTestRepo()
 	repo.Retriever = &MockHttpRetriever{}
-	repo.Parser = &MockMicroformatParser{}
+	repo.Parser = &MockHttpParser{}
 	user, _ := repo.CreateUser("testuser")
 	post, _ := user.CreateNewPost("testpost")
 
@@ -211,7 +211,7 @@ func TestAddWebmentionCreatesFile(t *testing.T) {
 func TestAddWebmentionNotOverwritingFile(t *testing.T) {
 	repo := getTestRepo()
 	repo.Retriever = &MockHttpRetriever{}
-	repo.Parser = &MockMicroformatParser{}
+	repo.Parser = &MockHttpParser{}
 	user, _ := repo.CreateUser("testuser")
 	post, _ := user.CreateNewPost("testpost")
 
@@ -241,7 +241,7 @@ func TestAddWebmentionNotOverwritingFile(t *testing.T) {
 func TestAddWebmentionAddsParsedTitle(t *testing.T) {
 	repo := getTestRepo()
 	repo.Retriever = &MockHttpRetriever{}
-	repo.Parser = &MockMicroformatParser{}
+	repo.Parser = &MockHttpParser{}
 	user, _ := repo.CreateUser("testuser")
 	post, _ := user.CreateNewPost("testpost")
 
@@ -303,3 +303,63 @@ func TestApprovedWebmentions(t *testing.T) {
 	}
 
 }
+
+func TestScanningForLinks(t *testing.T) {
+	repo := getTestRepo()
+	user, _ := repo.CreateUser("testuser")
+	post, _ := user.CreateNewPost("testpost")
+
+	content := "---\n"
+	content += "title: test\n"
+	content += "date: Wed, 17 Aug 2022 10:50:02 +0000\n"
+	content += "---\n"
+	content += "\n"
+	content += "[Hello](https://example.com/hello)\n"
+	if err := os.WriteFile(post.ContentFile(), []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write post content: %v", err)
+	}
+
+	if err := post.ScanForLinks(); err != nil {
+		t.Fatalf("Unexpected error while scanning for links: %v", err)
+	}
+	webmentions := post.OutgoingWebmentions()
+	// Fatalf: indexing below would panic on an empty result.
+	if len(webmentions) != 1 {
+		t.Fatalf("Expected 1 webmention, got %d", len(webmentions))
+	}
+	if webmentions[0].Target != "https://example.com/hello" {
+		t.Errorf("Expected target: %s, got %s", "https://example.com/hello", webmentions[0].Target)
+	}
+}
+
+func TestScanningForLinksDoesNotAddDuplicates(t *testing.T) {
+	repo := getTestRepo()
+	user, _ := repo.CreateUser("testuser")
+	post, _ := user.CreateNewPost("testpost")
+
+	content := "---\n"
+	content += "title: test\n"
+	content += "date: Wed, 17 Aug 2022 10:50:02 +0000\n"
+	content += "---\n"
+	content += "\n"
+	content += "[Hello](https://example.com/hello)\n"
+	content += "[Hello](https://example.com/hello)\n"
+	if err := os.WriteFile(post.ContentFile(), []byte(content), 0644); err != nil {
+		t.Fatalf("Failed to write post content: %v", err)
+	}
+
+	// Scan repeatedly: neither the duplicate link nor a re-scan may add
+	// a second entry.
+	for i := 0; i < 3; i++ {
+		if err := post.ScanForLinks(); err != nil {
+			t.Fatalf("Unexpected error while scanning for links: %v", err)
+		}
+	}
+	webmentions := post.OutgoingWebmentions()
+	if len(webmentions) != 1 {
+		t.Fatalf("Expected 1 webmention, got %d", len(webmentions))
+	}
+	if webmentions[0].Target != "https://example.com/hello" {
+		t.Errorf("Expected target: %s, got %s", "https://example.com/hello", webmentions[0].Target)
+	}
+}
diff --git a/renderer.go b/renderer.go
index 74034a6..a1b7643 100644
--- a/renderer.go
+++ b/renderer.go
@@ -68,13 +68,20 @@ func renderIntoBaseTemplate(user User, data PageContent) (string, error) {
 	return html.String(), nil
 }
 
-func RenderPost(post *Post) (string, error) {
+// renderPostContent renders the post through the `embed/post.html` template
+// and returns the resulting HTML.
+func renderPostContent(post *Post) (string, error) {
 	buf := post.RenderedContent()
 	postHtml, err := renderEmbedTemplate("embed/post.html", PostRenderData{
 		Title:   post.Title(),
 		Post:    post,
 		Content: template.HTML(buf.String()),
 	})
+	return postHtml, err
+}
+
+func RenderPost(post *Post) (string, error) {
+	postHtml, err := renderPostContent(post)
 	if err != nil {
 		return "", err
 	}
diff --git a/repository.go b/repository.go
index c3c8c66..c5f8a1c 100644
--- a/repository.go
+++ b/repository.go
@@ -21,7 +21,7 @@ type Repository struct {
 	active_user    string
 	allow_raw_html bool
 	Retriever      HttpRetriever
-	Parser         MicroformatParser
+	Parser         HttpParser
 }
 
 type RepoConfig struct {
diff --git a/webmention.go b/webmention.go
index f8ca0b1..ab02624 100644
--- a/webmention.go
+++ b/webmention.go
@@ -28,8 +28,11 @@ type HttpRetriever interface {
 	Get(url string) ([]byte, error)
 }
 
-type MicroformatParser interface {
+// HttpParser extracts data from HTML documents.
+type HttpParser interface {
 	ParseHEntry(data []byte) (ParsedHEntry, error)
+	// ParseLinks returns the link targets found in data.
+	ParseLinks(data []byte) ([]string, error)
 }
 
 type OwlHttpRetriever struct{}
@@ -102,3 +105,30 @@ func (OwlMicroformatParser) ParseHEntry(data []byte) (ParsedHEntry, error) {
 	}
 	return findHFeed(doc)
 }
+
+// ParseLinks parses data as HTML and returns the `href` value of every `a`
+// element in document order. The values are returned as written in the
+// document, duplicates included.
+func (OwlMicroformatParser) ParseLinks(data []byte) ([]string, error) {
+	doc, err := html.Parse(strings.NewReader(string(data)))
+	if err != nil {
+		return nil, err
+	}
+
+	links := make([]string, 0)
+	var collect func(*html.Node)
+	collect = func(n *html.Node) {
+		if n.Type == html.ElementNode && n.Data == "a" {
+			for _, attr := range n.Attr {
+				if attr.Key == "href" {
+					links = append(links, attr.Val)
+				}
+			}
+		}
+		for c := n.FirstChild; c != nil; c = c.NextSibling {
+			collect(c)
+		}
+	}
+	collect(doc)
+	return links, nil
+}