Refactor HTML parser to receive the full HTTP response instead of raw body bytes

This commit is contained in:
Niko Abeler 2022-09-06 20:32:21 +02:00
parent 945a13ed2f
commit 0b9da1860f
6 changed files with 119 additions and 68 deletions

View File

@ -2,33 +2,44 @@ package owl_test
import ( import (
"h4kor/owl-blogs" "h4kor/owl-blogs"
"io"
"math/rand" "math/rand"
"net/http"
"net/url" "net/url"
"time" "time"
) )
type MockHttpParser struct{} type MockHtmlParser struct{}
func (*MockHttpParser) ParseHEntry(data []byte) (owl.ParsedHEntry, error) { func (*MockHtmlParser) ParseHEntry(resp *http.Response) (owl.ParsedHEntry, error) {
return owl.ParsedHEntry{Title: "Mock Title"}, nil return owl.ParsedHEntry{Title: "Mock Title"}, nil
}
func (*MockHttpParser) ParseLinks(data []byte) ([]string, error) { }
func (*MockHtmlParser) ParseLinks(resp *http.Response) ([]string, error) {
return []string{"http://example.com"}, nil return []string{"http://example.com"}, nil
}
func (*MockHttpParser) GetWebmentionEndpoint(data []byte) (string, error) { }
func (*MockHtmlParser) ParseLinksFromString(string) ([]string, error) {
return []string{"http://example.com"}, nil
}
func (*MockHtmlParser) GetWebmentionEndpoint(resp *http.Response) (string, error) {
return "http://example.com/webmention", nil return "http://example.com/webmention", nil
} }
type MockHttpRetriever struct{} type MockHttpClient struct{}
func (*MockHttpRetriever) Get(url string) ([]byte, error) { func (*MockHttpClient) Get(url string) (resp *http.Response, err error) {
return []byte(""), nil return &http.Response{}, nil
} }
func (*MockHttpClient) Post(url, contentType string, body io.Reader) (resp *http.Response, err error) {
func (m *MockHttpRetriever) Post(url string, data url.Values) ([]byte, error) { return &http.Response{}, nil
return []byte(""), nil }
func (*MockHttpClient) PostForm(url string, data url.Values) (resp *http.Response, err error) {
return &http.Response{}, nil
} }
func randomName() string { func randomName() string {

12
post.go
View File

@ -295,13 +295,13 @@ func (post *Post) UpdateOutgoingWebmention(webmention *WebmentionOut) error {
} }
func (post *Post) EnrichWebmention(source string) error { func (post *Post) EnrichWebmention(source string) error {
html, err := post.user.repo.HttpClient.Get(source) resp, err := post.user.repo.HttpClient.Get(source)
if err == nil { if err == nil {
webmention, err := post.Webmention(source) webmention, err := post.Webmention(source)
if err != nil { if err != nil {
return err return err
} }
entry, err := post.user.repo.Parser.ParseHEntry(html) entry, err := post.user.repo.Parser.ParseHEntry(resp)
if err == nil { if err == nil {
webmention.Title = entry.Title webmention.Title = entry.Title
return post.PersistWebmention(webmention) return post.PersistWebmention(webmention)
@ -359,7 +359,7 @@ func (post *Post) ScanForLinks() error {
// this could be done in markdown parsing, but I don't want to // this could be done in markdown parsing, but I don't want to
// rely on goldmark for this (yet) // rely on goldmark for this (yet)
postHtml := post.RenderedContent() postHtml := post.RenderedContent()
links, _ := post.user.repo.Parser.ParseLinks(postHtml.Bytes()) links, _ := post.user.repo.Parser.ParseLinksFromString(string(postHtml.Bytes()))
for _, link := range links { for _, link := range links {
post.AddOutgoingWebmention(link) post.AddOutgoingWebmention(link)
} }
@ -370,13 +370,13 @@ func (post *Post) SendWebmention(webmention WebmentionOut) error {
defer post.UpdateOutgoingWebmention(&webmention) defer post.UpdateOutgoingWebmention(&webmention)
webmention.ScannedAt = time.Now() webmention.ScannedAt = time.Now()
html, err := post.user.repo.HttpClient.Get(webmention.Target) resp, err := post.user.repo.HttpClient.Get(webmention.Target)
if err != nil { if err != nil {
webmention.Supported = false webmention.Supported = false
return err return err
} }
endpoint, err := post.user.repo.Parser.GetWebmentionEndpoint(html) endpoint, err := post.user.repo.Parser.GetWebmentionEndpoint(resp)
if err != nil { if err != nil {
webmention.Supported = false webmention.Supported = false
return err return err
@ -387,7 +387,7 @@ func (post *Post) SendWebmention(webmention WebmentionOut) error {
payload := url.Values{} payload := url.Values{}
payload.Set("source", post.FullUrl()) payload.Set("source", post.FullUrl())
payload.Set("target", webmention.Target) payload.Set("target", webmention.Target)
_, err = post.user.repo.HttpClient.Post(endpoint, payload) _, err = post.user.repo.HttpClient.PostForm(endpoint, payload)
if err != nil { if err != nil {
return err return err

View File

@ -190,8 +190,8 @@ func TestPersistWebmention(t *testing.T) {
func TestAddWebmentionCreatesFile(t *testing.T) { func TestAddWebmentionCreatesFile(t *testing.T) {
repo := getTestRepo(owl.RepoConfig{}) repo := getTestRepo(owl.RepoConfig{})
repo.HttpClient = &MockHttpRetriever{} repo.HttpClient = &MockHttpClient{}
repo.Parser = &MockHttpParser{} repo.Parser = &MockHtmlParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -208,8 +208,8 @@ func TestAddWebmentionCreatesFile(t *testing.T) {
func TestAddWebmentionNotOverwritingFile(t *testing.T) { func TestAddWebmentionNotOverwritingFile(t *testing.T) {
repo := getTestRepo(owl.RepoConfig{}) repo := getTestRepo(owl.RepoConfig{})
repo.HttpClient = &MockHttpRetriever{} repo.HttpClient = &MockHttpClient{}
repo.Parser = &MockHttpParser{} repo.Parser = &MockHtmlParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -238,8 +238,8 @@ func TestAddWebmentionNotOverwritingFile(t *testing.T) {
func TestAddWebmentionAddsParsedTitle(t *testing.T) { func TestAddWebmentionAddsParsedTitle(t *testing.T) {
repo := getTestRepo(owl.RepoConfig{}) repo := getTestRepo(owl.RepoConfig{})
repo.HttpClient = &MockHttpRetriever{} repo.HttpClient = &MockHttpClient{}
repo.Parser = &MockHttpParser{} repo.Parser = &MockHtmlParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")
@ -353,8 +353,8 @@ func TestScanningForLinksDoesNotAddDuplicates(t *testing.T) {
func TestCanSendWebmention(t *testing.T) { func TestCanSendWebmention(t *testing.T) {
repo := getTestRepo(owl.RepoConfig{}) repo := getTestRepo(owl.RepoConfig{})
repo.HttpClient = &MockHttpRetriever{} repo.HttpClient = &MockHttpClient{}
repo.Parser = &MockHttpParser{} repo.Parser = &MockHtmlParser{}
user, _ := repo.CreateUser("testuser") user, _ := repo.CreateUser("testuser")
post, _ := user.CreateNewPost("testpost") post, _ := user.CreateNewPost("testpost")

View File

@ -28,7 +28,7 @@ type RepoConfig struct {
} }
func CreateRepository(name string, config RepoConfig) (Repository, error) { func CreateRepository(name string, config RepoConfig) (Repository, error) {
newRepo := Repository{name: name, Parser: OwlHtmlParser{}, HttpClient: OwlHttpClient{}} newRepo := Repository{name: name, Parser: OwlHtmlParser{}, HttpClient: &OwlHttpClient{}}
// check if repository already exists // check if repository already exists
if dirExists(newRepo.Dir()) { if dirExists(newRepo.Dir()) {
return Repository{}, fmt.Errorf("Repository already exists") return Repository{}, fmt.Errorf("Repository already exists")
@ -69,7 +69,7 @@ func CreateRepository(name string, config RepoConfig) (Repository, error) {
func OpenRepository(name string) (Repository, error) { func OpenRepository(name string) (Repository, error) {
repo := Repository{name: name, Parser: OwlHtmlParser{}, HttpClient: OwlHttpClient{}} repo := Repository{name: name, Parser: OwlHtmlParser{}, HttpClient: &OwlHttpClient{}}
if !dirExists(repo.Dir()) { if !dirExists(repo.Dir()) {
return Repository{}, fmt.Errorf("Repository does not exist: " + repo.Dir()) return Repository{}, fmt.Errorf("Repository does not exist: " + repo.Dir())
} }

View File

@ -3,7 +3,6 @@ package owl
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"io" "io"
"net/http" "net/http"
"net/url" "net/url"
@ -28,17 +27,19 @@ type WebmentionOut struct {
} }
type HttpClient interface { type HttpClient interface {
Get(url string) ([]byte, error) Get(url string) (resp *http.Response, err error)
Post(url string, data url.Values) ([]byte, error) Post(url, contentType string, body io.Reader) (resp *http.Response, err error)
PostForm(url string, data url.Values) (resp *http.Response, err error)
} }
type HtmlParser interface { type HtmlParser interface {
ParseHEntry(data []byte) (ParsedHEntry, error) ParseHEntry(resp *http.Response) (ParsedHEntry, error)
ParseLinks(data []byte) ([]string, error) ParseLinks(resp *http.Response) ([]string, error)
GetWebmentionEndpoint(data []byte) (string, error) ParseLinksFromString(string) ([]string, error)
GetWebmentionEndpoint(resp *http.Response) (string, error)
} }
type OwlHttpClient struct{} type OwlHttpClient = http.Client
type OwlHtmlParser struct{} type OwlHtmlParser struct{}
@ -46,30 +47,8 @@ type ParsedHEntry struct {
Title string Title string
} }
func (OwlHttpClient) Get(url string) ([]byte, error) {
resp, err := http.Get(url)
if resp.StatusCode < 200 || resp.StatusCode > 299 {
return make([]byte, 0), errors.New("Failed to get url. Status code: " + fmt.Sprint(resp.StatusCode))
}
if err != nil {
return []byte{}, err
}
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
func (OwlHttpClient) Post(url string, data url.Values) ([]byte, error) {
resp, err := http.Post(url, "application/x-www-form-urlencoded", strings.NewReader(data.Encode()))
if err != nil {
return []byte{}, err
}
defer resp.Body.Close()
return io.ReadAll(resp.Body)
}
func collectText(n *html.Node, buf *bytes.Buffer) { func collectText(n *html.Node, buf *bytes.Buffer) {
if n.Type == html.TextNode { if n.Type == html.TextNode {
buf.WriteString(n.Data) buf.WriteString(n.Data)
} }
@ -78,8 +57,18 @@ func collectText(n *html.Node, buf *bytes.Buffer) {
} }
} }
func (OwlHtmlParser) ParseHEntry(data []byte) (ParsedHEntry, error) { func readResponseBody(resp *http.Response) (string, error) {
doc, err := html.Parse(strings.NewReader(string(data))) defer resp.Body.Close()
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(bodyBytes), nil
}
func (OwlHtmlParser) ParseHEntry(resp *http.Response) (ParsedHEntry, error) {
htmlStr, err := readResponseBody(resp)
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil { if err != nil {
return ParsedHEntry{}, err return ParsedHEntry{}, err
} }
@ -121,8 +110,16 @@ func (OwlHtmlParser) ParseHEntry(data []byte) (ParsedHEntry, error) {
return findHFeed(doc) return findHFeed(doc)
} }
func (OwlHtmlParser) ParseLinks(data []byte) ([]string, error) { func (OwlHtmlParser) ParseLinks(resp *http.Response) ([]string, error) {
doc, err := html.Parse(strings.NewReader(string(data))) htmlStr, err := readResponseBody(resp)
if err != nil {
return []string{}, err
}
return OwlHtmlParser{}.ParseLinksFromString(htmlStr)
}
func (OwlHtmlParser) ParseLinksFromString(htmlStr string) ([]string, error) {
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil { if err != nil {
return make([]string, 0), err return make([]string, 0), err
} }
@ -144,11 +141,11 @@ func (OwlHtmlParser) ParseLinks(data []byte) ([]string, error) {
return links, nil return links, nil
} }
return findLinks(doc) return findLinks(doc)
} }
func (OwlHtmlParser) GetWebmentionEndpoint(data []byte) (string, error) { func (OwlHtmlParser) GetWebmentionEndpoint(resp *http.Response) (string, error) {
doc, err := html.Parse(strings.NewReader(string(data))) htmlStr, err := readResponseBody(resp)
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil { if err != nil {
return "", err return "", err
} }

View File

@ -1,7 +1,10 @@
package owl_test package owl_test
import ( import (
"bytes"
"h4kor/owl-blogs" "h4kor/owl-blogs"
"io"
"net/http"
"testing" "testing"
) )
@ -12,7 +15,7 @@ import (
func TestParseValidHEntry(t *testing.T) { func TestParseValidHEntry(t *testing.T) {
html := []byte("<div class=\"h-entry\"><div class=\"p-name\">Foo</div></div>") html := []byte("<div class=\"h-entry\"><div class=\"p-name\">Foo</div></div>")
parser := &owl.OwlHtmlParser{} parser := &owl.OwlHtmlParser{}
entry, err := parser.ParseHEntry(html) entry, err := parser.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
if err != nil { if err != nil {
t.Errorf("Unable to parse feed: %v", err) t.Errorf("Unable to parse feed: %v", err)
@ -25,7 +28,7 @@ func TestParseValidHEntry(t *testing.T) {
func TestParseValidHEntryWithoutTitle(t *testing.T) { func TestParseValidHEntryWithoutTitle(t *testing.T) {
html := []byte("<div class=\"h-entry\"></div><div class=\"p-name\">Foo</div>") html := []byte("<div class=\"h-entry\"></div><div class=\"p-name\">Foo</div>")
parser := &owl.OwlHtmlParser{} parser := &owl.OwlHtmlParser{}
entry, err := parser.ParseHEntry(html) entry, err := parser.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
if err != nil { if err != nil {
t.Errorf("Unable to parse feed: %v", err) t.Errorf("Unable to parse feed: %v", err)
@ -38,7 +41,7 @@ func TestParseValidHEntryWithoutTitle(t *testing.T) {
func TestGetWebmentionEndpointLink(t *testing.T) { func TestGetWebmentionEndpointLink(t *testing.T) {
html := []byte("<link rel=\"webmention\" href=\"http://example.com/webmention\" />") html := []byte("<link rel=\"webmention\" href=\"http://example.com/webmention\" />")
parser := &owl.OwlHtmlParser{} parser := &owl.OwlHtmlParser{}
endpoint, err := parser.GetWebmentionEndpoint(html) endpoint, err := parser.GetWebmentionEndpoint(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
if err != nil { if err != nil {
t.Errorf("Unable to parse feed: %v", err) t.Errorf("Unable to parse feed: %v", err)
@ -51,7 +54,7 @@ func TestGetWebmentionEndpointLink(t *testing.T) {
func TestGetWebmentionEndpointLinkA(t *testing.T) { func TestGetWebmentionEndpointLinkA(t *testing.T) {
html := []byte("<a rel=\"webmention\" href=\"http://example.com/webmention\" />") html := []byte("<a rel=\"webmention\" href=\"http://example.com/webmention\" />")
parser := &owl.OwlHtmlParser{} parser := &owl.OwlHtmlParser{}
endpoint, err := parser.GetWebmentionEndpoint(html) endpoint, err := parser.GetWebmentionEndpoint(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
if err != nil { if err != nil {
t.Errorf("Unable to parse feed: %v", err) t.Errorf("Unable to parse feed: %v", err)
@ -60,3 +63,43 @@ func TestGetWebmentionEndpointLinkA(t *testing.T) {
t.Errorf("Wrong endpoint. Expected %v, got %v", "http://example.com/webmention", endpoint) t.Errorf("Wrong endpoint. Expected %v, got %v", "http://example.com/webmention", endpoint)
} }
} }
// func TestRealWorldWebmention(t *testing.T) {
// links := []string{
// "https://webmention.rocks/test/1",
// "https://webmention.rocks/test/2",
// "https://webmention.rocks/test/3",
// "https://webmention.rocks/test/4",
// "https://webmention.rocks/test/5",
// "https://webmention.rocks/test/6",
// "https://webmention.rocks/test/7",
// "https://webmention.rocks/test/8",
// "https://webmention.rocks/test/9",
// "https://webmention.rocks/test/10",
// "https://webmention.rocks/test/11",
// "https://webmention.rocks/test/12",
// "https://webmention.rocks/test/13",
// "https://webmention.rocks/test/14",
// "https://webmention.rocks/test/15",
// "https://webmention.rocks/test/16",
// "https://webmention.rocks/test/17",
// "https://webmention.rocks/test/18",
// "https://webmention.rocks/test/19",
// "https://webmention.rocks/test/20",
// "https://webmention.rocks/test/21",
// "https://webmention.rocks/test/22",
// "https://webmention.rocks/test/23/page",
// }
// for _, link := range links {
// parser := &owl.OwlHtmlParser{}
// client := &owl.OwlHttpClient{}
// html, _ := client.Get(link)
// _, err := parser.GetWebmentionEndpoint(html)
// if err != nil {
// t.Errorf("Unable to find webmention: %v for link %v", err, link)
// }
// }
// }