move webmention stuff into util function (same code as v1)

This commit is contained in:
Niko Abeler 2023-08-11 15:52:14 +02:00
parent cd116b9a57
commit 08678e2697
6 changed files with 344 additions and 158 deletions

View File

@ -14,7 +14,7 @@ func setupService() *app.EntryService {
register := app.NewEntryTypeRegistry() register := app.NewEntryTypeRegistry()
register.Register(&test.MockEntry{}) register.Register(&test.MockEntry{})
repo := infra.NewEntryRepository(db, register) repo := infra.NewEntryRepository(db, register)
service := app.NewEntryService(repo, app.NewEntryCreationBus()) service := app.NewEntryService(repo, app.NewEventBus())
return service return service
} }

View File

@ -18,7 +18,7 @@ type EventBus struct {
subscribers []Subscriber subscribers []Subscriber
} }
func NewEntryCreationBus() *EventBus { func NewEventBus() *EventBus {
return &EventBus{subscribers: make([]Subscriber, 0)} return &EventBus{subscribers: make([]Subscriber, 0)}
} }

267
app/indieweb_utils.go Normal file
View File

@ -0,0 +1,267 @@
package app
import (
"bytes"
"errors"
"io"
"net/http"
"net/url"
"strings"
"golang.org/x/net/html"
)
// HtmlParser groups the IndieWeb discovery helpers that operate on an HTTP
// response (or on a raw HTML string). Its method set has the same names and
// signatures as the package-level functions declared in this file.
type HtmlParser interface {
// ParseHEntry extracts h-entry microformat data from the response body.
ParseHEntry(resp *http.Response) (ParsedHEntry, error)
// ParseLinks returns the href values of the <a> elements in the response body.
ParseLinks(resp *http.Response) ([]string, error)
// ParseLinksFromString returns the href values of the <a> elements in an HTML string.
ParseLinksFromString(string) ([]string, error)
// GetWebmentionEndpoint discovers the Webmention endpoint advertised by the response.
GetWebmentionEndpoint(resp *http.Response) (string, error)
// GetRedirctUris collects the redirect_uri links advertised by the response.
// NOTE(review): the name is misspelled ("Redirct"); it is kept because it is
// part of the public interface.
GetRedirctUris(resp *http.Response) ([]string, error)
}
// ParsedHEntry holds the data extracted from an h-entry microformat.
type ParsedHEntry struct {
// Title is the text content of the entry's p-name element; it is empty when
// the h-entry has no p-name.
Title string
}
// collectText appends the data of every text node in the subtree rooted at n
// (n itself included) to buf, visiting the nodes in document order.
func collectText(n *html.Node, buf *bytes.Buffer) {
	if n.Type == html.TextNode {
		buf.WriteString(n.Data)
	}

	// Descend into each child in turn so text is gathered depth-first.
	child := n.FirstChild
	for child != nil {
		collectText(child, buf)
		child = child.NextSibling
	}
}
// readResponseBody drains resp.Body and returns its content as a string.
// The body is closed before the function returns, whether or not reading
// succeeded. On a read error the empty string is returned with the error.
func readResponseBody(resp *http.Response) (string, error) {
	body := resp.Body
	defer body.Close()

	raw, readErr := io.ReadAll(body)
	if readErr == nil {
		return string(raw), nil
	}
	return "", readErr
}
// ParseHEntry reads the HTML document in resp and extracts the first h-entry
// microformat it contains.
//
// The response body is read completely and closed. The document is walked in
// document order and the first element whose class list contains "h-entry" is
// used. Inside that element (the element itself included) the text content of
// the first element carrying the "p-name" class becomes Title; when there is
// no such element Title stays empty.
//
// An error is returned when the body cannot be read or parsed, or when the
// document contains no h-entry.
func ParseHEntry(resp *http.Response) (ParsedHEntry, error) {
	htmlStr, err := readResponseBody(resp)
	if err != nil {
		return ParsedHEntry{}, err
	}
	doc, err := html.Parse(strings.NewReader(htmlStr))
	if err != nil {
		return ParsedHEntry{}, err
	}

	// hasClass reports whether n carries the given class name. The class
	// attribute is a whitespace separated list, so names are compared as
	// whole tokens: a substring test would also accept unrelated classes
	// such as "h-entry-list" or "xp-name".
	hasClass := func(n *html.Node, class string) bool {
		for _, attr := range n.Attr {
			if attr.Key != "class" {
				continue
			}
			for _, name := range strings.Fields(attr.Val) {
				if name == class {
					return true
				}
			}
		}
		return false
	}

	// findTitle returns the text of the first p-name element in the subtree
	// rooted at n. Stopping at the first match keeps a later p-name (for
	// example the name inside a nested author h-card) from overwriting the
	// entry title.
	var findTitle func(*html.Node) (string, bool)
	findTitle = func(n *html.Node) (string, bool) {
		if hasClass(n, "p-name") {
			buf := &bytes.Buffer{}
			collectText(n, buf)
			return buf.String(), true
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if title, ok := findTitle(c); ok {
				return title, true
			}
		}
		return "", false
	}

	// findHEntry returns the first node, in document order, that is marked
	// up as an h-entry, or nil when there is none.
	var findHEntry func(*html.Node) *html.Node
	findHEntry = func(n *html.Node) *html.Node {
		if hasClass(n, "h-entry") {
			return n
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if found := findHEntry(c); found != nil {
				return found
			}
		}
		return nil
	}

	entryNode := findHEntry(doc)
	if entryNode == nil {
		return ParsedHEntry{}, errors.New("no h-entry found")
	}
	// A missing p-name is not an error; the title is simply left empty.
	title, _ := findTitle(entryNode)
	return ParsedHEntry{Title: title}, nil
}
// ParseLinks reads the body of resp (closing it in the process) and returns
// the href values of all <a> elements found in it. When the body cannot be
// read, an empty slice is returned together with the read error.
func ParseLinks(resp *http.Response) ([]string, error) {
	body, readErr := readResponseBody(resp)
	if readErr == nil {
		return ParseLinksFromString(body)
	}
	return []string{}, readErr
}
// ParseLinksFromString parses htmlStr as an HTML document and returns the
// value of every href attribute found on an <a> element, in document order.
// The returned slice is never nil; it is empty when no link is present. A
// parse failure yields an empty slice together with the parser's error.
func ParseLinksFromString(htmlStr string) ([]string, error) {
	root, parseErr := html.Parse(strings.NewReader(htmlStr))
	if parseErr != nil {
		return []string{}, parseErr
	}

	hrefs := []string{}

	// visit records the hrefs of node (when it is an anchor) and then walks
	// its children, which yields the links in document order.
	var visit func(*html.Node)
	visit = func(node *html.Node) {
		isAnchor := node.Type == html.ElementNode && node.Data == "a"
		if isAnchor {
			for _, attribute := range node.Attr {
				if attribute.Key == "href" {
					hrefs = append(hrefs, attribute.Val)
				}
			}
		}
		child := node.FirstChild
		for child != nil {
			visit(child)
			child = child.NextSibling
		}
	}
	visit(root)

	return hrefs, nil
}
// GetWebmentionEndpoint discovers the Webmention endpoint advertised by resp.
//
// Discovery order:
//  1. HTTP Link headers whose rel parameter lists "webmention".
//  2. The first <link> or <a> element, in document order, that has an href
//     attribute and whose rel attribute lists "webmention".
//
// The endpoint is resolved against resp.Request.URL, so relative endpoints
// are returned as absolute URLs; resp.Request must therefore be set. The
// response body is closed in every case, but it is only read when no Link
// header matches.
//
// An error is returned when the endpoint is not a parsable URL, when the body
// cannot be read or parsed, or when no endpoint is advertised.
func GetWebmentionEndpoint(resp *http.Response) (string, error) {
	// Relative endpoints are resolved against the URL that was requested.
	requestUrl := resp.Request.URL

	// isWebmentionRel reports whether rel, a whitespace separated list of
	// link relation types, includes "webmention". Relation types are compared
	// as whole tokens so that values such as "not-webmention" do not match.
	isWebmentionRel := func(rel string) bool {
		for _, relType := range strings.Fields(rel) {
			if strings.EqualFold(relType, "webmention") {
				return true
			}
		}
		return false
	}

	// Check link headers.
	// Link values are split on "," without regard to <...> or quoting; a
	// target that itself contains a comma falls apart into pieces without a
	// complete <...> pair and is skipped rather than misread.
	for _, linkHeader := range resp.Header["Link"] {
		for _, linkValue := range strings.Split(linkHeader, ",") {
			open := strings.Index(linkValue, "<")
			end := strings.Index(linkValue, ">")
			if open < 0 || end < open {
				continue
			}
			link := linkValue[open+1 : end]

			// Everything after the target is a ";" separated parameter list.
			// Only the rel parameter decides whether this link qualifies;
			// any number of other parameters may accompany it.
			for _, param := range strings.Split(linkValue[end+1:], ";") {
				nameValue := strings.SplitN(param, "=", 2)
				if len(nameValue) != 2 || !strings.EqualFold(strings.TrimSpace(nameValue[0]), "rel") {
					continue
				}
				rel := strings.Trim(strings.TrimSpace(nameValue[1]), "\"")
				if !isWebmentionRel(rel) {
					continue
				}

				// The body is not needed on this path; close it so the
				// underlying connection is released like on the HTML path.
				if resp.Body != nil {
					resp.Body.Close()
				}
				linkUrl, err := url.Parse(link)
				if err != nil {
					return "", err
				}
				return requestUrl.ResolveReference(linkUrl).String(), nil
			}
		}
	}

	// No matching header: fall back to the HTML document.
	htmlStr, err := readResponseBody(resp)
	if err != nil {
		return "", err
	}
	doc, err := html.Parse(strings.NewReader(htmlStr))
	if err != nil {
		return "", err
	}

	// findEndpoint returns the href of the first qualifying element in the
	// subtree rooted at n. An element with a webmention rel but without an
	// href attribute does not qualify and the search continues. An empty
	// href is valid and resolves to the request URL itself.
	var findEndpoint func(*html.Node) (string, bool)
	findEndpoint = func(n *html.Node) (string, bool) {
		if n.Type == html.ElementNode && (n.Data == "link" || n.Data == "a") {
			href, hasHref, relMatches := "", false, false
			for _, attr := range n.Attr {
				switch attr.Key {
				case "rel":
					if isWebmentionRel(attr.Val) {
						relMatches = true
					}
				case "href":
					if !hasHref {
						href, hasHref = attr.Val, true
					}
				}
			}
			if relMatches && hasHref {
				return href, true
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if endpoint, ok := findEndpoint(c); ok {
				return endpoint, true
			}
		}
		return "", false
	}

	linkUrlStr, found := findEndpoint(doc)
	if !found {
		return "", errors.New("no webmention endpoint found")
	}
	linkUrl, err := url.Parse(linkUrlStr)
	if err != nil {
		return "", err
	}
	return requestUrl.ResolveReference(linkUrl).String(), nil
}
// GetRedirctUris collects every redirect URI advertised by resp.
//
// Two sources are consulted and their results concatenated, document links
// first, header links second:
//   - <link> elements that have an href attribute and whose rel attribute
//     lists "redirect_uri";
//   - HTTP Link headers whose rel parameter lists "redirect_uri".
//
// Every URI is resolved against resp.Request.URL, so resp.Request must be
// set. Links whose target is not a parsable URL are skipped. The response
// body is read completely and closed.
//
// The returned slice is never nil. An error is returned only when the body
// cannot be read or parsed.
func GetRedirctUris(resp *http.Response) ([]string, error) {
	// Relative URIs are resolved against the URL that was requested.
	requestUrl := resp.Request.URL

	htmlStr, err := readResponseBody(resp)
	if err != nil {
		return make([]string, 0), err
	}
	doc, err := html.Parse(strings.NewReader(htmlStr))
	if err != nil {
		return make([]string, 0), err
	}

	// isRedirectRel reports whether rel, a whitespace separated list of link
	// relation types, includes "redirect_uri". Relation types are compared
	// as whole tokens so that a multi-valued rel still matches and unrelated
	// values merely containing the word do not.
	isRedirectRel := func(rel string) bool {
		for _, relType := range strings.Fields(rel) {
			if strings.EqualFold(relType, "redirect_uri") {
				return true
			}
		}
		return false
	}

	links := make([]string, 0)

	// addLink resolves ref against the request URL and records it; targets
	// that do not parse as a URL are ignored.
	addLink := func(ref string) {
		linkUrl, err := url.Parse(ref)
		if err != nil {
			return
		}
		links = append(links, requestUrl.ResolveReference(linkUrl).String())
	}

	// Links from the document, in document order.
	var findLinks func(*html.Node)
	findLinks = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			href, hasHref, relMatches := "", false, false
			for _, attr := range n.Attr {
				switch attr.Key {
				case "href":
					href, hasHref = attr.Val, true
				case "rel":
					if isRedirectRel(attr.Val) {
						relMatches = true
					}
				}
			}
			// An element without href is not a link; resolving its missing
			// target would wrongly yield the request URL itself.
			if relMatches && hasHref {
				addLink(href)
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			findLinks(c)
		}
	}
	findLinks(doc)

	// Check link headers.
	// Link values are split on "," without regard to <...> or quoting; a
	// target that itself contains a comma falls apart into pieces without a
	// complete <...> pair and is skipped rather than misread.
	for _, linkHeader := range resp.Header["Link"] {
		for _, linkValue := range strings.Split(linkHeader, ",") {
			open := strings.Index(linkValue, "<")
			end := strings.Index(linkValue, ">")
			if open < 0 || end < open {
				continue
			}
			link := linkValue[open+1 : end]

			// Only the rel parameter decides whether this link qualifies;
			// any number of other parameters may accompany it.
			for _, param := range strings.Split(linkValue[end+1:], ";") {
				nameValue := strings.SplitN(param, "=", 2)
				if len(nameValue) != 2 || !strings.EqualFold(strings.TrimSpace(nameValue[0]), "rel") {
					continue
				}
				rel := strings.Trim(strings.TrimSpace(nameValue[1]), "\"")
				if isRedirectRel(rel) {
					addLink(link)
					// One entry per link value, even if rel is repeated.
					break
				}
			}
		}
	}

	return links, nil
}

View File

@ -1,17 +1,10 @@
package app package app
import ( import (
"bytes"
"errors"
"io"
"net/http"
"owl-blogs/app/owlhttp" "owl-blogs/app/owlhttp"
"owl-blogs/app/repository" "owl-blogs/app/repository"
"owl-blogs/interactions" "owl-blogs/interactions"
"strings"
"time" "time"
"golang.org/x/net/html"
) )
type WebmentionService struct { type WebmentionService struct {
@ -20,86 +13,19 @@ type WebmentionService struct {
Http owlhttp.HttpClient Http owlhttp.HttpClient
} }
type ParsedHEntry struct {
Title string
}
func NewWebmentionService( func NewWebmentionService(
interactionRepository repository.InteractionRepository, interactionRepository repository.InteractionRepository,
entryRepository repository.EntryRepository, entryRepository repository.EntryRepository,
http owlhttp.HttpClient, http owlhttp.HttpClient,
bus *EventBus,
) *WebmentionService { ) *WebmentionService {
return &WebmentionService{ svc := &WebmentionService{
InteractionRepository: interactionRepository, InteractionRepository: interactionRepository,
EntryRepository: entryRepository, EntryRepository: entryRepository,
Http: http, Http: http,
} }
} bus.Subscribe(svc)
return svc
func readResponseBody(resp *http.Response) (string, error) {
defer resp.Body.Close()
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return string(bodyBytes), nil
}
func collectText(n *html.Node, buf *bytes.Buffer) {
if n.Type == html.TextNode {
buf.WriteString(n.Data)
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
collectText(c, buf)
}
}
func (WebmentionService) ParseHEntry(resp *http.Response) (ParsedHEntry, error) {
htmlStr, err := readResponseBody(resp)
if err != nil {
return ParsedHEntry{}, err
}
doc, err := html.Parse(strings.NewReader(htmlStr))
if err != nil {
return ParsedHEntry{}, err
}
var interpretHFeed func(*html.Node, *ParsedHEntry, bool) (ParsedHEntry, error)
interpretHFeed = func(n *html.Node, curr *ParsedHEntry, parent bool) (ParsedHEntry, error) {
attrs := n.Attr
for _, attr := range attrs {
if attr.Key == "class" && strings.Contains(attr.Val, "p-name") {
buf := &bytes.Buffer{}
collectText(n, buf)
curr.Title = buf.String()
return *curr, nil
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
interpretHFeed(c, curr, false)
}
return *curr, nil
}
var findHFeed func(*html.Node) (ParsedHEntry, error)
findHFeed = func(n *html.Node) (ParsedHEntry, error) {
attrs := n.Attr
for _, attr := range attrs {
if attr.Key == "class" && strings.Contains(attr.Val, "h-entry") {
return interpretHFeed(n, &ParsedHEntry{}, true)
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
entry, err := findHFeed(c)
if err == nil {
return entry, nil
}
}
return ParsedHEntry{}, errors.New("no h-entry found")
}
return findHFeed(doc)
} }
func (s *WebmentionService) GetExistingWebmention(entryId string, source string, target string) (*interactions.Webmention, error) { func (s *WebmentionService) GetExistingWebmention(entryId string, source string, target string) (*interactions.Webmention, error) {
@ -124,7 +50,7 @@ func (s *WebmentionService) ProcessWebmention(source string, target string) erro
return err return err
} }
hEntry, err := s.ParseHEntry(resp) hEntry, err := ParseHEntry(resp)
if err != nil { if err != nil {
return err return err
} }

View File

@ -15,15 +15,15 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
// func constructResponse(html []byte) *http.Response { func constructResponse(html []byte) *http.Response {
// url, _ := url.Parse("http://example.com/foo/bar") url, _ := url.Parse("http://example.com/foo/bar")
// return &http.Response{ return &http.Response{
// Request: &http.Request{ Request: &http.Request{
// URL: url, URL: url,
// }, },
// Body: io.NopCloser(bytes.NewReader([]byte(html))), Body: io.NopCloser(bytes.NewReader([]byte(html))),
// } }
// } }
type MockHttpClient struct { type MockHttpClient struct {
PageContent string PageContent string
@ -56,9 +56,11 @@ func getWebmentionService() *app.WebmentionService {
interactionRepo := infra.NewInteractionRepo(db, interactionRegister) interactionRepo := infra.NewInteractionRepo(db, interactionRegister)
bus := app.NewEventBus()
http := infra.OwlHttpClient{} http := infra.OwlHttpClient{}
return app.NewWebmentionService( return app.NewWebmentionService(
interactionRepo, entryRepo, &http, interactionRepo, entryRepo, &http, bus,
) )
} }
@ -67,18 +69,16 @@ func getWebmentionService() *app.WebmentionService {
// //
func TestParseValidHEntry(t *testing.T) { func TestParseValidHEntry(t *testing.T) {
service := getWebmentionService()
html := []byte("<div class=\"h-entry\"><div class=\"p-name\">Foo</div></div>") html := []byte("<div class=\"h-entry\"><div class=\"p-name\">Foo</div></div>")
entry, err := service.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) entry, err := app.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, entry.Title, "Foo") require.Equal(t, entry.Title, "Foo")
} }
func TestParseValidHEntryWithoutTitle(t *testing.T) { func TestParseValidHEntryWithoutTitle(t *testing.T) {
service := getWebmentionService()
html := []byte("<div class=\"h-entry\"></div><div class=\"p-name\">Foo</div>") html := []byte("<div class=\"h-entry\"></div><div class=\"p-name\">Foo</div>")
entry, err := service.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) entry, err := app.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))})
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, entry.Title, "") require.Equal(t, entry.Title, "")
@ -108,77 +108,70 @@ func TestCreateNewWebmention(t *testing.T) {
require.Equal(t, meta.Title, "Foo") require.Equal(t, meta.Title, "Foo")
} }
// func TestGetWebmentionEndpointLink(t *testing.T) { func TestGetWebmentionEndpointLink(t *testing.T) {
// service := getWebmentionService() html := []byte("<link rel=\"webmention\" href=\"http://example.com/webmention\" />")
// html := []byte("<link rel=\"webmention\" href=\"http://example.com/webmention\" />") endpoint, err := app.GetWebmentionEndpoint(constructResponse(html))
// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html))
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestGetWebmentionEndpointLinkA(t *testing.T) { func TestGetWebmentionEndpointLinkA(t *testing.T) {
// service := getWebmentionService() html := []byte("<a rel=\"webmention\" href=\"http://example.com/webmention\" />")
// html := []byte("<a rel=\"webmention\" href=\"http://example.com/webmention\" />") endpoint, err := app.GetWebmentionEndpoint(constructResponse(html))
// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html))
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestGetWebmentionEndpointLinkAFakeWebmention(t *testing.T) { func TestGetWebmentionEndpointLinkAFakeWebmention(t *testing.T) {
// service := getWebmentionService() html := []byte("<a rel=\"not-webmention\" href=\"http://example.com/foo\" /><a rel=\"webmention\" href=\"http://example.com/webmention\" />")
// html := []byte("<a rel=\"not-webmention\" href=\"http://example.com/foo\" /><a rel=\"webmention\" href=\"http://example.com/webmention\" />") endpoint, err := app.GetWebmentionEndpoint(constructResponse(html))
// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html))
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestGetWebmentionEndpointLinkHeader(t *testing.T) { func TestGetWebmentionEndpointLinkHeader(t *testing.T) {
// service := getWebmentionService() html := []byte("")
// html := []byte("") resp := constructResponse(html)
// resp := constructResponse(html) resp.Header = http.Header{"Link": []string{"<http://example.com/webmention>; rel=\"webmention\""}}
// resp.Header = http.Header{"Link": []string{"<http://example.com/webmention>; rel=\"webmention\""}} endpoint, err := app.GetWebmentionEndpoint(resp)
// endpoint, err := service.GetWebmentionEndpoint(resp)
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestGetWebmentionEndpointLinkHeaderCommas(t *testing.T) { func TestGetWebmentionEndpointLinkHeaderCommas(t *testing.T) {
// service := getWebmentionService() html := []byte("")
// html := []byte("") resp := constructResponse(html)
// resp := constructResponse(html) resp.Header = http.Header{
// resp.Header = http.Header{ "Link": []string{"<https://webmention.rocks/test/19/webmention/error>; rel=\"other\", <https://webmention.rocks/test/19/webmention>; rel=\"webmention\""},
// "Link": []string{"<https://webmention.rocks/test/19/webmention/error>; rel=\"other\", <https://webmention.rocks/test/19/webmention>; rel=\"webmention\""}, }
// } endpoint, err := app.GetWebmentionEndpoint(resp)
// endpoint, err := service.GetWebmentionEndpoint(resp)
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "https://webmention.rocks/test/19/webmention") require.Equal(t, endpoint, "https://webmention.rocks/test/19/webmention")
// } }
// func TestGetWebmentionEndpointRelativeLink(t *testing.T) { func TestGetWebmentionEndpointRelativeLink(t *testing.T) {
// service := getWebmentionService() html := []byte("<link rel=\"webmention\" href=\"/webmention\" />")
// html := []byte("<link rel=\"webmention\" href=\"/webmention\" />") endpoint, err := app.GetWebmentionEndpoint(constructResponse(html))
// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html))
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestGetWebmentionEndpointRelativeLinkInHeader(t *testing.T) { func TestGetWebmentionEndpointRelativeLinkInHeader(t *testing.T) {
// service := getWebmentionService() html := []byte("<link rel=\"webmention\" href=\"/webmention\" />")
// html := []byte("<link rel=\"webmention\" href=\"/webmention\" />") resp := constructResponse(html)
// resp := constructResponse(html) resp.Header = http.Header{"Link": []string{"</webmention>; rel=\"webmention\""}}
// resp.Header = http.Header{"Link": []string{"</webmention>; rel=\"webmention\""}} endpoint, err := app.GetWebmentionEndpoint(resp)
// endpoint, err := service.GetWebmentionEndpoint(resp)
// require.NoError(t, err) require.NoError(t, err)
// require.Equal(t, endpoint, "http://example.com/webmention") require.Equal(t, endpoint, "http://example.com/webmention")
// } }
// func TestRealWorldWebmention(t *testing.T) { // func TestRealWorldWebmention(t *testing.T) {
// service := getWebmentionService() // service := getWebmentionService()
@ -212,7 +205,7 @@ func TestCreateNewWebmention(t *testing.T) {
// //
// client := &owl.OwlHttpClient{} // client := &owl.OwlHttpClient{}
// html, _ := client.Get(link) // html, _ := client.Get(link)
// _, err := service.GetWebmentionEndpoint(html) // _, err := app.GetWebmentionEndpoint(html)
// if err != nil { // if err != nil {
// t.Errorf("Unable to find webmention: %v for link %v", err, link) // t.Errorf("Unable to find webmention: %v for link %v", err, link)

View File

@ -54,20 +54,20 @@ func App(db infra.Database) *web.WebApp {
httpClient := &infra.OwlHttpClient{} httpClient := &infra.OwlHttpClient{}
// busses // busses
entryCreationBus := app.NewEntryCreationBus() eventBus := app.NewEventBus()
// Create Services // Create Services
entryService := app.NewEntryService(entryRepo, entryCreationBus) entryService := app.NewEntryService(entryRepo, eventBus)
binaryService := app.NewBinaryFileService(binRepo) binaryService := app.NewBinaryFileService(binRepo)
authorService := app.NewAuthorService(authorRepo, siteConfigRepo) authorService := app.NewAuthorService(authorRepo, siteConfigRepo)
webmentionService := app.NewWebmentionService( webmentionService := app.NewWebmentionService(
interactionRepo, entryRepo, httpClient, interactionRepo, entryRepo, httpClient, eventBus,
) )
// plugins // plugins
plugings.NewEcho(entryCreationBus) plugings.NewEcho(eventBus)
plugings.RegisterInstagram( plugings.RegisterInstagram(
siteConfigRepo, configRegister, binaryService, entryCreationBus, siteConfigRepo, configRegister, binaryService, eventBus,
) )
// Create WebApp // Create WebApp