diff --git a/app/entry_service_test.go b/app/entry_service_test.go index 5619ca9..88a4e4b 100644 --- a/app/entry_service_test.go +++ b/app/entry_service_test.go @@ -14,7 +14,7 @@ func setupService() *app.EntryService { register := app.NewEntryTypeRegistry() register.Register(&test.MockEntry{}) repo := infra.NewEntryRepository(db, register) - service := app.NewEntryService(repo, app.NewEntryCreationBus()) + service := app.NewEntryService(repo, app.NewEventBus()) return service } diff --git a/app/entry_creation_bus.go b/app/event_bus.go similarity index 96% rename from app/entry_creation_bus.go rename to app/event_bus.go index 1e4fe91..97e1726 100644 --- a/app/entry_creation_bus.go +++ b/app/event_bus.go @@ -18,7 +18,7 @@ type EventBus struct { subscribers []Subscriber } -func NewEntryCreationBus() *EventBus { +func NewEventBus() *EventBus { return &EventBus{subscribers: make([]Subscriber, 0)} } diff --git a/app/indieweb_utils.go b/app/indieweb_utils.go new file mode 100644 index 0000000..7c52a0d --- /dev/null +++ b/app/indieweb_utils.go @@ -0,0 +1,267 @@ +package app + +import ( + "bytes" + "errors" + "io" + "net/http" + "net/url" + "strings" + + "golang.org/x/net/html" +) + +type HtmlParser interface { + ParseHEntry(resp *http.Response) (ParsedHEntry, error) + ParseLinks(resp *http.Response) ([]string, error) + ParseLinksFromString(string) ([]string, error) + GetWebmentionEndpoint(resp *http.Response) (string, error) + GetRedirctUris(resp *http.Response) ([]string, error) +} + +type ParsedHEntry struct { + Title string +} + +func collectText(n *html.Node, buf *bytes.Buffer) { + + if n.Type == html.TextNode { + buf.WriteString(n.Data) + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + collectText(c, buf) + } +} + +func readResponseBody(resp *http.Response) (string, error) { + defer resp.Body.Close() + bodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(bodyBytes), nil +} + +func ParseHEntry(resp *http.Response) (ParsedHEntry, error) { + htmlStr, err := readResponseBody(resp) + if err != nil { + return ParsedHEntry{}, err + } + doc, err := html.Parse(strings.NewReader(htmlStr)) + if err != nil { + return ParsedHEntry{}, err + } + + var interpretHFeed func(*html.Node, *ParsedHEntry, bool) (ParsedHEntry, error) + interpretHFeed = func(n *html.Node, curr *ParsedHEntry, parent bool) (ParsedHEntry, error) { + attrs := n.Attr + for _, attr := range attrs { + if attr.Key == "class" && strings.Contains(attr.Val, "p-name") { + buf := &bytes.Buffer{} + collectText(n, buf) + curr.Title = buf.String() + return *curr, nil + } + } + + for c := n.FirstChild; c != nil; c = c.NextSibling { + interpretHFeed(c, curr, false) + } + return *curr, nil + } + + var findHFeed func(*html.Node) (ParsedHEntry, error) + findHFeed = func(n *html.Node) (ParsedHEntry, error) { + attrs := n.Attr + for _, attr := range attrs { + if attr.Key == "class" && strings.Contains(attr.Val, "h-entry") { + return interpretHFeed(n, &ParsedHEntry{}, true) + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + entry, err := findHFeed(c) + if err == nil { + return entry, nil + } + } + return ParsedHEntry{}, errors.New("no h-entry found") + } + return findHFeed(doc) +} + +func ParseLinks(resp *http.Response) ([]string, error) { + htmlStr, err := readResponseBody(resp) + if err != nil { + return []string{}, err + } + return ParseLinksFromString(htmlStr) +} + +func ParseLinksFromString(htmlStr string) ([]string, error) { + doc, err := html.Parse(strings.NewReader(htmlStr)) + if err != nil { + return make([]string, 0), err + } + + var findLinks func(*html.Node) ([]string, error) + findLinks = func(n *html.Node) ([]string, error) { + links := make([]string, 0) + if n.Type == html.ElementNode && n.Data == "a" { + for _, attr := range n.Attr { + if attr.Key == "href" { + links = append(links, attr.Val) + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + childLinks, _ := findLinks(c) + links = append(links, childLinks...) + } + return links, nil + } + return findLinks(doc) +} + +func GetWebmentionEndpoint(resp *http.Response) (string, error) { + //request url + requestUrl := resp.Request.URL + + // Check link headers + for _, linkHeader := range resp.Header["Link"] { + linkHeaderParts := strings.Split(linkHeader, ",") + for _, linkHeaderPart := range linkHeaderParts { + linkHeaderPart = strings.TrimSpace(linkHeaderPart) + params := strings.Split(linkHeaderPart, ";") + if len(params) != 2 { + continue + } + for _, param := range params[1:] { + param = strings.TrimSpace(param) + if strings.Contains(param, "webmention") { + link := strings.Split(params[0], ";")[0] + link = strings.Trim(link, "<>") + linkUrl, err := url.Parse(link) + if err != nil { + return "", err + } + return requestUrl.ResolveReference(linkUrl).String(), nil + } + } + } + } + + htmlStr, err := readResponseBody(resp) + if err != nil { + return "", err + } + doc, err := html.Parse(strings.NewReader(htmlStr)) + if err != nil { + return "", err + } + + var findEndpoint func(*html.Node) (string, error) + findEndpoint = func(n *html.Node) (string, error) { + if n.Type == html.ElementNode && (n.Data == "link" || n.Data == "a") { + for _, attr := range n.Attr { + if attr.Key == "rel" { + vals := strings.Split(attr.Val, " ") + for _, val := range vals { + if val == "webmention" { + for _, attr := range n.Attr { + if attr.Key == "href" { + return attr.Val, nil + } + } + } + } + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + endpoint, err := findEndpoint(c) + if err == nil { + return endpoint, nil + } + } + return "", errors.New("no webmention endpoint found") + } + linkUrlStr, err := findEndpoint(doc) + if err != nil { + return "", err + } + linkUrl, err := url.Parse(linkUrlStr) + if err != nil { + return "", err + } + return requestUrl.ResolveReference(linkUrl).String(), nil +} + +func GetRedirctUris(resp *http.Response) ([]string, error) { + //request url + requestUrl := resp.Request.URL + + htmlStr, err := readResponseBody(resp) + if err != nil { + return make([]string, 0), err + } + doc, err := html.Parse(strings.NewReader(htmlStr)) + if err != nil { + return make([]string, 0), err + } + + var findLinks func(*html.Node) ([]string, error) + // Check link headers + header_links := make([]string, 0) + for _, linkHeader := range resp.Header["Link"] { + linkHeaderParts := strings.Split(linkHeader, ",") + for _, linkHeaderPart := range linkHeaderParts { + linkHeaderPart = strings.TrimSpace(linkHeaderPart) + params := strings.Split(linkHeaderPart, ";") + if len(params) != 2 { + continue + } + for _, param := range params[1:] { + param = strings.TrimSpace(param) + if strings.Contains(param, "redirect_uri") { + link := strings.Split(params[0], ";")[0] + link = strings.Trim(link, "<>") + linkUrl, err := url.Parse(link) + if err == nil { + header_links = append(header_links, requestUrl.ResolveReference(linkUrl).String()) + } + } + } + } + } + + findLinks = func(n *html.Node) ([]string, error) { + links := make([]string, 0) + if n.Type == html.ElementNode && n.Data == "link" { + // check for rel="redirect_uri" + rel := "" + href := "" + + for _, attr := range n.Attr { + if attr.Key == "href" { + href = attr.Val + } + if attr.Key == "rel" { + rel = attr.Val + } + } + if rel == "redirect_uri" { + linkUrl, err := url.Parse(href) + if err == nil { + links = append(links, requestUrl.ResolveReference(linkUrl).String()) + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + childLinks, _ := findLinks(c) + links = append(links, childLinks...) + } + return links, nil + } + body_links, err := findLinks(doc) + return append(body_links, header_links...), err +} diff --git a/app/webmention_service.go b/app/webmention_service.go index 5c712ed..47a5ee7 100644 --- a/app/webmention_service.go +++ b/app/webmention_service.go @@ -1,17 +1,10 @@ package app import ( - "bytes" - "errors" - "io" - "net/http" "owl-blogs/app/owlhttp" "owl-blogs/app/repository" "owl-blogs/interactions" - "strings" "time" - - "golang.org/x/net/html" ) type WebmentionService struct { @@ -20,86 +13,19 @@ type WebmentionService struct { Http owlhttp.HttpClient } -type ParsedHEntry struct { - Title string -} - func NewWebmentionService( interactionRepository repository.InteractionRepository, entryRepository repository.EntryRepository, http owlhttp.HttpClient, + bus *EventBus, ) *WebmentionService { - return &WebmentionService{ + svc := &WebmentionService{ InteractionRepository: interactionRepository, EntryRepository: entryRepository, Http: http, } -} - -func readResponseBody(resp *http.Response) (string, error) { - defer resp.Body.Close() - bodyBytes, err := io.ReadAll(resp.Body) - if err != nil { - return "", err - } - return string(bodyBytes), nil -} - -func collectText(n *html.Node, buf *bytes.Buffer) { - - if n.Type == html.TextNode { - buf.WriteString(n.Data) - } - for c := n.FirstChild; c != nil; c = c.NextSibling { - collectText(c, buf) - } -} - -func (WebmentionService) ParseHEntry(resp *http.Response) (ParsedHEntry, error) { - htmlStr, err := readResponseBody(resp) - if err != nil { - return ParsedHEntry{}, err - } - doc, err := html.Parse(strings.NewReader(htmlStr)) - if err != nil { - return ParsedHEntry{}, err - } - - var interpretHFeed func(*html.Node, *ParsedHEntry, bool) (ParsedHEntry, error) - interpretHFeed = func(n *html.Node, curr *ParsedHEntry, parent bool) (ParsedHEntry, error) { - attrs := n.Attr - for _, attr := range attrs { - if attr.Key == "class" && strings.Contains(attr.Val, "p-name") { - buf := &bytes.Buffer{} - collectText(n, buf) - curr.Title = buf.String() - return *curr, nil - } - } - - for c := n.FirstChild; c != nil; c = c.NextSibling { - interpretHFeed(c, curr, false) - } - return *curr, nil - } - - var findHFeed func(*html.Node) (ParsedHEntry, error) - findHFeed = func(n *html.Node) (ParsedHEntry, error) { - attrs := n.Attr - for _, attr := range attrs { - if attr.Key == "class" && strings.Contains(attr.Val, "h-entry") { - return interpretHFeed(n, &ParsedHEntry{}, true) - } - } - for c := n.FirstChild; c != nil; c = c.NextSibling { - entry, err := findHFeed(c) - if err == nil { - return entry, nil - } - } - return ParsedHEntry{}, errors.New("no h-entry found") - } - return findHFeed(doc) + bus.Subscribe(svc) + return svc } func (s *WebmentionService) GetExistingWebmention(entryId string, source string, target string) (*interactions.Webmention, error) { @@ -124,7 +50,7 @@ func (s *WebmentionService) ProcessWebmention(source string, target string) erro return err } - hEntry, err := s.ParseHEntry(resp) + hEntry, err := ParseHEntry(resp) if err != nil { return err } diff --git a/app/webmention_test.go b/app/webmention_test.go index 29d85bb..a7a139d 100644 --- a/app/webmention_test.go +++ b/app/webmention_test.go @@ -15,15 +15,15 @@ import ( "github.com/stretchr/testify/require" ) -// func constructResponse(html []byte) *http.Response { -// url, _ := url.Parse("http://example.com/foo/bar") -// return &http.Response{ -// Request: &http.Request{ -// URL: url, -// }, -// Body: io.NopCloser(bytes.NewReader([]byte(html))), -// } -// } +func constructResponse(html []byte) *http.Response { + url, _ := url.Parse("http://example.com/foo/bar") + return &http.Response{ + Request: &http.Request{ + URL: url, + }, + Body: io.NopCloser(bytes.NewReader([]byte(html))), + } +} type MockHttpClient struct { PageContent string @@ -56,9 +56,11 @@ func getWebmentionService() *app.WebmentionService { interactionRepo := infra.NewInteractionRepo(db, interactionRegister) + bus := app.NewEventBus() + http := infra.OwlHttpClient{} return app.NewWebmentionService( - interactionRepo, entryRepo, &http, + interactionRepo, entryRepo, &http, bus, ) } @@ -67,18 +69,16 @@ func getWebmentionService() *app.WebmentionService { // func TestParseValidHEntry(t *testing.T) { - service := getWebmentionService() html := []byte("
Foo
") - entry, err := service.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) + entry, err := app.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) require.NoError(t, err) require.Equal(t, entry.Title, "Foo") } func TestParseValidHEntryWithoutTitle(t *testing.T) { - service := getWebmentionService() html := []byte("
Foo
") - entry, err := service.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) + entry, err := app.ParseHEntry(&http.Response{Body: io.NopCloser(bytes.NewReader(html))}) require.NoError(t, err) require.Equal(t, entry.Title, "") @@ -108,77 +108,70 @@ func TestCreateNewWebmention(t *testing.T) { require.Equal(t, meta.Title, "Foo") } -// func TestGetWebmentionEndpointLink(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html)) +func TestGetWebmentionEndpointLink(t *testing.T) { + html := []byte("") + endpoint, err := app.GetWebmentionEndpoint(constructResponse(html)) -// require.NoError(t, err) + require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.Equal(t, endpoint, "http://example.com/webmention") +} -// func TestGetWebmentionEndpointLinkA(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html)) +func TestGetWebmentionEndpointLinkA(t *testing.T) { + html := []byte("") + endpoint, err := app.GetWebmentionEndpoint(constructResponse(html)) -// require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "http://example.com/webmention") +} -// func TestGetWebmentionEndpointLinkAFakeWebmention(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html)) +func TestGetWebmentionEndpointLinkAFakeWebmention(t *testing.T) { + html := []byte("") + endpoint, err := app.GetWebmentionEndpoint(constructResponse(html)) -// require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "http://example.com/webmention") +} -// func TestGetWebmentionEndpointLinkHeader(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// resp := constructResponse(html) -// resp.Header = http.Header{"Link": []string{"; rel=\"webmention\""}} -// endpoint, err := service.GetWebmentionEndpoint(resp) +func TestGetWebmentionEndpointLinkHeader(t *testing.T) { + html := []byte("") + resp := constructResponse(html) + resp.Header = http.Header{"Link": []string{"; rel=\"webmention\""}} + endpoint, err := app.GetWebmentionEndpoint(resp) -// require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "http://example.com/webmention") +} -// func TestGetWebmentionEndpointLinkHeaderCommas(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// resp := constructResponse(html) -// resp.Header = http.Header{ -// "Link": []string{"; rel=\"other\", ; rel=\"webmention\""}, -// } -// endpoint, err := service.GetWebmentionEndpoint(resp) +func TestGetWebmentionEndpointLinkHeaderCommas(t *testing.T) { + html := []byte("") + resp := constructResponse(html) + resp.Header = http.Header{ + "Link": []string{"; rel=\"other\", ; rel=\"webmention\""}, + } + endpoint, err := app.GetWebmentionEndpoint(resp) -// require.NoError(t, err) -// require.Equal(t, endpoint, "https://webmention.rocks/test/19/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "https://webmention.rocks/test/19/webmention") +} -// func TestGetWebmentionEndpointRelativeLink(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// endpoint, err := service.GetWebmentionEndpoint(constructResponse(html)) +func TestGetWebmentionEndpointRelativeLink(t *testing.T) { + html := []byte("") + endpoint, err := app.GetWebmentionEndpoint(constructResponse(html)) -// require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "http://example.com/webmention") +} -// func TestGetWebmentionEndpointRelativeLinkInHeader(t *testing.T) { -// service := getWebmentionService() -// html := []byte("") -// resp := constructResponse(html) -// resp.Header = http.Header{"Link": []string{"; rel=\"webmention\""}} -// endpoint, err := service.GetWebmentionEndpoint(resp) +func TestGetWebmentionEndpointRelativeLinkInHeader(t *testing.T) { + html := []byte("") + resp := constructResponse(html) + resp.Header = http.Header{"Link": []string{"; rel=\"webmention\""}} + endpoint, err := app.GetWebmentionEndpoint(resp) -// require.NoError(t, err) -// require.Equal(t, endpoint, "http://example.com/webmention") -// } + require.NoError(t, err) + require.Equal(t, endpoint, "http://example.com/webmention") +} // func TestRealWorldWebmention(t *testing.T) { // service := getWebmentionService() @@ -212,7 +205,7 @@ func TestCreateNewWebmention(t *testing.T) { // // client := &owl.OwlHttpClient{} // html, _ := client.Get(link) -// _, err := service.GetWebmentionEndpoint(html) +// _, err := app.GetWebmentionEndpoint(html) // if err != nil { // t.Errorf("Unable to find webmention: %v for link %v", err, link) diff --git a/cmd/owl/main.go b/cmd/owl/main.go index 99a495e..4fb3cf8 100644 --- a/cmd/owl/main.go +++ b/cmd/owl/main.go @@ -54,20 +54,20 @@ func App(db infra.Database) *web.WebApp { httpClient := &infra.OwlHttpClient{} // busses - entryCreationBus := app.NewEntryCreationBus() + eventBus := app.NewEventBus() // Create Services - entryService := app.NewEntryService(entryRepo, entryCreationBus) + entryService := app.NewEntryService(entryRepo, eventBus) binaryService := app.NewBinaryFileService(binRepo) authorService := app.NewAuthorService(authorRepo, siteConfigRepo) webmentionService := app.NewWebmentionService( - interactionRepo, entryRepo, httpClient, + interactionRepo, entryRepo, httpClient, eventBus, ) // plugins - plugings.NewEcho(entryCreationBus) + plugings.NewEcho(eventBus) plugings.RegisterInstagram( - siteConfigRepo, configRegister, binaryService, entryCreationBus, + siteConfigRepo, configRegister, binaryService, eventBus, ) // Create WebApp