package owl

import (
	"bytes"
	"errors"
	"io"
	"net/http"
	"strings"
	"time"

	"golang.org/x/net/html"
)

// WebmentionIn is the YAML-serialisable record of an incoming webmention.
// The meaning of the individual fields beyond their names is defined by
// the code that populates them, which is not part of this file.
type WebmentionIn struct {
	Source         string    `yaml:"source"`
	Title          string    `yaml:"title"`
	ApprovalStatus string    `yaml:"approval_status"`
	RetrievedAt    time.Time `yaml:"retrieved_at"`
}

// WebmentionOut is the YAML-serialisable record of an outgoing webmention
// aimed at Target.
type WebmentionOut struct {
	Target     string    `yaml:"target"`
	Supported  bool      `yaml:"supported"`
	ScannedAt  time.Time `yaml:"scanned_at"`
	LastSentAt time.Time `yaml:"last_sent_at"`
}

// HttpRetriever fetches the raw body behind a URL.
type HttpRetriever interface {
	Get(url string) ([]byte, error)
}

// HttpParser extracts microformat data and links from an HTML document.
type HttpParser interface {
	ParseHEntry(data []byte) (ParsedHEntry, error)
	ParseLinks(data []byte) ([]string, error)
}

// OwlHttpRetriever is the HttpRetriever backed by net/http.
type OwlHttpRetriever struct{}

// OwlMicroformatParser is the HttpParser backed by golang.org/x/net/html.
type OwlMicroformatParser struct{}

// ParsedHEntry holds the properties extracted from an h-entry element.
type ParsedHEntry struct {
	Title string
}

// Get performs an HTTP GET on url and returns the complete response body.
//
// The body is always closed before returning. On any error (request failure
// or a read error part-way through the body) an empty slice is returned
// together with the error. The HTTP status code is not inspected.
//
// NOTE(review): this uses http.Get, i.e. http.DefaultClient, which has no
// timeout configured.
func (OwlHttpRetriever) Get(url string) ([]byte, error) {
	resp, err := http.Get(url)
	if err != nil {
		return []byte{}, err
	}
	// The body must be closed or the underlying connection is leaked.
	defer resp.Body.Close()

	// A single Read into a nil slice reads nothing; io.ReadAll drains the
	// body until EOF.
	// TODO: encoding
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return []byte{}, err
	}
	return data, nil
}

// collectText appends the data of every text node in the subtree rooted at n
// to buf, in document order.
func collectText(n *html.Node, buf *bytes.Buffer) {
	if n.Type == html.TextNode {
		buf.WriteString(n.Data)
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		collectText(c, buf)
	}
}

// ParseHEntry parses data as HTML and returns the first element (in document
// order) whose class attribute contains the token "h-entry".
//
// Title is set to the concatenated text of the element carrying the class
// token "p-name" inside that h-entry (the h-entry element itself counts).
// The search does not descend into a p-name element; if several p-name
// elements exist, the last one in document order wins. Title stays empty
// when there is none.
//
// An error is returned if the HTML cannot be parsed or if the document
// contains no h-entry.
func (OwlMicroformatParser) ParseHEntry(data []byte) (ParsedHEntry, error) {
	doc, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return ParsedHEntry{}, err
	}

	// hasClass reports whether n has name as one whole whitespace-separated
	// class token. A substring test would wrongly match classes such as
	// "h-entry-list" or "p-name-suffix".
	hasClass := func(n *html.Node, name string) bool {
		for _, attr := range n.Attr {
			if attr.Key != "class" {
				continue
			}
			for _, token := range strings.Fields(attr.Val) {
				if token == name {
					return true
				}
			}
		}
		return false
	}

	// readEntry walks the subtree rooted at n and fills entry from the
	// microformat properties it finds.
	var readEntry func(n *html.Node, entry *ParsedHEntry)
	readEntry = func(n *html.Node, entry *ParsedHEntry) {
		if hasClass(n, "p-name") {
			var buf bytes.Buffer
			collectText(n, &buf)
			entry.Title = buf.String()
			return
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			readEntry(c, entry)
		}
	}

	// findEntry locates the first h-entry in the subtree rooted at n.
	var findEntry func(n *html.Node) (ParsedHEntry, bool)
	findEntry = func(n *html.Node) (ParsedHEntry, bool) {
		if hasClass(n, "h-entry") {
			var entry ParsedHEntry
			readEntry(n, &entry)
			return entry, true
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if entry, ok := findEntry(c); ok {
				return entry, true
			}
		}
		return ParsedHEntry{}, false
	}

	if entry, ok := findEntry(doc); ok {
		return entry, nil
	}
	return ParsedHEntry{}, errors.New("no h-entry found")
}

// ParseLinks parses data as HTML and returns the value of every href
// attribute found on <a> elements, in document order.
//
// The values are returned verbatim (no resolution or de-duplication). The
// returned slice is never nil; it is empty when the document has no links or
// when parsing fails, in which case the parse error is returned as well.
func (OwlMicroformatParser) ParseLinks(data []byte) ([]string, error) {
	doc, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return make([]string, 0), err
	}

	links := make([]string, 0)

	// Pre-order walk: a node's own href is recorded before its descendants'.
	var walk func(n *html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key == "href" {
					links = append(links, attr.Val)
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	return links, nil
}