-
Notifications
You must be signed in to change notification settings - Fork 230
/
url.go
31 lines (24 loc) · 635 Bytes
/
url.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package docconv
import (
"bytes"
"io"
"github.com/advancedlogic/GoOse"
)
// ConvertURL reads a URL from input and extracts the article found at that
// URL using GoOse.
//
// The whole of input is consumed and interpreted as a single URL. Leading and
// trailing whitespace is removed first, so a URL terminated by a newline
// (e.g. piped from a shell command or read from a text file) is accepted.
//
// The readability parameter is currently unused by this function.
//
// On success it returns the article's cleaned text and a metadata map with the
// keys "title", "description" and "image". If reading input or extracting the
// article fails, it returns an empty string, a nil map and the error.
func ConvertURL(input io.Reader, readability bool) (string, map[string]string, error) {
	var buf bytes.Buffer
	if _, err := buf.ReadFrom(input); err != nil {
		return "", nil, err
	}

	// Surrounding whitespace is never part of the URL itself; strip it so it
	// is not handed to the extractor as part of the address.
	rawURL := string(bytes.TrimSpace(buf.Bytes()))

	g := goose.New()
	article, err := g.ExtractFromURL(rawURL)
	if err != nil {
		return "", nil, err
	}

	// Build the metadata only once extraction has succeeded.
	meta := map[string]string{
		"title":       article.Title,
		"description": article.MetaDescription,
		"image":       article.TopImage,
	}
	return article.CleanedText, meta, nil
}