Skip to content

Commit

Permalink
Now cached images will use the archive
Browse files Browse the repository at this point in the history
  • Loading branch information
RadhiFadlillah committed Aug 12, 2019
1 parent 4e76288 commit 2cb95c1
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 5 deletions.
1 change: 1 addition & 0 deletions internal/view/js/component/dialog.js
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ export default {
var value = field.value;
if (field.type === 'number') value = parseInt(value, 10) || 0;
else if (field.type === 'float') value = parseFloat(value) || 0.0;
else if (field.type === 'check') value = value !== '';
data[field.name] = value;
})

Expand Down
8 changes: 4 additions & 4 deletions internal/webserver/assets-prod.go

Large diffs are not rendered by default.

64 changes: 63 additions & 1 deletion internal/webserver/handler-ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,72 @@ func (h *handler) serveBookmarkContent(w http.ResponseWriter, r *http.Request, p
}
}

// Check if it has archive
// Check if it has archive.
archivePath := fp.Join(h.DataDir, "archive", strID)
if fileExists(archivePath) {
bookmark.HasArchive = true

// Open archive, look in cache first
var archive *warc.Archive
cacheData, found := h.ArchiveCache.Get(strID)

if found {
archive = cacheData.(*warc.Archive)
} else {
archivePath := fp.Join(h.DataDir, "archive", strID)
archive, err = warc.Open(archivePath)
checkError(err)

h.ArchiveCache.Set(strID, archive, 0)
}

// Find all image and convert its source to use the archive URL.
createArchivalURL := func(archivalName string) string {
archivalURL := *r.URL
archivalURL.Path = path.Join("/", "bookmark", strID, "archive", archivalName)
return archivalURL.String()
}

buffer := strings.NewReader(bookmark.HTML)
doc, err := goquery.NewDocumentFromReader(buffer)
checkError(err)

doc.Find("img, picture, figure, source").Each(func(_ int, node *goquery.Selection) {
// Get the needed attributes
src, _ := node.Attr("src")
strSrcSets, _ := node.Attr("srcset")

// Convert `src` attributes
if src != "" {
archivalName := getArchivalName(src)
if archivalName != "" && archive.HasResource(archivalName) {
node.SetAttr("src", createArchivalURL(archivalName))
}
}

// Split srcset by comma, then process it like any URLs
srcSets := strings.Split(strSrcSets, ",")
for i, srcSet := range srcSets {
srcSet = strings.TrimSpace(srcSet)
parts := strings.SplitN(srcSet, " ", 2)
if parts[0] == "" {
continue
}

archivalName := getArchivalName(parts[0])
if archivalName != "" && archive.HasResource(archivalName) {
archivalURL := createArchivalURL(archivalName)
srcSets[i] = strings.Replace(srcSets[i], parts[0], archivalURL, 1)
}
}

if len(srcSets) > 0 {
node.SetAttr("srcset", strings.Join(srcSets, ","))
}
})

bookmark.HTML, err = goquery.OuterHtml(doc.Selection)
checkError(err)
}

// Create template
Expand Down
34 changes: 34 additions & 0 deletions internal/webserver/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@ import (
nurl "net/url"
"os"
fp "path/filepath"
"regexp"
"strings"
"syscall"
"time"

"github.com/disintegration/imaging"
)

// rxRepeatedStrip matches a run of one or more consecutive dashes. It is used
// to collapse such runs into a single dash. The pattern contains no letters,
// so no case-insensitivity flag is needed.
var rxRepeatedStrip = regexp.MustCompile(`-+`)

func serveFile(w http.ResponseWriter, filePath string, cache bool) error {
// Open file
src, err := assets.Open(filePath)
Expand Down Expand Up @@ -184,6 +187,37 @@ func createTemplate(filename string, funcMap template.FuncMap) (*template.Templa
return template.New(filename).Delims("$$", "$$").Funcs(funcMap).Parse(string(srcContent))
}

// getArchivalName turns a resource URL into the flat, dash-separated name
// under which that resource is looked up in an archive.
func getArchivalName(src string) string {
	name := src

	// Sites such as Wikipedia and Dev.to emit URLs whose query or path is
	// percent-escaped. A Wikipedia stylesheet, for instance, is referenced as :
	// load.php?lang=en&modules=ext.3d.styles%7Cext.cite.styles%7Cext.uls.interlanguage
	// while the browser registers the downloaded resource in unescaped form :
	// load.php?lang=en&modules=ext.3d.styles|ext.cite.styles|ext.uls.interlanguage
	// The name is therefore built from the unescaped query and path. If the
	// URL cannot be parsed, the raw source string is used as is.
	if parsed, err := nurl.Parse(src); err == nil {
		// A query that cannot be unescaped yields an empty string, in which
		// case the original raw query is kept.
		if query, _ := nurl.QueryUnescape(parsed.RawQuery); query != "" {
			parsed.RawQuery = query
		}

		// Swap the first occurrence of the escaped path for the plain path.
		name = strings.Replace(parsed.String(), parsed.EscapedPath(), parsed.Path, 1)
	}

	// The scheme separator is reduced to a single slash first, then every
	// remaining separator character becomes a dash.
	name = strings.ReplaceAll(name, "://", "/")
	for _, sep := range [...]string{"?", "#", "/", " "} {
		name = strings.ReplaceAll(name, sep, "-")
	}

	// Collapse runs of dashes into a single dash.
	return rxRepeatedStrip.ReplaceAllString(name, "-")
}

func checkError(err error) {
if err == nil {
return
Expand Down
17 changes: 17 additions & 0 deletions pkg/warc/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,20 @@ func (arc *Archive) Read(name string) ([]byte, string, error) {

return content, strContentType, nil
}

// HasResource reports whether the archive holds a bucket for the given
// resource name. An empty name refers to the "archive-root" bucket.
func (arc *Archive) HasResource(name string) bool {
	// Fall back to the root entry when no name is supplied.
	bucketName := name
	if bucketName == "" {
		bucketName = "archive-root"
	}

	// The lookup runs inside a View transaction; the callback itself never
	// returns an error, and the result of View is not inspected.
	found := false
	arc.db.View(func(tx *bbolt.Tx) error {
		found = tx.Bucket([]byte(bucketName)) != nil
		return nil
	})

	return found
}

0 comments on commit 2cb95c1

Please sign in to comment.