Skip to content

Commit

Permalink
Add initial method to disable XHR
Browse files Browse the repository at this point in the history
  • Loading branch information
RadhiFadlillah committed Aug 22, 2019
1 parent 02699ab commit 2b9e225
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
32 changes: 32 additions & 0 deletions pkg/warc/internal/archiver/processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func (arc *Archiver) ProcessHTMLFile(res ResourceURL, input io.Reader) (result P
}

// TODO: I'm still not entirely sure, but IMHO it's safer to disable JavaScript.
// Ideally, we would only block XHR requests by using the function disableXHR(doc).
// Unfortunately, the result is not good enough for now, so it's still not used.
removeNodes(getElementsByTagName(doc, "script"), nil)

// Convert lazy loaded image to normal
Expand Down Expand Up @@ -129,6 +131,36 @@ func (arc *Archiver) ProcessOtherFile(res ResourceURL, input io.Reader) (result
return result, nil
}

// disableXHR injects a <script> element at the very beginning of the
// document's <head> that replaces the browser's network request APIs
// (fetch and XMLHttpRequest) with inert stubs, so that scripts in the
// archived page cannot issue new requests.
//
// If the document has no <head> element, a new one is created and
// prepended to doc.
// NOTE(review): in that fallback the new <head> becomes the first child of
// doc itself rather than of the <html> element — confirm this is intended.
func disableXHR(doc *html.Node) {
	var head *html.Node
	if heads := getElementsByTagName(doc, "head"); len(heads) > 0 {
		head = heads[0]
	} else {
		head = createElement("head")
		prependChild(doc, head)
	}

	// The stub must be syntactically valid JavaScript, otherwise the browser
	// discards the whole script and nothing gets disabled:
	//   - fetch has to stay callable and return a promise; it rejects with a
	//     TypeError, which is what callers see on a network failure.
	//   - object literal keys are plain identifiers (no parentheses), and the
	//     real XHR method is getResponseHeader (singular).
	xhrDisabler := `
		fetch = function() {
			return Promise.reject(new TypeError("fetch is disabled"));
		};
		XMLHttpRequest = function() {};
		XMLHttpRequest.prototype = {
			open: function(){},
			send: function(){},
			abort: function(){},
			setRequestHeader: function(){},
			overrideMimeType: function(){},
			getResponseHeader: function(){ return null; },
			getAllResponseHeaders: function(){ return ""; }
		};`

	script := createElement("script")
	scriptContent := createTextNode(xhrDisabler)
	prependChild(script, scriptContent)

	// Prepend so the stubs are installed before any other script in <head>.
	prependChild(head, script)
}

// fixRelativeURIs converts each <a> in the given element
// to an absolute URI, ignoring #ref URIs.
func fixRelativeURIs(doc *html.Node, pageURL *nurl.URL) {
Expand Down
16 changes: 16 additions & 0 deletions pkg/warc/internal/archiver/utils-dom.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,22 @@ func appendChild(node *html.Node, child *html.Node) {
}
}

// prependChild inserts child as the first child of node, i.e. it is the
// counterpart of appendChild for the front of the children list.
//
// When child is already attached to a parent, a clone of it is inserted
// instead and the original is detached from its current parent.
func prependChild(node *html.Node, child *html.Node) {
	if parent := child.Parent; parent != nil {
		clone := cloneNode(child)
		parent.RemoveChild(child)
		child = clone
	}

	// InsertBefore appends when its reference node is nil, so this single
	// call also covers the case where node has no children yet.
	node.InsertBefore(child, node.FirstChild)
}

// replaceNode replaces an OldNode with a NewNode.
func replaceNode(oldNode *html.Node, newNode *html.Node) {
if oldNode.Parent == nil {
Expand Down

0 comments on commit 2b9e225

Please sign in to comment.