Skip to content

Commit

Permalink
FEATURE: Body cacher (discourse#460)
Browse files Browse the repository at this point in the history
Optionally pass a simple cache provider as the `body_cacher` option to allow caching and fetching the bodies of successful HTTP GET requests.

The caching mechanism you provide should support methods called:

`fetch_cached_response_body`
`cached_response_body_exists?`

These methods expect a single argument of a URL string.

`cache_response_body`

Expects a URL string, and the response body string.
  • Loading branch information
jbrw authored Mar 31, 2021
1 parent a40caa6 commit ca7ea32
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 16 deletions.
2 changes: 1 addition & 1 deletion lib/onebox/engine/gfycat_onebox.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def match

def nokogiri_page
@nokogiri_page ||= begin
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
Nokogiri::HTML(response)
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/onebox/engine/google_docs_onebox.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def match
end

def get_og_data
response = Onebox::Helpers.fetch_response(url, 10) rescue nil
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
html = Nokogiri::HTML(response)
og_data = {}
html.css('meta').each do |m|
Expand Down
3 changes: 2 additions & 1 deletion lib/onebox/engine/html.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def http_params
end

def raw
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params)
body_cacher = self.options[:body_cacher] if self.options
@raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
end

def html?
Expand Down
2 changes: 1 addition & 1 deletion lib/onebox/engine/pastebin_onebox.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def truncated?

def lines
return @lines if @lines
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", 1) rescue ""
response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
@lines = response.split("\n")
end

Expand Down
2 changes: 1 addition & 1 deletion lib/onebox/engine/twitter_status_onebox.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def http_params
private

def get_twitter_data
response = Onebox::Helpers.fetch_response(url, nil, nil, http_params) rescue nil
response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
html = Nokogiri::HTML(response)
twitter_data = {}
html.css('meta').each do |m|
Expand Down
33 changes: 22 additions & 11 deletions lib/onebox/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def self.clean(html)
html.gsub(/<[^>]+>/, ' ').gsub(/\n/, '')
end

def self.fetch_html_doc(url, headers = nil)
response = (fetch_response(url, nil, nil, headers) rescue nil)
def self.fetch_html_doc(url, headers = nil, body_cacher = nil)
response = (fetch_response(url, headers: headers, body_cacher: body_cacher) rescue nil)
doc = Nokogiri::HTML(response)
uri = Addressable::URI.parse(url)

Expand All @@ -37,24 +37,31 @@ def self.fetch_html_doc(url, headers = nil)
canonical_link = doc.at('//link[@rel="canonical"]/@href')
canonical_uri = Addressable::URI.parse(canonical_link)
if canonical_link && "#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
response = (fetch_response(canonical_uri.to_s, nil, nil, headers) rescue nil)
response = (fetch_response(canonical_uri.to_s, headers: headers, body_cacher: body_cacher) rescue nil)
doc = Nokogiri::HTML(response) if response
end
end

doc
end

def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
def self.fetch_response(location, redirect_limit: 5, domain: nil, headers: nil, body_cacher: nil)
redirect_limit = Onebox.options.redirect_limit if redirect_limit > Onebox.options.redirect_limit

limit ||= 5
limit = Onebox.options.redirect_limit if limit > Onebox.options.redirect_limit

raise Net::HTTPError.new('HTTP redirect too deep', location) if limit == 0
raise Net::HTTPError.new('HTTP redirect too deep', location) if redirect_limit == 0

uri = Addressable::URI.parse(location)
uri = Addressable::URI.join(domain, uri) if !uri.host

use_body_cacher = body_cacher && body_cacher.respond_to?('fetch_cached_response_body')
if use_body_cacher
response_body = body_cacher.fetch_cached_response_body(uri.to_s)

if response_body.present?
return response_body
end
end

result = StringIO.new
Net::HTTP.start(uri.host, uri.port, use_ssl: uri.normalized_scheme == 'https') do |http|
http.open_timeout = Onebox.options.connect_timeout
Expand Down Expand Up @@ -86,9 +93,9 @@ def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
response.error! unless [301, 302].include?(code)
return fetch_response(
response['location'],
limit - 1,
"#{uri.scheme}://#{uri.host}",
redir_header
redirect_limit: redirect_limit - 1,
domain: "#{uri.scheme}://#{uri.host}",
headers: redir_header
)
end

Expand All @@ -98,6 +105,10 @@ def self.fetch_response(location, limit = nil, domain = nil, headers = nil)
raise Timeout::Error.new if (Time.now - start_time) > Onebox.options.timeout
end

if use_body_cacher && body_cacher.cache_response_body?(uri)
body_cacher.cache_response_body(uri.to_s, result.string)
end

return result.string
end
end
Expand Down

0 comments on commit ca7ea32

Please sign in to comment.