Skip to content

Commit

Permalink
replay: refactor user-agent exclusion, add .pt exclusion (for now)
Browse files Browse the repository at this point in the history
  • Loading branch information
ikreymer committed Sep 24, 2015
1 parent 74547ce commit 3d4b29d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion pywb/archivereplayview.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
WBURL_RX = re.compile('(.*/)([0-9]{1,14})(\w{2}_)?(/https?://.*)')
EXTRACT_ORIG_LINK = re.compile(r'<([^>]+)>;\s*rel=\"original\"')

NO_GZIP_UAS = ['NCSA_Mosaic']


#=============================================================================
class ReplayHandler(WBHandler):
Expand Down Expand Up @@ -78,7 +80,7 @@ def _do_req(self, urls, host, env, skip_hosts):

# disable gzip for user agents that don't support it (e.g. NCSA Mosaic)
# TODO: maybe ungzip later
if 'NCSA_Mosaic' in user_agent:
if any(exclude in user_agent for exclude in NO_GZIP_UAS):
headers={'Accept-Encoding': 'identity'}

for url in urls:
Expand Down
2 changes: 1 addition & 1 deletion pywb/mementoquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from redisclient import redisclient


EXCLUDE_LIST = ('http://archive.today/', 'http://archive.is', 'https://archive.today/', 'https://archive.is/')
EXCLUDE_LIST = ('http://archive.today/', 'http://archive.is', 'https://archive.today/', 'https://archive.is/', 'http://arquivo.pt')


#=============================================================================
Expand Down

0 comments on commit 3d4b29d

Please sign in to comment.