Skip to content

Commit

Permalink
browser_app: improved cleanup, batch redis commands, use current url …
Browse files Browse the repository at this point in the history
…if no referrer

Add an improved inject script for reference; it is not used due to issues with frames and with some browsers.
  • Loading branch information
ikreymer committed Nov 22, 2015
1 parent bd50799 commit 39c47af
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 38 deletions.
68 changes: 49 additions & 19 deletions app/browser_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@

redis = None
local_redis = None
DEF_EXPIRE_TIME = 30
expire_time = DEF_EXPIRE_TIME

stat_key_expire_time = 40
container_expire_time = 600

HOST = os.environ.get('HOSTNAME', 'localhost')

Expand All @@ -48,7 +49,9 @@ def set_timestamp(timestamp):
'ip': my_ip}

try:
r = requests.get('http://set.pywb.proxy/', params=params, proxies={'http': PYWB_HOST_PORT, 'https': PYWB_HOST_PORT})
r = requests.get('http://set.pywb.proxy/', params=params,
proxies={'http': PYWB_HOST_PORT,
'https': PYWB_HOST_PORT})

if r.status_code == 200:
global curr_ts
Expand All @@ -75,13 +78,19 @@ def pingsock(ws):
last_data = None
sleep_timeout = 0.5

redis.expire('c:' + HOST, container_expire_time)

while True:
try:
data = get_update()
if data != last_data:
logging.debug('Sending' + str(data))
data['ttl'] = redis.ttl('c:' + HOST)
logging.debug('Sending ' + str(data))
ws.send(json.dumps(data))
last_data = data

# for comparison check
del last_data['ttl']
except WebSocketError as e:
traceback.print_exc(e)
mark_for_removal()
Expand All @@ -93,28 +102,29 @@ def pingsock(ws):

def receiver(ws):
while True:
data = ws.receive()
logging.debug('Received' + str(data))
if data is None:
continue

try:
data = ws.receive()
logging.debug('Received ' + str(data))
if data is None:
continue

data = json.loads(data)
if data['ts']:
set_timestamp(data['ts'])

except WebSocketError as e:
traceback.print_exc()
mark_for_removal()
break

except Exception as e:
traceback.print_exc(e)

def mark_for_removal():
redis.delete('c:' + HOST)
logging.debug('Marked for removal')

redis.expire('c:' + HOST, stat_key_expire_time)

keylist = redis.keys(my_ip + ':*')
for key in keylist:
local_redis.delete(key)


def get_update():
Expand All @@ -132,8 +142,22 @@ def get_update():

base_key = my_ip + ':' + curr_ts + ':'

pi = local_redis.pipeline(transaction=False)

pi.hgetall(base_key + 'urls')
pi.smembers(base_key + 'hosts')
pi.get(base_key + 'ref')
pi.get(base_key + 'base')

pi.expire(base_key + 'urls', stat_key_expire_time)
pi.expire(base_key + 'hosts', stat_key_expire_time)
pi.expire(base_key + 'ref', stat_key_expire_time)
pi.expire(base_key + 'base', stat_key_expire_time)

result = pi.execute()

# all urls
all_urls = local_redis.hgetall(base_key + 'urls')
all_urls = result[0]

count = 0
min_sec = sys.maxint
Expand All @@ -145,19 +169,25 @@ def get_update():
max_sec = max(sec, max_sec)

# all_hosts
all_hosts = local_redis.smembers(base_key + 'hosts')
all_hosts = result[1]

referrer = result[2]
base = result[3]

referrer = local_redis.get(base_key + 'ref')
if not referrer:
page_url = base
else:
page_url = referrer

referrer_secs = int(all_urls.get(referrer, 0))
page_url_secs = int(all_urls.get(page_url, 0))

return {'urls': count,
'req_ts': curr_ts,
'min_sec': min_sec,
'max_sec': max_sec,
'hosts': list(all_hosts),
'referrer': referrer,
'referrer_secs': referrer_secs
'page_url': page_url,
'page_url_secs': page_url_secs,
}


Expand Down
2 changes: 1 addition & 1 deletion app/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ api_version: '1.21'
vnc_port: 6080
cmd_port: 6082

container_expire_secs: 600
container_expire_secs: 90

queue_expire_secs: 30
remove_expired_secs: 20
Expand Down
25 changes: 15 additions & 10 deletions app/static/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ var sparkline = undefined;
var page_change = false;
var spark_change = false;


var pingsock = undefined;
var fail_count = 0;

// Load supporting scripts
Util.load_scripts(["webutil.js", "base64.js", "websock.js", "des.js",
Expand All @@ -23,8 +23,6 @@ $(function() {
function init_container() {
var params = {"url": url, "ts": curr_ts, "browser": coll, "state": "ping"};

var fail_count = 0;

function send_request() {
var init_url = "/init_browser?" + $.param(params);

Expand Down Expand Up @@ -108,9 +106,6 @@ $(function() {

$("#datetime").click(lose_focus);

init_container();


function update_replay_state() {
var full_url = "/" + coll + "/" + curr_ts + "/" + url;

Expand All @@ -133,14 +128,14 @@ $(function() {
}

function handle_data_update(data) {
if (data.referrer && data.referrer_secs) {
var date = new Date(data.referrer_secs * 1000);
if (data.page_url && data.page_url_secs) {
var date = new Date(data.page_url_secs * 1000);
var date_time = date.toISOString().slice(0, -5).split("T");
//$("#currLabel").html("Loaded <b>" + data.referrer + "</b> from <b>" + url_date + "</b>");
//$("#currLabel").html("Loaded <b>" + data.page_url + "</b> from <b>" + url_date + "</b>");
$(".rel_message").hide();
$("#curr-date").html(date_time[0]);
$("#curr-time").html(date_time[1]);
url = data.referrer;
//url = data.page_url;
if (page_change) {
ping_interval = 10000;
page_change = false;
Expand Down Expand Up @@ -264,6 +259,7 @@ $(function() {
ping_interval = 1000;
page_change = true;
spark_change = true;
fail_count = 0;

// start ping at regular intervals
//ping_id = window.setTimeout(ping, ping_interval);
Expand Down Expand Up @@ -306,6 +302,15 @@ $(function() {
UIresize();
}, 500);
};


// INIT
if (cmd_host && vnc_host) {
console.log("Reentrant!");
establish_ping_sock();
} else {
init_container();
}
});


Expand Down
11 changes: 10 additions & 1 deletion pywb/archivereplayview.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,21 @@ def __call__(self, cdx, skip_hosts, cdx_loader, wbrequest):
sec = timestamp_to_sec(cdx['timestamp'])
referrer = wbrequest.env.get('HTTP_REFERER')

with redisclient.pipeline() as pi:
try:
pi = redisclient.redis.pipeline(transaction=False)

pi.hset(base_key + ':urls', cdx['url'], sec)
pi.sadd(base_key + ':hosts', archive_name)

if referrer and not referrer.endswith('.css'):
pi.set(base_key + ':ref', referrer)
elif not referrer:
pi.set(base_key + ':base', cdx['url'])

pi.execute()
except Exception as e:
import traceback
traceback.print_exc(e)

statusline = str(response.status_code) + ' ' + response.reason

Expand Down
3 changes: 2 additions & 1 deletion pywb/mementoquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ def timegate_query(self, timestamp, url):
r = None
try:
r = self.session.get(full)
r.raise_for_status()
result = r.json()
except Exception as e:
if not r or r.status_code != 404:
if r and r.status_code != 404:
import traceback
traceback.print_exc(e)

Expand Down
3 changes: 0 additions & 3 deletions pywb/redisclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ def save_cdx_cache_iter(self, cdx_list, url, ts):
self.redis.expire(full_key, 180)
yield cdx

def pipeline(self):
return redis.utils.pipeline(self.redis)

@staticmethod
def get_url_key_p(ts, url):
key = ts + '/' + canonicalize(url, False)
Expand Down
16 changes: 13 additions & 3 deletions pywb/templates/head_insert.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
<script>
window.__wbinfo = {"ts": "{{ cdx.timestamp }}", "sec": "{{ cdx.timestamp | format_ts('%s') }}"};
</script>
<SCRIPT language="JavaScript">

function beacon() {
if (window == window.top) {
document.writeln('<IMG SRC="http://pywb.proxy/beacon/{{ cdx.url }}" HEIGHT="1" WIDTH="1"/>')
}
}

beacon();
</SCRIPT>
<NOSCRIPT>
<IMG SRC="http://pywb.proxy/beacon/{{ cdx.url }}" HEIGHT="1" WIDTH="1"/>
</NOSCRIPT>

0 comments on commit 39c47af

Please sign in to comment.