Skip to content

Commit

Permalink
Add GeoIP service.
Browse files Browse the repository at this point in the history
  • Loading branch information
bsimpson63 authored and spladug committed May 28, 2014
1 parent ca0001a commit 20f57a1
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 0 deletions.
27 changes: 27 additions & 0 deletions install-reddit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@ python-kazoo
python-stripe
python-tinycss2
python-flask
geoip-bin
geoip-database
python-geoip
nodejs
node-less
gettext
Expand Down Expand Up @@ -547,6 +552,28 @@ fi

start sutro

###############################################################################
# geoip service
###############################################################################
# Write the gunicorn config for the geoip service, but only when one does not
# exist yet, so an existing (possibly locally edited) file is left untouched.
# The heredoc delimiter is unquoted, so $REDDIT_HOME and $REDDIT_USER are
# expanded by the shell when the file is written.
# The service binds to 127.0.0.1:5000, which matches the address given by
# `geoip_location` in r2/example.ini.
if [ ! -f /etc/gunicorn.d/geoip.conf ]; then
cat > /etc/gunicorn.d/geoip.conf <<GEOIP
CONFIG = {
"mode": "wsgi",
"working_dir": "$REDDIT_HOME/reddit/scripts",
"user": "$REDDIT_USER",
"group": "$REDDIT_USER",
"args": (
"--bind=127.0.0.1:5000",
"--workers=1",
"--limit-request-line=8190",
"geoip_service:application",
),
}
GEOIP
fi

# Start gunicorn (runs whether or not the config file was just written).
service gunicorn start

###############################################################################
# Job Environment
###############################################################################
Expand Down
2 changes: 2 additions & 0 deletions r2/example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,8 @@ words_file = /usr/dict/words
case_sensitive_domains = i.imgur.com, youtube.com
# whether to load reddit private code (a hack until we structure it better)
import_private = false
# location of geoip service (base URL; the scheme is required because the
# value is passed to urllib2.urlopen)
geoip_location = http://127.0.0.1:5000


############################################ AUTHENTICATION
Expand Down
110 changes: 110 additions & 0 deletions r2/r2/lib/geoip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
# Inc. All Rights Reserved.
###############################################################################

import datetime
import httplib
import json
import os
import socket
import urllib2

from pylons import g

from r2.lib.cache import sgm
from r2.lib.utils import in_chunks, tup

# If the geoip service has nginx in front of it there is a default limit of 8kb:
# http://wiki.nginx.org/NginxHttpCoreModule#large_client_header_buffers
# >>> len('GET /geoip/' + '+'.join(['255.255.255.255'] * 500) + ' HTTP/1.1')
# 8019
# Used by _location_by_ips as the maximum number of addresses per request.
MAX_IPS_PER_GROUP = 500

# Seven days expressed in seconds (timedelta.total_seconds() returns a float).
# Passed as the `time` argument of the sgm() calls in this module.
GEOIP_CACHE_TIME = datetime.timedelta(days=7).total_seconds()

def _location_by_ips(ips):
    """Fetch location records for `ips` from the GeoIP HTTP service.

    The addresses are sent in batches of at most MAX_IPS_PER_GROUP, joined
    with '+', to `<g.geoip_location>/geoip/<addresses>`.  `g.geoip_location`
    must be a full base URL (including the scheme) that urllib2 can open.

    Returns a dict built by merging the JSON object returned for each batch.
    A batch whose request fails or whose body is not valid JSON is logged
    and skipped, so the result may cover only part of `ips`.  An empty dict
    is returned when `g.geoip_location` is missing or empty.
    """
    # An empty setting is treated like a missing one: urlopen would otherwise
    # raise an uncaught ValueError for the resulting scheme-less URL.
    if not getattr(g, 'geoip_location', None):
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    # Build URLs with plain '/' joins; os.path.join is for filesystem paths.
    base_url = g.geoip_location.rstrip('/')

    ret = {}
    for batch in in_chunks(ips, MAX_IPS_PER_GROUP):
        url = '/'.join((base_url, 'geoip', '+'.join(batch)))

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            try:
                json_data = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError as e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue
    return ret


def _organization_by_ips(ips):
    """Fetch organization names for `ips` from the GeoIP HTTP service.

    Duplicate addresses are dropped, then the rest are sent in batches of at
    most MAX_IPS_PER_GROUP, joined with '+', to
    `<g.geoip_location>/org/<addresses>`.  Batching keeps the request line
    under the size limit described next to MAX_IPS_PER_GROUP.
    `g.geoip_location` must be a full base URL (including the scheme).

    Returns a dict built by merging the JSON object returned for each batch.
    A batch whose request fails or whose body is not valid JSON is logged
    and skipped.  An empty dict is returned when `g.geoip_location` is
    missing or empty.
    """
    # An empty setting is treated like a missing one: urlopen would otherwise
    # raise an uncaught ValueError for the resulting scheme-less URL.
    if not getattr(g, 'geoip_location', None):
        g.log.warning("g.geoip_location not set. skipping GeoIP lookup.")
        return {}

    # Build URLs with plain '/' joins; os.path.join is for filesystem paths.
    base_url = g.geoip_location.rstrip('/')

    ret = {}
    for batch in in_chunks(list(set(ips)), MAX_IPS_PER_GROUP):
        url = '/'.join((base_url, 'org', '+'.join(batch)))

        try:
            response = urllib2.urlopen(url=url, timeout=3)
            try:
                json_data = response.read()
            finally:
                # Release the connection even if the read fails.
                response.close()
        # Same failure set as _location_by_ips: a timeout while reading the
        # body surfaces as socket.error, not URLError.
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            g.log.warning("Failed to fetch GeoIP information: %r" % e)
            continue

        try:
            ret.update(json.loads(json_data))
        except ValueError as e:
            g.log.warning("Invalid JSON response for GeoIP lookup: %r" % e)
            continue
    return ret


def location_by_ips(ips):
    """Return GeoIP location data for one address or several.

    `ips` is normalised with `tup`, and the lookup goes through `sgm` against
    `g.cache` (prefix 'location_by_ip', expiry GEOIP_CACHE_TIME) with
    `_location_by_ips` as the miss function.

    When `tup` reports a single value and the lookup result is non-empty,
    only that address's entry is returned.  In every other case the whole
    result from `sgm` is returned unchanged, so a single-address lookup that
    found nothing yields the empty result itself.
    """
    addresses, single = tup(ips, ret_is_single=True)
    found = sgm(g.cache, addresses,
                miss_fn=_location_by_ips,
                prefix='location_by_ip',
                time=GEOIP_CACHE_TIME)
    if not (single and found):
        return found
    return found[addresses[0]]


def organization_by_ips(ips):
    """Return GeoIP organization data for one address or several.

    `ips` is normalised with `tup`, and the lookup goes through `sgm` against
    `g.cache` (prefix 'organization_by_ip', expiry GEOIP_CACHE_TIME) with
    `_organization_by_ips` as the miss function.

    When `tup` reports a single value and the lookup result is non-empty,
    only that address's entry is returned.  In every other case the whole
    result from `sgm` is returned unchanged, so a single-address lookup that
    found nothing yields the empty result itself.
    """
    addresses, single = tup(ips, ret_is_single=True)
    found = sgm(g.cache, addresses,
                miss_fn=_organization_by_ips,
                prefix='organization_by_ip',
                time=GEOIP_CACHE_TIME)
    if not (single and found):
        return found
    return found[addresses[0]]
98 changes: 98 additions & 0 deletions scripts/geoip_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/python
# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2014 reddit
# Inc. All Rights Reserved.
###############################################################################
"""
This is a tiny Flask app used for geoip lookups against a maxmind database.
If you are using this service be sure to set `geoip_location` in your ini file.
"""

import json

import GeoIP
from flask import Flask, make_response

application = Flask(__name__)

# SET THESE PATHS TO YOUR MAXMIND GEOIP LEGACY DATABASES
# http://dev.maxmind.com/geoip/legacy/geolite/
COUNTRY_DB_PATH = '/usr/share/GeoIP/GeoIP.dat'
CITY_DB_PATH = '/usr/share/GeoIP/GeoIPCity.dat'
ORG_DB_PATH = '/usr/share/GeoIP/GeoIPOrg.dat'


def _open_database(path):
    """Open the MaxMind legacy database at `path`, or return None on failure.

    The database is opened with GeoIP.GEOIP_MEMORY_CACHE.  Any ordinary
    error while opening is treated as "this database is not available";
    the route handlers check the resulting handle for truthiness before
    using it.  KeyboardInterrupt and SystemExit are deliberately not
    swallowed.
    """
    try:
        return GeoIP.open(path, GeoIP.GEOIP_MEMORY_CACHE)
    except Exception:
        return None


# Module-level handles used by the route handlers: country, city and
# organization databases respectively.  Each is None when unavailable.
gc = _open_database(COUNTRY_DB_PATH)
gi = _open_database(CITY_DB_PATH)
go = _open_database(ORG_DB_PATH)


def json_response(result):
    """Wrap `result` in a 200 response whose body is UTF-8 encoded JSON.

    `json.dumps` is told that byte strings inside `result` are ISO-8859-1
    (presumably the encoding of strings coming out of the GeoIP databases --
    TODO confirm) and to emit non-ASCII characters unescaped.  The
    serialised text is then encoded as UTF-8 and labelled as such in the
    Content-Type header.
    """
    payload = json.dumps(result, encoding='iso-8859-1', ensure_ascii=False)
    resp = make_response(payload.encode('utf-8'), 200)
    resp.headers['Content-Type'] = 'application/json; charset=utf-8'
    return resp


@application.route('/geoip/<ips>')
def get_record(ips):
    """Look up location data for the '+'-separated addresses in `ips`.

    The city database (`gi`) is preferred and supplies whatever
    `record_by_addr` returns for each address.  If it is unavailable, the
    country database (`gc`) supplies a dict with 'country_code' and
    'country_name'.  With neither database the result is an empty object.
    The mapping of address to data is returned as a JSON response.
    """
    addresses = ips.split('+')

    if gi:
        records = {addr: gi.record_by_addr(addr) for addr in addresses}
    elif gc:
        records = {
            addr: {
                'country_code': gc.country_code_by_addr(addr),
                'country_name': gc.country_name_by_addr(addr),
            }
            for addr in addresses
        }
    else:
        records = {}

    return json_response(records)


@application.route('/org/<ips>')
def get_organizations(ips):
    """Look up the organization for the '+'-separated addresses in `ips`.

    Each address is mapped to whatever `org_by_addr` returns from the
    organization database (`go`).  If that database is unavailable the
    result is an empty object.  The mapping is returned as a JSON response.
    """
    addresses = ips.split('+')

    if go:
        organizations = {addr: go.org_by_addr(addr) for addr in addresses}
    else:
        organizations = {}

    return json_response(organizations)


# When this file is executed directly, serve the app with Flask's own
# application.run() using its default settings.
if __name__ == "__main__":
    application.run()
13 changes: 13 additions & 0 deletions upstart/reddit-job-update_geoip.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Upstart job that runs geoipupdate and then reloads the gunicorn-hosted
# geoip service.
description "refresh the geoip databases"

# NOTE(review): `manual` should keep this job from starting on its own, so it
# is presumably triggered explicitly (e.g. by a scheduler) -- confirm.
manual
task
stop on reddit-stop or runlevel [016]

nice 10

script
# load the reddit environment settings
. /etc/default/reddit
geoipupdate
# presumably reloaded so the service reopens the refreshed database files,
# which it opens once at import time -- confirm
service gunicorn reload geoip.conf
end script

0 comments on commit 20f57a1

Please sign in to comment.