commit 90278ab
1 parent 870364b
ketralnis committed Sep 30, 2008

    Tools for webmasters to monitor their content on reddit (uses Solr
    for pulling domain information).

    Also includes a re-factor of solrsearch.py and its usage, which
    should fix bug #179 as a side-effect.

Showing 14 changed files with 378 additions and 283 deletions.
2 changes: 2 additions & 0 deletions config/solr/schema.xml
@@ -385,6 +385,8 @@ CondeNet, Inc. All Rights Reserved.
     <field name="hot" type="hotness" indexed="true" stored="true" required="true" reversed="true" />
     <field name="controversy" type="sfloat" indexed="true" stored="true" required="true" reversed="true" />
     <field name="points" type="sint" indexed="true" stored="true" required="true" reversed="true" />
+    <field name="spam" type="boolean" indexed="true" stored="true" required="false" />
+    <field name="deleted" type="boolean" indexed="true" stored="true" required="false" />
     <!-- subreddit,link,comment -->
     <field name="author_id" type="integer" indexed="true" stored="false" required="false" />
     <field name="author" type="string" indexed="true" stored="false" required="false" />
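The two new boolean fields give the search side a way to keep spam and deleted things out of webmaster-facing listings. A minimal sketch of the kind of filter a client might send (not part of this commit; the Solr URL and the 'site' field name are assumptions for illustration):

    # minimal sketch, assuming a local Solr instance and a hypothetical
    # 'site' field holding a link's domain; only 'spam' and 'deleted'
    # come from the schema change above
    from urllib import urlencode

    def domain_search_url(domain, base = 'http://localhost:8983/solr/select'):
        q = 'site:%s AND -spam:true AND -deleted:true' % domain
        return '%s?%s' % (base, urlencode({'q': q, 'wt': 'json'}))

    print domain_search_url('example.com')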
47 changes: 40 additions & 7 deletions r2/r2/config/middleware.py
@@ -34,6 +34,7 @@
 from r2.config.environment import load_environment
 from r2.config.rewrites import rewrites
 from r2.lib.utils import rstrips
+from r2.lib.jsontemplates import api_type
 
 #middleware stuff
 from r2.lib.html_source import HTMLValidationParser
@@ -240,7 +241,7 @@ def __call__(self, environ, start_response):
 
 
 class SubredditMiddleware(object):
-    sr_pattern = re.compile(r'^/r/([^/]+)')
+    sr_pattern = re.compile(r'^/r/([^/]{3,20})')
 
     def __init__(self, app):
         self.app = app
@@ -255,18 +256,50 @@ def __call__(self, environ, start_response):
             environ['subreddit'] = 'r'
         return self.app(environ, start_response)
 
+class DomainListingMiddleware(object):
+    domain_pattern = re.compile(r'^/domain/(([\w]+\.)+[\w]+)')
+
+    def __init__(self, app):
+        self.app = app
+
+    def __call__(self, environ, start_response):
+        if not environ.has_key('subreddit'):
+            path = environ['PATH_INFO']
+            domain = self.domain_pattern.match(path)
+            if domain:
+                environ['domain'] = domain.groups()[0]
+                environ['PATH_INFO'] = self.domain_pattern.sub('', path) or '/'
+        return self.app(environ, start_response)
+
 class ExtensionMiddleware(object):
     ext_pattern = re.compile(r'\.([^/]+)$')
 
+    extensions = {'rss' : ('xml', 'text/xml; charset=UTF-8'),
+                  'xml' : ('xml', 'text/xml; charset=UTF-8'),
+                  'js' : ('js', 'text/javascript; charset=UTF-8'),
+                  'wired' : ('wired', 'text/javascript; charset=UTF-8'),
+                  'embed' : ('htmllite', 'text/javascript; charset=UTF-8'),
+                  'mobile' : ('mobile', 'text/html'),
+                  'png' : ('png', 'image/png'),
+                  'css' : ('css', 'text/css'),
+                  'api' : (api_type(), 'application/json; charset=UTF-8'),
+                  'json' : (api_type(), 'application/json; charset=UTF-8'),
+                  'json-html' : (api_type('html'), 'application/json; charset=UTF-8')}
+
     def __init__(self, app):
         self.app = app
 
     def __call__(self, environ, start_response):
         path = environ['PATH_INFO']
-        ext = self.ext_pattern.findall(path)
-        if ext:
-            environ['extension'] = ext[0]
-            environ['PATH_INFO'] = self.ext_pattern.sub('', path) or '/'
+        domain_ext = environ.get('reddit-domain-extension')
+        for ext, val in self.extensions.iteritems():
+            if ext == domain_ext or path.endswith(ext):
+                environ['extension'] = ext
+                environ['render_style'] = val[0]
+                environ['content_type'] = val[1]
+                #strip off the extension
+                environ['PATH_INFO'] = path[:-(len(ext) + 1)]
+                break
         return self.app(environ, start_response)
 
 class RewriteMiddleware(object):
@@ -382,11 +415,11 @@ def make_app(global_conf, full_stack=True, **app_conf):
     app = ProfilingMiddleware(app)
     app = SourceViewMiddleware(app)
 
-    app = SubredditMiddleware(app)
     app = DomainMiddleware(app)
+    app = DomainListingMiddleware(app)
+    app = SubredditMiddleware(app)
     app = ExtensionMiddleware(app)
 
-
     log_path = global_conf.get('log_path')
    if log_path:
        process_iden = global_conf.get('scgi_port', 'default')
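Taken together, the middleware changes decompose a request like /domain/example.com/new.rss in stages: DomainListingMiddleware peels the domain off the front of PATH_INFO, then ExtensionMiddleware peels the extension off the end and records the render style and content type for later. A standalone sketch of just the path handling (the sample path is made up):

    # the two patterns from the diff above, applied by hand
    import re

    domain_pattern = re.compile(r'^/domain/(([\w]+\.)+[\w]+)')
    ext_pattern = re.compile(r'\.([^/]+)$')

    path = '/domain/example.com/new.rss'

    m = domain_pattern.match(path)
    print m.groups()[0]                       # example.com -> environ['domain']
    path = domain_pattern.sub('', path) or '/'
    print path                                # /new.rss

    # ExtensionMiddleware then matches 'rss' against the path's tail,
    # sets render_style 'xml' / content_type 'text/xml', and strips it:
    print path[:-(len('rss') + 1)]            # /new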
58 changes: 29 additions & 29 deletions r2/r2/controllers/front.py
@@ -32,12 +32,14 @@
 from r2.lib.emailer import has_opted_out, Email
 from r2.lib.db.operators import desc
 from r2.lib.strings import strings
+from r2.lib.solrsearch import RelatedSearchQuery, SubredditSearchQuery, LinkSearchQuery
 import r2.lib.db.thing as thing
 from listingcontroller import ListingController
 from pylons import c, request
 
 import random as rand
 import re
+import time as time_module
 from urllib import quote_plus
 
 from admin import admin_profile_query
@@ -292,6 +294,7 @@ def GET_stats(self):
     def GET_related(self, num, article, after, reverse, count):
         """Related page: performs a search using title of article as
         the search query."""
+
         title = c.site.name + ((': ' + article.title) if hasattr(article, 'title') else '')
 
         query = self.related_replace_regex.sub(self.related_replace_with,
@@ -301,24 +304,25 @@ def GET_related(self, num, article, after, reverse, count):
         # longer than this are typically ascii art anyway
         query = query[0:1023]
 
-        num, t, pane = self._search(query, time = 'all',
-                                    count = count,
-                                    after = after, reverse = reverse, num = num,
-                                    ignore = [article._fullname],
-                                    types = [Link])
-        res = LinkInfoPage(link = article, content = pane).render()
-        return res
+        q = RelatedSearchQuery(query, ignore = [article._fullname])
+        num, t, pane = self._search(q,
+                                    num = num, after = after, reverse = reverse,
+                                    count = count)
+
+        return LinkInfoPage(link = article, content = pane).render()
 
     @base_listing
     @validate(query = nop('q'))
     def GET_search_reddits(self, query, reverse, after, count, num):
         """Search reddits by title and description."""
-        num, t, spane = self._search(query, num = num, types = [Subreddit],
-                                     sort='points desc', time='all',
-                                     after = after, reverse = reverse,
+        # note that 'downs' is a measure of activity on subreddits
+        q = SubredditSearchQuery(query, sort = 'downs desc',
+                                 timerange = 'all')
+
+        num, t, spane = self._search(q, num = num, reverse = reverse, after = after,
                                      count = count)
 
-        res = SubredditsPage(content=spane,
+        res = SubredditsPage(content=spane,
                              prev_search = query,
                              elapsed_time = t,
                              num_results = num,
@@ -327,7 +331,7 @@ def GET_search_reddits(self, query, reverse, after, count, num):
 
     verify_langs_regex = re.compile(r"^[a-z][a-z](,[a-z][a-z])*$")
     @base_listing
-    @validate(query=nop('q'),
+    @validate(query = nop('q'),
               time = VMenu('action', TimeMenu, remember = False),
               langs = nop('langs'))
     def GET_search(self, query, num, time, reverse, after, count, langs):
@@ -340,12 +344,12 @@ def GET_search(self, query, num, time, reverse, after, count, langs):
         if langs and self.verify_langs_regex.match(langs):
             langs = langs.split(',')
         else:
-            langs = None
+            langs = c.content_langs
 
-        num, t, spane = self._search(query, time=time,
-                                     num = num, after = after,
-                                     reverse = reverse,
-                                     count = count, types = [Link])
+        q = LinkSearchQuery(q = query, timerange = time, langs = langs)
+
+        num, t, spane = self._search(q, num = num, after = after, reverse = reverse,
+                                     count = count)
 
         if not isinstance(c.site,FakeSubreddit):
             my_reddits_link = "/search%s" % query_string({'q': query})
@@ -365,26 +369,22 @@ def GET_search(self, query, num, time, reverse, after, count, langs):
 
         return res
 
-    def _search(self, query = '', time=None,
-                sort = 'hot desc',
-                after = None, reverse = False, num = 25,
-                ignore = None, count=0, types = None,
-                langs = None):
+    def _search(self, query_obj, num, after, reverse, count=0):
         """Helper function for interfacing with search. Basically a
         thin wrapper for SearchBuilder."""
-        builder = SearchBuilder(query, num = num,
-                                sort = sort,
-                                after = after, reverse = reverse,
-                                count = count, types = types,
-                                time = time, ignore = ignore,
-                                langs = langs,
+        builder = SearchBuilder(query_obj,
+                                after = after, num = num, reverse = reverse,
+                                count = count,
                                 wrap = ListingController.builder_wrapper)
 
         listing = LinkListing(builder, show_nums=True)
 
+        # have to do it in two steps since total_num and timing are only
+        # computed after fetch_more
         res = listing.listing()
-        return builder.total_num, builder.timing, res
+        timing = time_module.time() - builder.start_time
+
+        return builder.total_num, timing, res
 
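This refactor is the heart of the bug #179 fix: instead of threading ten-odd keyword arguments (sort, time, types, ignore, langs, ...) through _search into SearchBuilder, each controller now builds a query object that carries its own parameters. solrsearch.py itself is not shown in this excerpt, so the following is only a sketch of the interface the calls above imply; the attribute names are assumptions:

    class SearchQuery(object):
        # one Solr query's worth of state, consumed by SearchBuilder
        def __init__(self, q, sort = 'hot desc', timerange = None,
                     ignore = None, langs = None):
            self.q = q
            self.sort = sort
            self.timerange = timerange
            self.ignore = ignore or []
            self.langs = langs

    class RelatedSearchQuery(SearchQuery):
        pass

    class LinkSearchQuery(SearchQuery):
        pass

    # 't3_abc' is a made-up fullname
    q = RelatedSearchQuery('some article title', ignore = ['t3_abc'])
    print q.sort, q.ignore    # hot desc ['t3_abc']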
3 changes: 3 additions & 0 deletions r2/r2/controllers/listingcontroller.py
@@ -33,6 +33,7 @@
 from r2.lib.db import queries
 from r2.lib.strings import Score
 from r2.lib import organic
+from r2.lib.solrsearch import SearchQuery
 from r2.lib.utils import iters, check_cheating
 
 from admin import admin_profile_query
@@ -112,6 +113,8 @@ def builder(self):
             builder_cls = self.builder_cls
         elif isinstance(self.query_obj, Query):
             builder_cls = QueryBuilder
+        elif isinstance(self.query_obj, SearchQuery):
+            builder_cls = SearchBuilder
         elif isinstance(self.query_obj, iters):
             builder_cls = IDBuilder
         elif isinstance(self.query_obj, queries.CachedResults):
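The two added lines slot search into the controller's existing type dispatch: the listing machinery inspects the query object's class to pick a builder, so a SearchQuery flows through the same rendering path as a database Query. A self-contained sketch of that pattern, with stand-in classes:

    class Query(object): pass         # stand-in for r2.lib.db's Query
    class SearchQuery(object): pass   # stand-in for solrsearch.SearchQuery

    class QueryBuilder(object): pass
    class SearchBuilder(object): pass
    class IDBuilder(object): pass

    def builder_for(query_obj):
        if isinstance(query_obj, Query):
            return QueryBuilder
        elif isinstance(query_obj, SearchQuery):
            return SearchBuilder
        elif isinstance(query_obj, (list, tuple)):
            return IDBuilder
        raise TypeError("no builder for %r" % (query_obj,))

    print builder_for(SearchQuery())  # -> SearchBuilder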
59 changes: 21 additions & 38 deletions r2/r2/controllers/reddit_base.py
@@ -212,13 +212,18 @@ def over18():
         return True
 
 def set_subreddit():
-    sr_name=request.environ.get("subreddit", request.params.get('r'))
+    #the r parameter gets added by javascript for POST requests so we
+    #can reference c.site in api.py
+    sr_name = request.environ.get("subreddit", request.POST.get('r'))
+    domain = request.environ.get("domain")
 
-    if not sr_name or sr_name == Default.name:
+    if not sr_name:
         #check for cnames
         sub_domain = request.environ.get('sub_domain')
         sr = Subreddit._by_domain(sub_domain) if sub_domain else None
         c.site = sr or Default
+    elif sr_name == 'r':
+        #reddits
+        c.site = Sub
     else:
         try:
@@ -227,6 +232,10 @@ def set_subreddit():
             c.site = Default
             redirect_to("/reddits/create?name=%s" % sr_name)
 
+    #if we didn't find a subreddit, check for a domain listing
+    if not sr_name and c.site == Default and domain:
+        c.site = DomainSR(domain)
+
     if isinstance(c.site, FakeSubreddit):
         c.default_sr = True
 
@@ -235,42 +244,16 @@ def set_subreddit():
         abort(404, "not found")
 
 def set_content_type():
-    c.extension = request.environ.get('extension') or \
-                  request.environ.get('reddit-domain-extension') or ''
-    c.render_style = 'html'
-    if c.extension in ('rss', 'xml'):
-        c.render_style = 'xml'
-        c.response_content_type = 'text/xml; charset=UTF-8'
-    elif c.extension == 'js':
-        c.render_style = 'js'
-        c.response_content_type = 'text/javascript; charset=UTF-8'
-    elif c.extension.startswith('json') or c.extension == "api":
-        c.response_content_type = 'application/json; charset=UTF-8'
-        c.response_access_control = 'allow <*>'
-        if c.extension == 'json-html':
-            c.render_style = api_type('html')
-        else:
-            c.render_style = api_type()
-    elif c.extension == 'wired':
-        c.render_style = 'wired'
-        c.response_content_type = 'text/javascript; charset=UTF-8'
-        c.response_wrappers.append(utils.to_js)
-    elif c.extension == 'embed':
-        c.render_style = 'htmllite'
-        c.response_content_type = 'text/javascript; charset=UTF-8'
-        c.response_wrappers.append(utils.to_js)
-    elif c.extension == 'mobile':
-        c.render_style = 'mobile'
-    elif c.extension == 'png':
-        c.response_content_type = 'image/png'
-        c.render_style = 'png'
-    elif c.extension == 'css':
-        c.response_content_type = 'text/css'
-        c.render_style = 'css'
-    #Insert new extentions above this line
-    elif c.extension not in ('', 'html'):
-        # request.path already has the extension stripped off of it
-        redirect_to(request.path + utils.query_string(request.get))
+    e = request.environ
+    if e.has_key('extension'):
+        c.render_style = e['render_style']
+        c.response_content_type = e['content_type']
+
+        ext = e['extension']
+        if ext == 'api' or ext.startswith('json'):
+            c.response_access_control = 'allow <*>'
+        if ext in ('embed', 'wired'):
+            c.response_wrappers.append(utils.to_js)
 
 def get_browser_langs():
     browser_langs = []
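The net effect of this rewrite: all the per-extension decisions now happen once, in ExtensionMiddleware, and set_content_type just copies the results out of the WSGI environ. A toy illustration of the hand-off (the environ values are hypothetical, matching the 'rss' row of the middleware's table):

    # what ExtensionMiddleware would leave behind for /new.rss
    environ = {'extension': 'rss',
               'render_style': 'xml',
               'content_type': 'text/xml; charset=UTF-8'}

    # what set_content_type now does with it
    if environ.has_key('extension'):
        render_style = environ['render_style']
        content_type = environ['content_type']
        print render_style, content_type   # xml text/xml; charset=UTF-8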
3 changes: 2 additions & 1 deletion r2/r2/lib/base.py
@@ -118,7 +118,8 @@ def format_output_url(cls, url, **kw):
             u.mk_cname(**kw)
 
         # make sure the extensions agree with the current page
-        u.set_extension(c.extension)
+        if c.extension:
+            u.set_extension(c.extension)
 
         # unparse and encode it un utf8
         return _force_unicode(u.unparse()).encode('utf8')
2 changes: 1 addition & 1 deletion r2/r2/lib/cache.py
@@ -225,7 +225,7 @@ def test_cache(cache):
 # a cache that occasionally dumps itself to be used for long-running
 # processes
 class SelfEmptyingCache(LocalCache):
-    def __init__(self,max_size=50*1000):
+    def __init__(self,max_size=100*1000):
         self.max_size = max_size
 
     def maybe_reset(self):
10 changes: 10 additions & 0 deletions r2/r2/lib/db/queries.py
@@ -5,6 +5,7 @@
 from r2.lib.db import query_queue
 from r2.lib.db.sorts import epoch_seconds
 from r2.lib.utils import fetch_things2, worker
+from r2.lib.solrsearch import DomainSearchQuery
 
 from datetime import datetime
 
@@ -23,6 +24,12 @@ def db_sort(sort):
     cls, col = db_sorts[sort]
     return cls(col)
 
+search_sort = dict(hot = 'hot desc',
+                   new = 'date desc',
+                   top = 'points desc',
+                   controversial = 'controversy desc',
+                   old = 'date asc')
+
 db_times = dict(all = None,
                 hour = Thing.c._date >= timeago('1 hour'),
                 day = Thing.c._date >= timeago('1 day'),
@@ -176,6 +183,9 @@ def get_links(sr, sort, time):
         q._filter(db_times[time])
     return make_results(q)
 
+def get_domain_links(domain, sort, time):
+    return DomainSearchQuery(domain, sort=search_sort[sort], timerange=time)
+
 def user_query(kind, user, sort, time):
     """General profile-page query."""
     q = kind._query(kind.c.author_id == user._id,
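get_domain_links is the query-side entry point for the new domain pages: it translates a listing sort name into a Solr sort clause via search_sort and returns a query object for a builder to run later, rather than fetching rows itself. A self-contained sketch with a stand-in for DomainSearchQuery (which this excerpt doesn't show); the sample arguments are made up:

    search_sort = dict(hot = 'hot desc',
                       new = 'date desc',
                       top = 'points desc',
                       controversial = 'controversy desc',
                       old = 'date asc')

    class DomainSearchQuery(object):
        # stand-in for the real class in r2.lib.solrsearch
        def __init__(self, domain, sort = None, timerange = None):
            self.domain, self.sort, self.timerange = domain, sort, timerange

    def get_domain_links(domain, sort, time):
        return DomainSearchQuery(domain, sort = search_sort[sort], timerange = time)

    q = get_domain_links('example.com', 'top', 'week')
    print q.domain, q.sort, q.timerange    # example.com points desc week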