Skip to content
This repository has been archived by the owner on May 22, 2022. It is now read-only.

Commit

Permalink
Search providers
Browse files Browse the repository at this point in the history
Separate search logic into provider system. Move existing logic into a
cloudsearch implementation of a search provider, and add a Solr
implementation of a search provider.
  • Loading branch information
kemitche committed May 13, 2015
1 parent 990a583 commit 15d7e93
Show file tree
Hide file tree
Showing 18 changed files with 3,154 additions and 380 deletions.
2 changes: 1 addition & 1 deletion install-reddit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ function set_consumer_count {
}

set_consumer_count log_q 0
set_consumer_count cloudsearch_q 0
set_consumer_count search_q 0
set_consumer_count del_account_q 1
set_consumer_count scraper_q 1
set_consumer_count markread_q 1
Expand Down
5 changes: 4 additions & 1 deletion r2/example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ wiki_max_page_separators = 3


############################################ SEARCH
# name of the search provider to use (e.g. cloudsearch or solr)
search_provider = cloudsearch

# endpoint for link search
CLOUDSEARCH_SEARCH_API =
# endpoint for link upload
Expand All @@ -419,7 +422,6 @@ CLOUDSEARCH_SUBREDDIT_SEARCH_API =
# endpoint for subreddit upload
CLOUDSEARCH_SUBREDDIT_DOC_API =


############################################ MEMCACHE
num_mc_clients = 5
# core memcache cluster, Things and various other stuff
Expand Down Expand Up @@ -800,3 +802,4 @@ feature_require_https = off
# as well.
feature_give_hsts_grants = off
feature_multireddit_customizations = off

8 changes: 4 additions & 4 deletions r2/r2/controllers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@
from r2.lib.menus import CommentSortMenu
from r2.lib.captcha import get_iden
from r2.lib.strings import strings
from r2.lib.filters import _force_unicode, _force_utf8, websafe_json, websafe, spaceCompress
from r2.lib.template_helpers import format_html, header_url
from r2.lib.filters import _force_unicode, _force_utf8, websafe_json, websafe, spaceCompress
from r2.lib.db import queries
from r2.lib import media
from r2.lib.db import tdb_cassandra
Expand All @@ -103,7 +103,6 @@
from r2.lib.filters import safemarkdown
from r2.lib.media import str_to_image
from r2.controllers.api_docs import api_doc, api_section
from r2.lib.search import SearchQuery
from r2.controllers.oauth2 import require_oauth2_scope, allow_oauth2_access
from r2.lib.template_helpers import add_sr, get_domain, make_url_protocol_relative
from r2.lib.system_messages import notify_user_added
Expand Down Expand Up @@ -4186,8 +4185,9 @@ def GET_subreddits_by_topic(self, responder, query):
exclude = Subreddit.default_subreddits()

faceting = {"reddit":{"sort":"-sum(text_relevance)", "count":20}}
results = SearchQuery(query, sort="relevance", faceting=faceting, num=0,
syntax="plain").run()
results = g.search.SearchQuery(query, sort="relevance",
faceting=faceting, num=0,
syntax="plain").run()

sr_results = []
for sr, count in results.subreddit_facets:
Expand Down
46 changes: 22 additions & 24 deletions r2/r2/controllers/front.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@
from r2.lib.db import queries
from r2.lib.db.tdb_cassandra import MultiColumnQuery
from r2.lib.strings import strings
from r2.lib.search import (SearchQuery, SubredditSearchQuery, SearchException,
InvalidQuery)
from r2.lib.validator import *
from r2.lib import jsontemplates
from r2.lib import sup
Expand Down Expand Up @@ -851,17 +849,14 @@ def GET_related(self, num, article, after, reverse, count):

query = self.related_replace_regex.sub(self.related_replace_with,
article.title)
query = _force_unicode(query)
query = query[:1024]
query = u"|".join(query.split())
query = u"title:'%s'" % query

rel_range = timedelta(days=3)
start = int(time_module.mktime((article._date - rel_range).utctimetuple()))
end = int(time_module.mktime((article._date + rel_range).utctimetuple()))
nsfw = u"nsfw:0" if not article.is_nsfw else u""
query = u"(and %s timestamp:%s..%s %s)" % (query, start, end, nsfw)
q = SearchQuery(query, raw_sort="-text_relevance", faceting={},
syntax="cloudsearch")
nsfw = article.is_nsfw

q = g.search.get_related_query(query, article, start, end, nsfw)

content = self._search(q, num=num, after=after, reverse=reverse,
count=count)

Expand Down Expand Up @@ -932,8 +927,8 @@ def GET_search_reddits(self, query, reverse, after, count, num):
sort = 'rel1'

if query:
q = SubredditSearchQuery(query, sort=sort, faceting={},
include_over18=include_over18)
q = g.search.SubredditSearchQuery(query, sort=sort, faceting={},
include_over18=include_over18)
content = self._search(q, num=num, reverse=reverse,
after=after, count=count,
skip_deleted_authors=False)
Expand All @@ -958,7 +953,7 @@ def GET_search_reddits(self, query, reverse, after, count, num):
recent=VMenu('t', TimeMenu, remember=False),
restrict_sr=VBoolean('restrict_sr', default=False),
include_facets=VBoolean('include_facets', default=False),
syntax=VOneOf('syntax', options=SearchQuery.known_syntaxes))
syntax=VOneOf('syntax', options=g.search_syntaxes))
@api_doc(api_section.search, supports_rss=True, uses_site=True)
def GET_search(self, query, num, reverse, after, count, sort, recent,
restrict_sr, include_facets, syntax):
Expand All @@ -985,7 +980,7 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
has_query = query or not isinstance(site, (DefaultSR, AllSR))

if not syntax:
syntax = SearchQuery.default_syntax
syntax = g.search.SearchQuery.default_syntax

# show NSFW to API and RSS users unless obey_over18=true
is_api_or_rss = (c.render_style in API_TYPES
Expand Down Expand Up @@ -1037,15 +1032,16 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
if num > 0 and has_query:
nav_menus = [SearchSortMenu(default=sort), TimeMenu(default=recent)]
try:
q = SearchQuery(query, site, sort=sort, faceting=faceting,
include_over18=include_over18,
recent=recent, syntax=syntax)
q = g.search.SearchQuery(query, site, sort=sort,
faceting=faceting,
include_over18=include_over18,
recent=recent, syntax=syntax)
content = self._search(q, num=num, after=after, reverse=reverse,
count=count)
converted_data = q.converted_data
subreddit_facets = content.subreddit_facets

except InvalidQuery:
except g.search.InvalidQuery:
g.stats.simple_event('cloudsearch.error.invalidquery')

# Clean the search of characters that might be causing the
Expand All @@ -1055,9 +1051,10 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,
cleaned = re.sub("[^\w\s]+", " ", query)
cleaned = cleaned.lower().strip()

q = SearchQuery(cleaned, site, sort=sort, faceting=faceting,
include_over18=include_over18,
recent=recent)
q = g.search.SearchQuery(cleaned, site, sort=sort,
faceting=faceting,
include_over18=include_over18,
recent=recent)
content = self._search(q, num=num, after=after, reverse=reverse,
count=count)
converted_data = q.converted_data
Expand All @@ -1076,8 +1073,9 @@ def GET_search(self, query, num, reverse, after, count, sort, recent,

# extra search request for subreddit results
if sr_num > 0 and has_query:
sr_q = SubredditSearchQuery(query, sort='rel1', faceting={},
include_over18=include_over18)
sr_q = g.search.SubredditSearchQuery(query, sort='rel1',
faceting={},
include_over18=include_over18)
subreddits = self._search(sr_q, num=sr_num, reverse=reverse,
after=after, count=count, type='sr',
skip_deleted_authors=False)
Expand Down Expand Up @@ -1125,7 +1123,7 @@ def _search(self, query_obj, num, after, reverse, count=0, type=None,

try:
res = listing.listing()
except SearchException + (socket.error,) as e:
except g.search.SearchException + (socket.error,) as e:
return self.search_fail(e)

return res
Expand Down
5 changes: 3 additions & 2 deletions r2/r2/controllers/listingcontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from r2.lib.db.thing import Query, Merge, Relations
from r2.lib.db import queries
from r2.lib.strings import Score
import r2.lib.search as search
from r2.lib.template_helpers import add_sr
from r2.lib.admin_utils import check_cheating
from r2.lib.csrf import csrf_exempt
Expand All @@ -59,6 +58,7 @@

from api_docs import api_doc, api_section

from pylons import g
from pylons.i18n import _

from datetime import timedelta
Expand All @@ -68,6 +68,7 @@
class ListingController(RedditController):
"""Generalized controller for pages with lists of links."""


# toggle skipping of links based on the users' save/hide/vote preferences
skip = True

Expand Down Expand Up @@ -165,7 +166,7 @@ def builder(self):
builder_cls = self.builder_cls
elif isinstance(self.query_obj, Query):
builder_cls = QueryBuilder
elif isinstance(self.query_obj, search.SearchQuery):
elif isinstance(self.query_obj, g.search.SearchQuery):
builder_cls = SearchBuilder
elif isinstance(self.query_obj, iters):
builder_cls = IDBuilder
Expand Down
3 changes: 1 addition & 2 deletions r2/r2/controllers/reddit_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1798,8 +1798,7 @@ def abort_if_not_modified(self, last_modified, private=True,
abort(304, 'not modified')

def search_fail(self, exception):
from r2.lib.search import SearchException
if isinstance(exception, SearchException + (socket.error,)):
if isinstance(exception, g.search.SearchException + (socket.error,)):
g.log.error("Search Error: %s" % repr(exception))

errpage = pages.RedditError(_("search failed"),
Expand Down
40 changes: 40 additions & 0 deletions r2/r2/lib/app_globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,25 @@


LIVE_CONFIG_NODE = "/config/live"

# Sort-menu name -> backend sort expression, keyed by search provider name.
LINK_SEARCH_SORTS = {
    # CloudSearch: a leading "-" on the rank expression name.
    'cloudsearch': {
        'relevance': '-relevance',
        'hot': '-hot2',
        'top': '-top',
        'new': '-timestamp',
        'comments': '-num_comments',
    },
    # Solr: "<field or function> desc" sort clauses.
    'solr': {
        'relevance': 'score desc',
        'hot': 'max(hot/45000.0, 1.0) desc',
        'top': 'top desc',
        'new': 'timestamp desc',
        'comments': 'num_comments desc',
    },
}

# Query syntaxes accepted per search provider name.
SEARCH_SYNTAXES = {
    'cloudsearch': (
        'cloudsearch',
        'lucene',
        'plain',
    ),
    'solr': (
        'solr',
        'plain',
    ),
}

SECRETS_NODE = "/config/secrets"


Expand Down Expand Up @@ -282,6 +301,7 @@ class Globals(object):
'community_email',
'smtp_server',
'events_collector_url',
'search_provider',
],

ConfigValue.choice(ONE=CL_ONE, QUORUM=CL_QUORUM): [
Expand Down Expand Up @@ -966,3 +986,23 @@ def __del__(self):
here.
"""
pass

@property
def search(self):
    """Return the active search provider, resolving it on first access.

    While ``search_provider`` is still a string it is treated as a
    provider name and passed to ``select_provider`` under the
    "r2.provider.search" entry point group; the result is stored back
    on ``search_provider`` so later accesses return it directly.

    Returns None when ``search_provider`` is missing or falsy.
    """
    provider = getattr(self, 'search_provider', None)
    if not provider:
        return None

    # isinstance (rather than an exact type comparison) so that str
    # subclasses are also recognised as a not-yet-resolved name.
    if isinstance(provider, str):
        provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.search",
            provider,
        )
        # Store the resolved provider in place of its name.
        self.search_provider = provider

    return provider

@property
def search_sorts(self):
    """Return the link sort mapping for the configured search provider.

    Looks up the ``search_provider`` config value in LINK_SEARCH_SORTS;
    raises KeyError if that value has no entry there.
    """
    provider_name = self.config.get('search_provider')
    return LINK_SEARCH_SORTS[provider_name]

@property
def search_syntaxes(self):
    """Return the query syntaxes for the configured search provider.

    Looks up the ``search_provider`` config value in SEARCH_SYNTAXES;
    raises KeyError if that value has no entry there.
    """
    provider_name = self.config.get('search_provider')
    return SEARCH_SYNTAXES[provider_name]
8 changes: 3 additions & 5 deletions r2/r2/lib/menus.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,12 @@
# Inc. All Rights Reserved.
###############################################################################

from pylons import c, request
from pylons import c, g, request
from pylons.i18n import _, N_

from r2.config import feature
from r2.lib.db import operators
from r2.lib.filters import _force_unicode
from r2.lib.search import sorts as search_sorts
from r2.lib.search import sr_sorts as sr_search_sorts
from r2.lib.strings import StringHandler, plurals
from r2.lib.utils import class_property, query_string, timeago
from r2.lib.wrapped import Styled
Expand Down Expand Up @@ -610,7 +608,7 @@ def make_title(self, attr):
class SearchSortMenu(SortMenu):
"""Sort menu for search pages."""
_default = 'relevance'
mapping = search_sorts
mapping = g.search_sorts
_options = mapping.keys()

@classmethod
Expand All @@ -621,7 +619,7 @@ def operator(cls, sort):
class SubredditSearchSortMenu(SortMenu):
"""Sort menu for subreddit search pages."""
_default = 'relevance'
mapping = sr_search_sorts
mapping = g.search_sorts
_options = mapping.keys()

@classmethod
Expand Down
28 changes: 21 additions & 7 deletions r2/r2/lib/search.py → r2/r2/lib/providers/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,28 @@
# Inc. All Rights Reserved.
###############################################################################

import r2.lib.cloudsearch as cloudsearch

class SearchProvider(object):
"""Provider for search.

Interface stub: every method defined on this class raises
NotImplementedError.
"""

# NOTE(review): the cloudsearch-backed assignments interleaved below
# (InvalidQuery, SearchException, SearchQuery, SubredditSearchQuery,
# sorts, sr_sorts) look like lines from the previous r2/lib/search.py
# shown alongside the new class body -- confirm against the real file.
InvalidQuery = (cloudsearch.InvalidQuery,)
SearchException = (cloudsearch.CloudSearchHTTPError,)
# NOTE(review): front.py uses `except g.search.InvalidQuery:`, i.e. it
# treats this member as an exception class/tuple -- TODO confirm how
# concrete providers are expected to expose it.
def InvalidQuery(self):
raise NotImplementedError

SearchQuery = cloudsearch.LinkSearchQuery
SubredditSearchQuery = cloudsearch.SubredditSearchQuery
# NOTE(review): callers evaluate `g.search.SearchException + (socket.error,)`,
# which presumes a tuple-like attribute rather than a method -- verify.
def SearchException(self):
raise NotImplementedError

sorts = cloudsearch.LinkSearchQuery.sorts_menu_mapping
sr_sorts = cloudsearch.SubredditSearchQuery.sorts_menu_mapping
# NOTE(review): the controllers reference `g.search.SearchQuery`, while this
# member is named `Query` -- confirm which name providers must supply.
def Query(self):
raise NotImplementedError

def SubredditSearchQuery(self):
raise NotImplementedError

def sorts(self):
raise NotImplementedError

def run_changed(self):
raise NotImplementedError

# NOTE(review): front.py calls
# `g.search.get_related_query(query, article, start, end, nsfw)`; this stub
# declares no parameters besides self -- verify the intended signature.
def get_related_query(self):
raise NotImplementedError
Loading

0 comments on commit 15d7e93

Please sign in to comment.