From dd4d7c3eb3a1e4943dc7ca7c52edf51a5d5810da Mon Sep 17 00:00:00 2001 From: Joshua Lan Date: Mon, 4 Sep 2017 22:24:40 -0400 Subject: [PATCH] =?UTF-8?q?Fix=20#2553:=20Change=20the=20"index=20all=20ex?= =?UTF-8?q?plorations"=20job=20in=20the=20admin=20dashb=E2=80=A6=20(#3831)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix #2553: Change the "index all explorations" job in the admin dashboard to an "index all activities" job * Address review comments * Address review comments * Update docstring to correct parameter name --- core/controllers/collection_editor.py | 3 +- core/controllers/cron.py | 10 +- core/controllers/editor.py | 3 +- core/domain/activity_jobs_one_off.py | 47 +++ core/domain/activity_jobs_one_off_test.py | 91 ++++++ core/domain/collection_jobs_one_off_test.py | 14 + core/domain/collection_services.py | 128 +-------- core/domain/collection_services_test.py | 103 +------ core/domain/exp_jobs_one_off.py | 17 -- core/domain/exp_jobs_one_off_test.py | 54 ---- core/domain/exp_services.py | 163 +---------- core/domain/exp_services_test.py | 135 +-------- core/domain/search_services.py | 301 ++++++++++++++++++++ core/domain/search_services_test.py | 294 +++++++++++++++++++ core/domain/summary_services.py | 5 +- core/jobs_registry.py | 3 +- core/tests/test_utils.py | 17 ++ main_cron.py | 2 +- 18 files changed, 801 insertions(+), 589 deletions(-) create mode 100644 core/domain/activity_jobs_one_off.py create mode 100644 core/domain/activity_jobs_one_off_test.py create mode 100644 core/domain/search_services.py create mode 100644 core/domain/search_services_test.py diff --git a/core/controllers/collection_editor.py b/core/controllers/collection_editor.py index 2e4b9fd0f6ae..781276c885e2 100644 --- a/core/controllers/collection_editor.py +++ b/core/controllers/collection_editor.py @@ -20,6 +20,7 @@ from core.domain import acl_decorators from core.domain import collection_services from core.domain import rights_manager +from core.domain import search_services from core.domain import summary_services from core.platform import models import feconf @@ -203,7 +204,7 @@ def put(self, collection_id): _require_valid_version(version, collection.version) rights_manager.unpublish_collection(self.user, collection_id) - collection_services.delete_documents_from_search_index([ + search_services.delete_collections_from_search_index([ collection_id]) collection_rights = rights_manager.get_collection_rights( diff --git a/core/controllers/cron.py b/core/controllers/cron.py index 45e4e78a118b..9aea3f6c808e 100644 --- a/core/controllers/cron.py +++ b/core/controllers/cron.py @@ -21,8 +21,8 @@ from core import jobs from core.controllers import base from core.domain import acl_decorators +from core.domain import activity_jobs_one_off from core.domain import email_manager -from core.domain import exp_jobs_one_off from core.domain import recommendations_jobs_one_off from core.domain import user_jobs_one_off from core.platform import models @@ -96,14 +96,14 @@ def get(self): job_class.enqueue(job_class.create_new()) -class CronExplorationSearchRankHandler(base.BaseHandler): - """Handler for computing exploration search ranks.""" +class CronActivitySearchRankHandler(base.BaseHandler): + """Handler for computing activity search ranks.""" @acl_decorators.can_perform_cron_tasks def get(self): """Handles GET requests.""" - exp_jobs_one_off.IndexAllExplorationsJobManager.enqueue( - exp_jobs_one_off.IndexAllExplorationsJobManager.create_new()) + activity_jobs_one_off.IndexAllActivitiesJobManager.enqueue( + activity_jobs_one_off.IndexAllActivitiesJobManager.create_new()) class CronMapreduceCleanupHandler(base.BaseHandler): diff --git a/core/controllers/editor.py b/core/controllers/editor.py index 40bb8fc0ac92..0a76a0f295c4 100644 --- a/core/controllers/editor.py +++ b/core/controllers/editor.py @@ -38,6 +38,7 @@ from core.domain import obj_services from core.domain import rights_manager from core.domain import rte_component_registry +from core.domain import search_services from core.domain import stats_services from core.domain import user_services from core.domain import value_generators_domain @@ -409,7 +410,7 @@ def put(self, exploration_id): # Perform the moderator action. if action == 'unpublish_exploration': rights_manager.unpublish_exploration(self.user, exploration_id) - exp_services.delete_documents_from_search_index([ + search_services.delete_explorations_from_search_index([ exploration_id]) else: raise self.InvalidInputException( diff --git a/core/domain/activity_jobs_one_off.py b/core/domain/activity_jobs_one_off.py new file mode 100644 index 000000000000..9f50cbf5e4de --- /dev/null +++ b/core/domain/activity_jobs_one_off.py @@ -0,0 +1,47 @@ +# coding: utf-8 +# +# Copyright 2017 The Oppia Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""One-off jobs for activities.""" + +from core import jobs +from core.domain import search_services +from core.platform import models + +(exp_models, collection_models,) = models.Registry.import_models([ + models.NAMES.exploration, models.NAMES.collection]) + + +class IndexAllActivitiesJobManager(jobs.BaseMapReduceOneOffJobManager): + """Job that indexes all explorations and collections and compute their + ranks. + """ + + @classmethod + def entity_classes_to_map_over(cls): + return [exp_models.ExpSummaryModel, + collection_models.CollectionSummaryModel] + + @staticmethod + def map(item): + if not item.deleted: + if isinstance(item, exp_models.ExpSummaryModel): + search_services.index_exploration_summaries([item]) + else: + search_services.index_collection_summaries([item]) + + @staticmethod + def reduce(key, values): + pass diff --git a/core/domain/activity_jobs_one_off_test.py b/core/domain/activity_jobs_one_off_test.py new file mode 100644 index 000000000000..48bd7bfc7ff1 --- /dev/null +++ b/core/domain/activity_jobs_one_off_test.py @@ -0,0 +1,91 @@ +# coding: utf-8 +# +# Copyright 2017 The Oppia Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from core.domain import activity_jobs_one_off +from core.domain import collection_services +from core.domain import collection_domain +from core.domain import exp_domain +from core.domain import exp_services +from core.domain import rights_manager +from core.domain import search_services +from core.domain import user_services +from core.platform import models +from core.platform.taskqueue import gae_taskqueue_services as taskqueue_services +from core.tests import test_utils + +gae_search_services = models.Registry.import_search_services() + +class OneOffReindexActivitiesJobTest(test_utils.GenericTestBase): + + def setUp(self): + super(OneOffReindexActivitiesJobTest, self).setUp() + + self.signup(self.OWNER_EMAIL, self.OWNER_USERNAME) + self.owner_id = self.get_user_id_from_email(self.OWNER_EMAIL) + self.owner = user_services.UserActionsInfo(self.owner_id) + + explorations = [exp_domain.Exploration.create_default_exploration( + '%s' % i, + title='title %d' % i, + category='category%d' % i + ) for i in xrange(3)] + + for exp in explorations: + exp_services.save_new_exploration(self.owner_id, exp) + rights_manager.publish_exploration(self.owner, exp.id) + + collections = [collection_domain.Collection.create_default_collection( + '%s' % i, + title='title %d' % i, + category='category%d' % i + ) for i in xrange(3, 6)] + + for collection in collections: + collection_services.save_new_collection(self.owner_id, collection) + rights_manager.publish_collection(self.owner, collection.id) + + self.process_and_flush_pending_tasks() + + def test_standard_operation(self): + job_id = ( + activity_jobs_one_off.IndexAllActivitiesJobManager.create_new()) + activity_jobs_one_off.IndexAllActivitiesJobManager.enqueue(job_id) + + self.assertEqual( + self.count_jobs_in_taskqueue( + taskqueue_services.QUEUE_NAME_ONE_OFF_JOBS), 1) + + indexed_docs = [] + + def add_docs_mock(docs, index): + indexed_docs.extend(docs) + self.assertIn(index, (search_services.SEARCH_INDEX_EXPLORATIONS, + search_services.SEARCH_INDEX_COLLECTIONS)) + + add_docs_swap = self.swap( + gae_search_services, 'add_documents_to_index', add_docs_mock) + + with add_docs_swap: + self.process_and_flush_pending_tasks() + + ids = [doc['id'] for doc in indexed_docs] + titles = [doc['title'] for doc in indexed_docs] + categories = [doc['category'] for doc in indexed_docs] + + for index in xrange(5): + self.assertIn("%s" % index, ids) + self.assertIn('title %d' % index, titles) + self.assertIn('category%d' % index, categories) diff --git a/core/domain/collection_jobs_one_off_test.py b/core/domain/collection_jobs_one_off_test.py index 6d95113e90ab..050a9e55811a 100644 --- a/core/domain/collection_jobs_one_off_test.py +++ b/core/domain/collection_jobs_one_off_test.py @@ -131,6 +131,13 @@ def test_migrate_colections_failing_strict_validation(self): 'category': collection_category, }]) + # Save a collection summary object for indexing. The explicit commit + # does not create a summary object, which is needed for the + # job to update the index after updating the collection. + collection_summary = collection_services.compute_summary_of_collection( + model, self.albert_id) + collection_services.save_collection_summary(collection_summary) + # Start migration job on sample collection. job_id = ( collection_jobs_one_off.CollectionMigrationJob.create_new()) @@ -178,6 +185,13 @@ def test_migration_job_migrates_collection_nodes(self): 'category': collection_category, }]) + # Save a collection summary object for indexing. The explicit commit + # does not create a summary object, which is needed for the + # job to update the index after updating the collection. + collection_summary = collection_services.compute_summary_of_collection( + model, self.albert_id) + collection_services.save_collection_summary(collection_summary) + # Check that collection_contents is empty self.assertEqual(model.collection_contents, {}) diff --git a/core/domain/collection_services.py b/core/domain/collection_services.py index 7f090dc9e8fd..309ab1f63521 100644 --- a/core/domain/collection_services.py +++ b/core/domain/collection_services.py @@ -33,6 +33,7 @@ from core.domain import collection_domain from core.domain import exp_services from core.domain import rights_manager +from core.domain import search_services from core.domain import user_services from core.platform import models import feconf @@ -41,7 +42,6 @@ (collection_models, user_models) = models.Registry.import_models([ models.NAMES.collection, models.NAMES.user]) memcache_services = models.Registry.import_memcache_services() -search_services = models.Registry.import_search_services() # This takes additional 'title' and 'category' parameters. CMD_CREATE_NEW = 'create_new' @@ -53,10 +53,6 @@ # search query. MAX_ITERATIONS = 10 -# This is done to prevent the rank hitting 0 too easily. Note that -# negative ranks are disallowed in the Search API. -_DEFAULT_RANK = 20 - def _migrate_collection_contents_to_latest_schema( versioned_collection_contents): @@ -539,7 +535,7 @@ def get_collection_ids_matching_query(query_string, cursor=None): remaining_to_fetch = feconf.SEARCH_RESULTS_PAGE_SIZE - len( returned_collection_ids) - collection_ids, search_cursor = search_collections( + collection_ids, search_cursor = search_services.search_collections( query_string, remaining_to_fetch, cursor=search_cursor) invalid_collection_ids = [] @@ -837,7 +833,7 @@ def delete_collection(committer_id, collection_id, force_deletion=False): memcache_services.delete(collection_memcache_key) # Delete the collection from search. - delete_documents_from_search_index([collection_id]) + search_services.delete_collections_from_search_index([collection_id]) # Delete the summary of the collection (regardless of whether # force_deletion is True or not). @@ -1243,46 +1239,6 @@ def get_next_page_of_all_non_private_commits( ) for entry in results], new_urlsafe_start_cursor, more) -def _should_index(collection): - """Checks if a particular collection should be indexed. - - Args: - collection: Collection. - """ - rights = rights_manager.get_collection_rights(collection.id) - return rights.status != rights_manager.ACTIVITY_STATUS_PRIVATE - - -def _collection_to_search_dict(collection): - """Converts a collection domain object to a search dict. - - Args: - collection: Collection. The collection domain object to be converted. - - Returns: - The search dict of the collection domain object. - """ - doc = { - 'id': collection.id, - 'title': collection.title, - 'category': collection.category, - 'objective': collection.objective, - 'language_code': collection.language_code, - 'tags': collection.tags, - 'rank': _DEFAULT_RANK, - } - return doc - - -def clear_search_index(): - """Clears the search index. - - WARNING: This runs in-request, and may therefore fail if there are too - many entries in the index. - """ - search_services.clear_index(SEARCH_INDEX_COLLECTIONS) - - def index_collections_given_ids(collection_ids): """Adds the given collections to the search index. @@ -1290,77 +1246,7 @@ def index_collections_given_ids(collection_ids): collection_ids: list(str). List of collection ids whose collections are to be indexed. """ - # We pass 'strict=False' so as not to index deleted collections. - collection_list = get_multiple_collections_by_id( - collection_ids, strict=False).values() - search_services.add_documents_to_index([ - _collection_to_search_dict(collection) - for collection in collection_list - if _should_index(collection) - ], SEARCH_INDEX_COLLECTIONS) - - -def patch_collection_search_document(collection_id, update): - """Patches an collection's current search document, with the values - from the 'update' dictionary. - - Args: - collection_id: str. ID of the collection to be patched. - update: dict. Key-value pairs to patch the current search document with. - """ - doc = search_services.get_document_from_index( - collection_id, SEARCH_INDEX_COLLECTIONS) - doc.update(update) - search_services.add_documents_to_index([doc], SEARCH_INDEX_COLLECTIONS) - - -def update_collection_status_in_search(collection_id): - """Updates the status field of a collection in the search index. - - Args: - collection_id: str. ID of the collection. - """ - rights = rights_manager.get_collection_rights(collection_id) - if rights.status == rights_manager.ACTIVITY_STATUS_PRIVATE: - delete_documents_from_search_index([collection_id]) - else: - patch_collection_search_document(rights.id, {}) - - -def delete_documents_from_search_index(collection_ids): - """Removes the given collections from the search index. - - Args: - collection_ids: list(str). List of IDs of the collections to be removed - from the search index. - """ - search_services.delete_documents_from_index( - collection_ids, SEARCH_INDEX_COLLECTIONS) - - -def search_collections(query, limit, sort=None, cursor=None): - """Searches through the available collections. - - Args: - query_string: str. the query string to search for. - sort: str. This indicates how to sort results. This should be a string - of space separated values. Each value should start with a '+' or a - '-' character indicating whether to sort in ascending or descending - order respectively. This character should be followed by a field - name to sort on. When this is None, results are returned based on - their ranking (which is currently set to the same default value - for all collections). - limit: int. the maximum number of results to return. - cursor: str. A cursor, used to get the next page of results. - If there are more documents that match the query than 'limit', this - function will return a cursor to get the next page. - - Returns: - A 2-tuple with the following elements: - - A list of collection ids that match the query. - - A cursor if there are more matching collections to fetch, None - otherwise. If a cursor is returned, it will be a web-safe string - that can be used in URLs. - """ - return search_services.search( - query, SEARCH_INDEX_COLLECTIONS, cursor, limit, sort, ids_only=True) + collection_summaries = get_collection_summaries_matching_ids(collection_ids) + search_services.index_collection_summaries([ + collection_summary for collection_summary in collection_summaries + if collection_summary is not None]) diff --git a/core/domain/collection_services_test.py b/core/domain/collection_services_test.py index 889ae4d7bc51..60d568ab1eda 100644 --- a/core/domain/collection_services_test.py +++ b/core/domain/collection_services_test.py @@ -27,7 +27,7 @@ (collection_models, user_models) = models.Registry.import_models([ models.NAMES.collection, models.NAMES.user]) -search_services = models.Registry.import_search_services() +gae_search_services = models.Registry.import_search_services() transaction_services = models.Registry.import_transaction_services() @@ -591,7 +591,9 @@ def mock_delete_docs(doc_ids, index): self.assertEqual(doc_ids, [self.COLLECTION_ID]) delete_docs_swap = self.swap( - search_services, 'delete_documents_from_index', mock_delete_docs) + gae_search_services, + 'delete_documents_from_index', + mock_delete_docs) with delete_docs_swap: collection_services.delete_collection( @@ -1586,14 +1588,6 @@ def test_paging_with_no_commits(self): class CollectionSearchTests(CollectionServicesUnitTests): """Test collection search.""" - def test_demo_collections_are_added_to_search_index(self): - results = collection_services.search_collections('Welcome', 2)[0] - self.assertEqual(results, []) - - collection_services.load_demo('0') - results = collection_services.search_collections('Welcome', 2)[0] - self.assertEqual(results, ['0']) - def test_index_collections_given_ids(self): all_collection_ids = ['id0', 'id1', 'id2', 'id3', 'id4'] expected_collection_ids = all_collection_ids[:-1] @@ -1616,7 +1610,7 @@ def mock_add_documents_to_index(docs, index): return ids add_docs_counter = test_utils.CallCounter(mock_add_documents_to_index) - add_docs_swap = self.swap(search_services, + add_docs_swap = self.swap(gae_search_services, 'add_documents_to_index', add_docs_counter) @@ -1638,93 +1632,6 @@ def mock_add_documents_to_index(docs, index): self.assertEqual(add_docs_counter.times_called, 1) - def test_patch_collection_search_document(self): - - def mock_get_doc(doc_id, index): - self.assertEqual(doc_id, self.COLLECTION_ID) - self.assertEqual( - index, collection_services.SEARCH_INDEX_COLLECTIONS) - return {'a': 'b', 'c': 'd'} - - def mock_add_docs(docs, index): - self.assertEqual( - index, collection_services.SEARCH_INDEX_COLLECTIONS) - self.assertEqual(docs, [{'a': 'b', 'c': 'e', 'f': 'g'}]) - - get_doc_swap = self.swap( - search_services, 'get_document_from_index', mock_get_doc) - - add_docs_counter = test_utils.CallCounter(mock_add_docs) - add_docs_swap = self.swap( - search_services, 'add_documents_to_index', add_docs_counter) - - with get_doc_swap, add_docs_swap: - patch = {'c': 'e', 'f': 'g'} - collection_services.patch_collection_search_document( - self.COLLECTION_ID, patch) - - self.assertEqual(add_docs_counter.times_called, 1) - - def test_update_private_collection_status_in_search(self): - - def mock_delete_docs(ids, index): - self.assertEqual(ids, [self.COLLECTION_ID]) - self.assertEqual( - index, collection_services.SEARCH_INDEX_COLLECTIONS) - - def mock_get_rights(unused_collection_id): - return rights_manager.ActivityRights( - self.COLLECTION_ID, - [self.owner_id], [self.editor_id], [self.viewer_id], - status=rights_manager.ACTIVITY_STATUS_PRIVATE - ) - - delete_docs_counter = test_utils.CallCounter(mock_delete_docs) - - delete_docs_swap = self.swap( - search_services, 'delete_documents_from_index', - delete_docs_counter) - get_rights_swap = self.swap( - rights_manager, 'get_collection_rights', mock_get_rights) - - with get_rights_swap, delete_docs_swap: - collection_services.update_collection_status_in_search( - self.COLLECTION_ID) - - self.assertEqual(delete_docs_counter.times_called, 1) - - def test_search_collections(self): - expected_query_string = 'a query string' - expected_cursor = 'cursor' - expected_sort = 'title' - expected_limit = 30 - expected_result_cursor = 'rcursor' - doc_ids = ['id1', 'id2'] - - def mock_search(query_string, index, cursor=None, limit=20, sort='', - ids_only=False, retries=3): - self.assertEqual(query_string, expected_query_string) - self.assertEqual( - index, collection_services.SEARCH_INDEX_COLLECTIONS) - self.assertEqual(cursor, expected_cursor) - self.assertEqual(limit, expected_limit) - self.assertEqual(sort, expected_sort) - self.assertEqual(ids_only, True) - self.assertEqual(retries, 3) - - return doc_ids, expected_result_cursor - - with self.swap(search_services, 'search', mock_search): - result, cursor = collection_services.search_collections( - expected_query_string, - expected_limit, - sort=expected_sort, - cursor=expected_cursor, - ) - - self.assertEqual(cursor, expected_result_cursor) - self.assertEqual(result, doc_ids) - class CollectionSummaryTests(CollectionServicesUnitTests): """Test collection summaries.""" diff --git a/core/domain/exp_jobs_one_off.py b/core/domain/exp_jobs_one_off.py index 4c9811f40252..2eeb895a26f3 100644 --- a/core/domain/exp_jobs_one_off.py +++ b/core/domain/exp_jobs_one_off.py @@ -144,23 +144,6 @@ def reduce(exp_id, stringified_commit_times_msecs): first_published_msec) -class IndexAllExplorationsJobManager(jobs.BaseMapReduceOneOffJobManager): - """One-off job that indexes all explorations and computes their ranks.""" - - @classmethod - def entity_classes_to_map_over(cls): - return [exp_models.ExplorationModel] - - @staticmethod - def map(item): - if not item.deleted: - exp_services.index_explorations_given_ids([item.id]) - - @staticmethod - def reduce(key, values): - pass - - class ExplorationValidityJobManager(jobs.BaseMapReduceOneOffJobManager): """Job that checks that all explorations have appropriate validation statuses. diff --git a/core/domain/exp_jobs_one_off_test.py b/core/domain/exp_jobs_one_off_test.py index 23fad6693fa2..bc05bbe30f80 100644 --- a/core/domain/exp_jobs_one_off_test.py +++ b/core/domain/exp_jobs_one_off_test.py @@ -23,7 +23,6 @@ from core.domain import rights_manager from core.domain import user_services from core.platform import models -from core.platform.taskqueue import gae_taskqueue_services as taskqueue_services from core.tests import test_utils import feconf import utils @@ -550,59 +549,6 @@ def test_nonhuman_committers_not_counted(self): exploration_summary.contributors_summary) -class OneOffReindexExplorationsJobTest(test_utils.GenericTestBase): - - EXP_ID = 'exp_id' - - def setUp(self): - super(OneOffReindexExplorationsJobTest, self).setUp() - - self.signup(self.OWNER_EMAIL, self.OWNER_USERNAME) - self.owner_id = self.get_user_id_from_email(self.OWNER_EMAIL) - self.owner = user_services.UserActionsInfo(self.owner_id) - - explorations = [exp_domain.Exploration.create_default_exploration( - '%s%s' % (self.EXP_ID, i), - title='title %d' % i, - category='category%d' % i - ) for i in xrange(5)] - - for exp in explorations: - exp_services.save_new_exploration(self.owner_id, exp) - rights_manager.publish_exploration(self.owner, exp.id) - - self.process_and_flush_pending_tasks() - - def test_standard_operation(self): - job_id = (exp_jobs_one_off.IndexAllExplorationsJobManager.create_new()) - exp_jobs_one_off.IndexAllExplorationsJobManager.enqueue(job_id) - - self.assertEqual( - self.count_jobs_in_taskqueue( - taskqueue_services.QUEUE_NAME_ONE_OFF_JOBS), 1) - - indexed_docs = [] - - def add_docs_mock(docs, index): - indexed_docs.extend(docs) - self.assertEqual(index, exp_services.SEARCH_INDEX_EXPLORATIONS) - - add_docs_swap = self.swap( - search_services, 'add_documents_to_index', add_docs_mock) - - with add_docs_swap: - self.process_and_flush_pending_tasks() - - ids = [doc['id'] for doc in indexed_docs] - titles = [doc['title'] for doc in indexed_docs] - categories = [doc['category'] for doc in indexed_docs] - - for index in xrange(5): - self.assertIn("%s%s" % (self.EXP_ID, index), ids) - self.assertIn('title %d' % index, titles) - self.assertIn('category%d' % index, categories) - - class ExplorationMigrationJobTest(test_utils.GenericTestBase): ALBERT_EMAIL = 'albert@example.com' diff --git a/core/domain/exp_services.py b/core/domain/exp_services.py index bce69c137b79..2f9f9f6864f5 100644 --- a/core/domain/exp_services.py +++ b/core/domain/exp_services.py @@ -39,13 +39,13 @@ from core.domain import feedback_services from core.domain import fs_domain from core.domain import rights_manager +from core.domain import search_services from core.domain import user_services from core.platform import models import feconf import utils memcache_services = models.Registry.import_memcache_services() -search_services = models.Registry.import_search_services() taskqueue_services = models.Registry.import_taskqueue_services() (exp_models, feedback_models, user_models) = models.Registry.import_models([ models.NAMES.exploration, models.NAMES.feedback, models.NAMES.user @@ -61,10 +61,6 @@ # search query. MAX_ITERATIONS = 10 -# This is done to prevent the rank hitting 0 too easily. Note that -# negative ranks are disallowed in the Search API. -_DEFAULT_RANK = 20 - def _migrate_states_schema(versioned_exploration_states): """Holds the responsibility of performing a step-by-step, sequential update @@ -441,7 +437,7 @@ def get_exploration_ids_matching_query(query_string, cursor=None): remaining_to_fetch = feconf.SEARCH_RESULTS_PAGE_SIZE - len( returned_exploration_ids) - exp_ids, search_cursor = search_explorations( + exp_ids, search_cursor = search_services.search_explorations( query_string, remaining_to_fetch, cursor=search_cursor) invalid_exp_ids = [] @@ -923,7 +919,7 @@ def delete_exploration(committer_id, exploration_id, force_deletion=False): memcache_services.delete(exploration_memcache_key) # Delete the exploration from search. - delete_documents_from_search_index([exploration_id]) + search_services.delete_explorations_from_search_index([exploration_id]) # Delete the exploration summary, regardless of whether or not # force_deletion is True. @@ -1517,17 +1513,6 @@ def get_next_page_of_all_non_private_commits( ) for entry in results], new_urlsafe_start_cursor, more) -def _should_index(exp): - """Returns whether the given exploration should be indexed for future - search queries. - - Args: - exp: Exploration domain object. - """ - rights = rights_manager.get_exploration_rights(exp.id) - return rights.status != rights_manager.ACTIVITY_STATUS_PRIVATE - - def get_number_of_ratings(ratings): """Gets the total number of ratings represented by the given ratings object. @@ -1591,35 +1576,7 @@ def get_scaled_average_rating(ratings): wilson_score_lower_bound = (a - b)/(1 + z**2/n) return 1 + 4 * wilson_score_lower_bound - -def get_search_rank_from_exp_summary(exp_summary): - """Returns an integer determining the document's rank in search. - - Featured explorations get a ranking bump, and so do explorations that - have been more recently updated. Good ratings will increase the ranking - and bad ones will lower it. - - Args: - exp_summary: ExplorationSummary. ExplorationSummary domain object. - - Returns: - int. Document's rank in search. - """ - # TODO(sll): Improve this calculation. - rating_weightings = {'1': -5, '2': -2, '3': 2, '4': 5, '5': 10} - - rank = _DEFAULT_RANK - if exp_summary.ratings: - for rating_value in exp_summary.ratings: - rank += ( - exp_summary.ratings[rating_value] * - rating_weightings[rating_value]) - - # Ranks must be non-negative. - return max(rank, 0) - - -def get_search_rank(exp_id): +def get_exploration_search_rank(exp_id): """Returns the search rank. Args: @@ -1629,39 +1586,7 @@ def get_search_rank(exp_id): int. The rank of the exploration. """ exp_summary = get_exploration_summary_by_id(exp_id) - return get_search_rank_from_exp_summary(exp_summary) - - -def _exp_to_search_dict(exp): - """Updates the dict to be returned, whether the given exploration is to - be indexed for further queries or not. - - Args: - exp: Exploration. Exploration domain object. - - Returns: - dict. The representation of the given exploration, in a form that can - be used by the search index. - """ - doc = { - 'id': exp.id, - 'language_code': exp.language_code, - 'title': exp.title, - 'category': exp.category, - 'tags': exp.tags, - 'blurb': exp.blurb, - 'objective': exp.objective, - 'author_notes': exp.author_notes, - 'rank': get_search_rank(exp.id), - } - return doc - - -def clear_search_index(): - """WARNING: This runs in-request, and may therefore fail if there are too - many entries in the index. - """ - search_services.clear_index(SEARCH_INDEX_EXPLORATIONS) + return search_services.get_search_rank_from_exp_summary(exp_summary) def index_explorations_given_ids(exp_ids): @@ -1670,80 +1595,10 @@ def index_explorations_given_ids(exp_ids): Args: exp_ids: list(str). List of ids of the explorations to be indexed. """ - # We pass 'strict=False' so as not to index deleted explorations. - exploration_models = get_multiple_explorations_by_id(exp_ids, strict=False) - search_services.add_documents_to_index([ - _exp_to_search_dict(exp) for exp in exploration_models.values() - if _should_index(exp) - ], SEARCH_INDEX_EXPLORATIONS) - - -def patch_exploration_search_document(exp_id, update): - """Patches an exploration's current search document, with the values - from the 'update' dictionary. - - Args: - exp_id: str. The id of the exploration to be patched. - update: dict. Key-value pairs to patch the exploration's search - document with. - """ - doc = search_services.get_document_from_index( - exp_id, SEARCH_INDEX_EXPLORATIONS) - doc.update(update) - search_services.add_documents_to_index([doc], SEARCH_INDEX_EXPLORATIONS) - - -def update_exploration_status_in_search(exp_id): - """Updates the exploration status in its search doc. - - Args: - exp_id: str. The id of the exploration whose status is to be - updated. - """ - rights = rights_manager.get_exploration_rights(exp_id) - if rights.status == rights_manager.ACTIVITY_STATUS_PRIVATE: - delete_documents_from_search_index([exp_id]) - else: - patch_exploration_search_document(rights.id, {}) - - -def delete_documents_from_search_index(exploration_ids): - """Deletes the documents corresponding to these exploration_ids from the - search index. - - Args: - exploration_ids: list(str). A list of exploration ids whose - documents are to be deleted from the search index. - """ - search_services.delete_documents_from_index( - exploration_ids, SEARCH_INDEX_EXPLORATIONS) - - -def search_explorations(query, limit, sort=None, cursor=None): - """Searches through the available explorations. - - Args: - query_string: str. The query string to search for. - limit: int. The maximum number of results to return. - sort: str. A string indicating how to sort results. This should be a - string of space separated values. Each value should start with a - '+' or a '-' character indicating whether to sort in ascending or - descending order respectively. This character should be followed by - a field name to sort on. When this is None, results are based on - 'rank'. See get_search_rank to see how rank is determined. - cursor: str or None. A cursor, used to get the next page of results. If - there are more documents that match the query than 'limit', this - function will return a cursor to get the next page. - - Returns: - tuple. A 2-tuple consisting of: - - list(str). A list of exploration ids that match the query. - - str or None. A cursor if there are more matching explorations to - fetch, None otherwise. If a cursor is returned, it will be a - web-safe string that can be used in URLs. - """ - return search_services.search( - query, SEARCH_INDEX_EXPLORATIONS, cursor, limit, sort, ids_only=True) + exploration_summaries = get_exploration_summaries_matching_ids(exp_ids) + search_services.index_exploration_summaries([ + exploration_summary for exploration_summary in exploration_summaries + if exploration_summary is not None]) def _is_suggestion_valid(thread_id, exploration_id): diff --git a/core/domain/exp_services_test.py b/core/domain/exp_services_test.py index 22f7a3b19560..7edc9b0f4ca5 100644 --- a/core/domain/exp_services_test.py +++ b/core/domain/exp_services_test.py @@ -28,6 +28,7 @@ from core.domain import param_domain from core.domain import rating_services from core.domain import rights_manager +from core.domain import search_services from core.domain import user_services from core.platform import models from core.tests import test_utils @@ -1803,14 +1804,6 @@ class ExplorationSearchTests(ExplorationServicesUnitTests): USER_ID_1 = 'user_1' USER_ID_2 = 'user_2' - def test_demo_explorations_are_added_to_search_index(self): - results, _ = exp_services.search_explorations('Welcome', 2) - self.assertEqual(results, []) - - exp_services.load_demo('0') - results, _ = exp_services.search_explorations('Welcome', 2) - self.assertEqual(results, ['0']) - def test_index_explorations_given_ids(self): all_exp_ids = ['id0', 'id1', 'id2', 'id3', 'id4'] expected_exp_ids = all_exp_ids[:-1] @@ -1853,87 +1846,6 @@ def mock_add_documents_to_index(docs, index): self.assertEqual(add_docs_counter.times_called, 1) - def test_patch_exploration_search_document(self): - - def mock_get_doc(doc_id, index): - self.assertEqual(doc_id, self.EXP_ID) - self.assertEqual(index, exp_services.SEARCH_INDEX_EXPLORATIONS) - return {'a': 'b', 'c': 'd'} - - def mock_add_docs(docs, index): - self.assertEqual(index, exp_services.SEARCH_INDEX_EXPLORATIONS) - self.assertEqual(docs, [{'a': 'b', 'c': 'e', 'f': 'g'}]) - - get_doc_swap = self.swap( - search_services, 'get_document_from_index', mock_get_doc) - - add_docs_counter = test_utils.CallCounter(mock_add_docs) - add_docs_swap = self.swap( - search_services, 'add_documents_to_index', add_docs_counter) - - with get_doc_swap, add_docs_swap: - patch = {'c': 'e', 'f': 'g'} - exp_services.patch_exploration_search_document(self.EXP_ID, patch) - - self.assertEqual(add_docs_counter.times_called, 1) - - def test_update_private_exploration_status_in_search(self): - - def mock_delete_docs(ids, index): - self.assertEqual(ids, [self.EXP_ID]) - self.assertEqual(index, exp_services.SEARCH_INDEX_EXPLORATIONS) - - def mock_get_rights(unused_exp_id): - return rights_manager.ActivityRights( - self.EXP_ID, - [self.owner_id], [self.editor_id], [self.viewer_id], - status=rights_manager.ACTIVITY_STATUS_PRIVATE - ) - - delete_docs_counter = test_utils.CallCounter(mock_delete_docs) - - delete_docs_swap = self.swap( - search_services, 'delete_documents_from_index', - delete_docs_counter) - get_rights_swap = self.swap( - rights_manager, 'get_exploration_rights', mock_get_rights) - - with get_rights_swap, delete_docs_swap: - exp_services.update_exploration_status_in_search(self.EXP_ID) - - self.assertEqual(delete_docs_counter.times_called, 1) - - def test_search_explorations(self): - expected_query_string = 'a query string' - expected_cursor = 'cursor' - expected_sort = 'title' - expected_limit = 30 - expected_result_cursor = 'rcursor' - doc_ids = ['id1', 'id2'] - - def mock_search(query_string, index, cursor=None, limit=20, sort='', - ids_only=False, retries=3): - self.assertEqual(query_string, expected_query_string) - self.assertEqual(index, exp_services.SEARCH_INDEX_EXPLORATIONS) - self.assertEqual(cursor, expected_cursor) - self.assertEqual(limit, expected_limit) - self.assertEqual(sort, expected_sort) - self.assertEqual(ids_only, True) - self.assertEqual(retries, 3) - - return doc_ids, expected_result_cursor - - with self.swap(search_services, 'search', mock_search): - result, cursor = exp_services.search_explorations( - expected_query_string, - expected_limit, - sort=expected_sort, - cursor=expected_cursor, - ) - - self.assertEqual(cursor, expected_result_cursor) - self.assertEqual(result, doc_ids) - def test_get_number_of_ratings(self): self.save_new_valid_exploration(self.EXP_ID, self.owner_id) exp = exp_services.get_exploration_summary_by_id(self.EXP_ID) @@ -2000,51 +1912,6 @@ def test_get_lower_bound_wilson_rating_from_exp_summary(self): 2.056191454757, places=4) - def test_get_search_rank(self): - self.save_new_valid_exploration(self.EXP_ID, self.owner_id) - - base_search_rank = 20 - - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank) - - rights_manager.publish_exploration(self.owner, self.EXP_ID) - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank) - - rating_services.assign_rating_to_exploration( - self.owner_id, self.EXP_ID, 5) - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank + 10) - - rating_services.assign_rating_to_exploration( - self.user_id_admin, self.EXP_ID, 2) - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank + 8) - - def test_search_ranks_cannot_be_negative(self): - self.save_new_valid_exploration(self.EXP_ID, self.owner_id) - - base_search_rank = 20 - - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank) - - # A user can (down-)rate an exploration at most once. - for i in xrange(50): - rating_services.assign_rating_to_exploration( - 'user_id_1', self.EXP_ID, 1) - self.assertEqual( - exp_services.get_search_rank(self.EXP_ID), base_search_rank - 5) - - for i in xrange(50): - rating_services.assign_rating_to_exploration( - 'user_id_%s' % i, self.EXP_ID, 1) - - # The rank will be at least 0. - self.assertEqual(exp_services.get_search_rank(self.EXP_ID), 0) - - class ExplorationSummaryTests(ExplorationServicesUnitTests): """Test exploration summaries.""" ALBERT_EMAIL = 'albert@example.com' diff --git a/core/domain/search_services.py b/core/domain/search_services.py new file mode 100644 index 000000000000..51831978d8d6 --- /dev/null +++ b/core/domain/search_services.py @@ -0,0 +1,301 @@ +# coding: utf-8 +# +# Copyright 2017 The Oppia Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Commands for operating on the search status of activities.""" + +from core.domain import rights_manager +from core.platform import models + +search_services = models.Registry.import_search_services() + +# Name for the exploration search index. +SEARCH_INDEX_EXPLORATIONS = 'explorations' + +# Name for the collection search index. +SEARCH_INDEX_COLLECTIONS = 'collections' + +# This is done to prevent the rank hitting 0 too easily. Note that +# negative ranks are disallowed in the Search API. +_DEFAULT_RANK = 20 + +def index_exploration_summaries(exp_summaries): + """Adds the explorations to the search index. + + Args: + exp_summaries: list(ExpSummaryModel). List of Exp Summary domain + objects to be indexed. + """ + search_services.add_documents_to_index([ + _exp_summary_to_search_dict(exp_summary) + for exp_summary in exp_summaries + if _should_index_exploration(exp_summary) + ], SEARCH_INDEX_EXPLORATIONS) + + +def _exp_summary_to_search_dict(exp_summary): + """Updates the dict to be returned, whether the given exploration is to + be indexed for further queries or not. + + Args: + exp_summary: ExpSummaryModel. ExplorationSummary domain object. + + Returns: + dict. The representation of the given exploration, in a form that can + be used by the search index. + """ + doc = { + 'id': exp_summary.id, + 'language_code': exp_summary.language_code, + 'title': exp_summary.title, + 'category': exp_summary.category, + 'tags': exp_summary.tags, + 'objective': exp_summary.objective, + 'rank': get_search_rank_from_exp_summary(exp_summary), + } + return doc + + +def _should_index_exploration(exp_summary): + """Returns whether the given exploration should be indexed for future + search queries. + + Args: + exp_summary: ExpSummaryModel. ExplorationSummary domain object. + """ + rights = rights_manager.get_exploration_rights(exp_summary.id) + return rights.status != rights_manager.ACTIVITY_STATUS_PRIVATE + + +def get_search_rank_from_exp_summary(exp_summary): + """Returns an integer determining the document's rank in search. + + Featured explorations get a ranking bump, and so do explorations that + have been more recently updated. Good ratings will increase the ranking + and bad ones will lower it. + + Args: + exp_summary: ExplorationSummary. ExplorationSummary domain object. + + Returns: + int. Document's rank in search. + """ + # TODO(sll): Improve this calculation. + rating_weightings = {'1': -5, '2': -2, '3': 2, '4': 5, '5': 10} + + rank = _DEFAULT_RANK + if exp_summary.ratings: + for rating_value in exp_summary.ratings: + rank += ( + exp_summary.ratings[rating_value] * + rating_weightings[rating_value]) + + # Ranks must be non-negative. + return max(rank, 0) + + +def index_collection_summaries(collection_summaries): + """Adds the collections to the search index. + + Args: + collection_summaries: list(CollectionSummaryModel). List of + Collection Summary domain objects to be indexed. + """ + search_services.add_documents_to_index([ + _collection_summary_to_search_dict(collection_summary) + for collection_summary in collection_summaries + if _should_index_collection(collection_summary) + ], SEARCH_INDEX_COLLECTIONS) + + +def update_exploration_status_in_search(exp_id): + """Updates the exploration status in its search doc. + + Args: + exp_id: str. The id of the exploration whose status is to be + updated. + """ + rights = rights_manager.get_exploration_rights(exp_id) + if rights.status == rights_manager.ACTIVITY_STATUS_PRIVATE: + delete_explorations_from_search_index([exp_id]) + else: + patch_exploration_search_document(rights.id, {}) + + +def _collection_summary_to_search_dict(collection_summary): + """Converts a collection domain object to a search dict. + + Args: + collection_summary: CollectionSummaryModel. The collection + summary object to be converted. + + Returns: + dict. The search dict of the collection domain object. + """ + doc = { + 'id': collection_summary.id, + 'title': collection_summary.title, + 'category': collection_summary.category, + 'objective': collection_summary.objective, + 'language_code': collection_summary.language_code, + 'tags': collection_summary.tags, + 'rank': _DEFAULT_RANK, + } + return doc + + +def _should_index_collection(collection): + """Checks if a particular collection should be indexed. + + Args: + collection: CollectionSummaryModel. + """ + rights = rights_manager.get_collection_rights(collection.id) + return rights.status != rights_manager.ACTIVITY_STATUS_PRIVATE + + +def search_explorations(query, limit, sort=None, cursor=None): + """Searches through the available explorations. + + Args: + query_string: str or None. The query string to search for. + limit: int. The maximum number of results to return. + sort: str. A string indicating how to sort results. This should be a + string of space separated values. Each value should start with a + '+' or a '-' character indicating whether to sort in ascending or + descending order respectively. This character should be followed by + a field name to sort on. When this is None, results are based on + 'rank'. get_search_rank_from_exp_summary to see how + rank is determined. + cursor: str or None. A cursor, used to get the next page of results. If + there are more documents that match the query than 'limit', this + function will return a cursor to get the next page. + + Returns: + tuple. A 2-tuple consisting of: + - list(str). A list of exploration ids that match the query. + - str or None. A cursor if there are more matching explorations to + fetch, None otherwise. If a cursor is returned, it will be a + web-safe string that can be used in URLs. + """ + return search_services.search( + query, SEARCH_INDEX_EXPLORATIONS, cursor, limit, sort, ids_only=True) + + +def delete_explorations_from_search_index(exploration_ids): + """Deletes the documents corresponding to these exploration_ids from the + search index. + + Args: + exploration_ids: list(str). A list of exploration ids whose + documents are to be deleted from the search index. + """ + search_services.delete_documents_from_index( + exploration_ids, SEARCH_INDEX_EXPLORATIONS) + + +def patch_exploration_search_document(exp_id, update): + """Patches an exploration's current search document, with the values + from the 'update' dictionary. + + Args: + exp_id: str. The id of the exploration to be patched. + update: dict. Key-value pairs to patch the exploration's search + document with. + """ + doc = search_services.get_document_from_index( + exp_id, SEARCH_INDEX_EXPLORATIONS) + doc.update(update) + search_services.add_documents_to_index([doc], SEARCH_INDEX_EXPLORATIONS) + + +def clear_exploration_search_index(): + """WARNING: This runs in-request, and may therefore fail if there are too + many entries in the index. + """ + search_services.clear_index(SEARCH_INDEX_EXPLORATIONS) + +def search_collections(query, limit, sort=None, cursor=None): + """Searches through the available collections. + + Args: + query_string: str or None. the query string to search for. + limit: int. the maximum number of results to return. + sort: str. This indicates how to sort results. This should be a string + of space separated values. Each value should start with a '+' or a + '-' character indicating whether to sort in ascending or descending + order respectively. This character should be followed by a field + name to sort on. When this is None, results are returned based on + their ranking (which is currently set to the same default value + for all collections). + cursor: str or None. A cursor, used to get the next page of results. + If there are more documents that match the query than 'limit', this + function will return a cursor to get the next page. + + Returns: + A 2-tuple with the following elements: + - A list of collection ids that match the query. + - A cursor if there are more matching collections to fetch, None + otherwise. If a cursor is returned, it will be a web-safe string + that can be used in URLs. + """ + return search_services.search( + query, SEARCH_INDEX_COLLECTIONS, cursor, limit, sort, ids_only=True) + + +def delete_collections_from_search_index(collection_ids): + """Removes the given collections from the search index. + + Args: + collection_ids: list(str). List of IDs of the collections to be removed + from the search index. + """ + search_services.delete_documents_from_index( + collection_ids, SEARCH_INDEX_COLLECTIONS) + + +def patch_collection_search_document(collection_id, update): + """Patches an collection's current search document, with the values + from the 'update' dictionary. + + Args: + collection_id: str. ID of the collection to be patched. + update: dict. Key-value pairs to patch the current search document with. + """ + doc = search_services.get_document_from_index( + collection_id, SEARCH_INDEX_COLLECTIONS) + doc.update(update) + search_services.add_documents_to_index([doc], SEARCH_INDEX_COLLECTIONS) + + +def clear_collection_search_index(): + """Clears the search index. + + WARNING: This runs in-request, and may therefore fail if there are too + many entries in the index. + """ + search_services.clear_index(SEARCH_INDEX_COLLECTIONS) + +def update_collection_status_in_search(collection_id): + """Updates the status field of a collection in the search index. + + Args: + collection_id: str. ID of the collection. + """ + rights = rights_manager.get_collection_rights(collection_id) + if rights.status == rights_manager.ACTIVITY_STATUS_PRIVATE: + delete_collections_from_search_index([collection_id]) + else: + patch_collection_search_document(rights.id, {}) diff --git a/core/domain/search_services_test.py b/core/domain/search_services_test.py new file mode 100644 index 000000000000..34c5f88da09a --- /dev/null +++ b/core/domain/search_services_test.py @@ -0,0 +1,294 @@ +# coding: utf-8 +# +# Copyright 2014 The Oppia Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS-IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from core.domain import collection_services +from core.domain import exp_services +from core.domain import rating_services +from core.domain import rights_manager +from core.domain import search_services +from core.domain import user_services +from core.platform import models +from core.tests import test_utils + +gae_search_services = models.Registry.import_search_services() + +class SearchServicesUnitTests(test_utils.GenericTestBase): + """Test the search services module.""" + EXP_ID = 'An_exploration_id' + COLLECTION_ID = 'A_collection_id' + + def setUp(self): + super(SearchServicesUnitTests, self).setUp() + + self.owner_id = self.get_user_id_from_email(self.OWNER_EMAIL) + self.editor_id = self.get_user_id_from_email(self.EDITOR_EMAIL) + self.viewer_id = self.get_user_id_from_email(self.VIEWER_EMAIL) + + user_services.create_new_user(self.owner_id, self.OWNER_EMAIL) + user_services.create_new_user(self.editor_id, self.EDITOR_EMAIL) + user_services.create_new_user(self.viewer_id, self.VIEWER_EMAIL) + + self.signup(self.OWNER_EMAIL, self.OWNER_USERNAME) + self.signup(self.EDITOR_EMAIL, self.EDITOR_USERNAME) + self.signup(self.VIEWER_EMAIL, self.VIEWER_USERNAME) + self.signup(self.ADMIN_EMAIL, self.ADMIN_USERNAME) + + self.owner = user_services.UserActionsInfo(self.owner_id) + + self.set_admins([self.ADMIN_USERNAME]) + self.user_id_admin = self.get_user_id_from_email(self.ADMIN_EMAIL) + + def test_get_search_rank(self): + self.save_new_valid_exploration(self.EXP_ID, self.owner_id) + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + + base_search_rank = 20 + + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank) + + rights_manager.publish_exploration(self.owner, self.EXP_ID) + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank) + + rating_services.assign_rating_to_exploration( + self.owner_id, self.EXP_ID, 5) + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank + 10) + + rating_services.assign_rating_to_exploration( + self.user_id_admin, self.EXP_ID, 2) + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank + 8) + + def test_search_ranks_cannot_be_negative(self): + self.save_new_valid_exploration(self.EXP_ID, self.owner_id) + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + + base_search_rank = 20 + + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank) + + # A user can (down-)rate an exploration at most once. + for i in xrange(50): + rating_services.assign_rating_to_exploration( + 'user_id_1', self.EXP_ID, 1) + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + self.assertEqual( + search_services.get_search_rank_from_exp_summary(exp_summary), + base_search_rank - 5) + + for i in xrange(50): + rating_services.assign_rating_to_exploration( + 'user_id_%s' % i, self.EXP_ID, 1) + + # The rank will be at least 0. + exp_summary = exp_services.get_exploration_summary_by_id(self.EXP_ID) + self.assertEqual(search_services.get_search_rank_from_exp_summary( + exp_summary), 0) + + def test_search_explorations(self): + expected_query_string = 'a query string' + expected_cursor = 'cursor' + expected_sort = 'title' + expected_limit = 30 + expected_result_cursor = 'rcursor' + doc_ids = ['id1', 'id2'] + + def mock_search(query_string, index, cursor=None, limit=20, sort='', + ids_only=False, retries=3): + self.assertEqual(query_string, expected_query_string) + self.assertEqual(index, search_services.SEARCH_INDEX_EXPLORATIONS) + self.assertEqual(cursor, expected_cursor) + self.assertEqual(limit, expected_limit) + self.assertEqual(sort, expected_sort) + self.assertEqual(ids_only, True) + self.assertEqual(retries, 3) + + return doc_ids, expected_result_cursor + + with self.swap(gae_search_services, 'search', mock_search): + result, cursor = search_services.search_explorations( + expected_query_string, + expected_limit, + sort=expected_sort, + cursor=expected_cursor, + ) + + self.assertEqual(cursor, expected_result_cursor) + self.assertEqual(result, doc_ids) + + def test_patch_exploration_search_document(self): + + def mock_get_doc(doc_id, index): + self.assertEqual(doc_id, self.EXP_ID) + self.assertEqual(index, search_services.SEARCH_INDEX_EXPLORATIONS) + return {'a': 'b', 'c': 'd'} + + def mock_add_docs(docs, index): + self.assertEqual(index, search_services.SEARCH_INDEX_EXPLORATIONS) + self.assertEqual(docs, [{'a': 'b', 'c': 'e', 'f': 'g'}]) + + get_doc_swap = self.swap( + gae_search_services, 'get_document_from_index', mock_get_doc) + + add_docs_counter = test_utils.CallCounter(mock_add_docs) + add_docs_swap = self.swap( + gae_search_services, 'add_documents_to_index', add_docs_counter) + + with get_doc_swap, add_docs_swap: + patch = {'c': 'e', 'f': 'g'} + search_services.patch_exploration_search_document( + self.EXP_ID, patch) + + self.assertEqual(add_docs_counter.times_called, 1) + + def test_search_collections(self): + expected_query_string = 'a query string' + expected_cursor = 'cursor' + expected_sort = 'title' + expected_limit = 30 + expected_result_cursor = 'rcursor' + doc_ids = ['id1', 'id2'] + + def mock_search(query_string, index, cursor=None, limit=20, sort='', + ids_only=False, retries=3): + self.assertEqual(query_string, expected_query_string) + self.assertEqual( + index, collection_services.SEARCH_INDEX_COLLECTIONS) + self.assertEqual(cursor, expected_cursor) + self.assertEqual(limit, expected_limit) + self.assertEqual(sort, expected_sort) + self.assertEqual(ids_only, True) + self.assertEqual(retries, 3) + + return doc_ids, expected_result_cursor + + with self.swap(gae_search_services, 'search', mock_search): + result, cursor = search_services.search_collections( + expected_query_string, + expected_limit, + sort=expected_sort, + cursor=expected_cursor, + ) + + self.assertEqual(cursor, expected_result_cursor) + self.assertEqual(result, doc_ids) + + def test_patch_collection_search_document(self): + + def mock_get_doc(doc_id, index): + self.assertEqual(doc_id, self.COLLECTION_ID) + self.assertEqual( + index, search_services.SEARCH_INDEX_COLLECTIONS) + return {'a': 'b', 'c': 'd'} + + def mock_add_docs(docs, index): + self.assertEqual( + index, search_services.SEARCH_INDEX_COLLECTIONS) + self.assertEqual(docs, [{'a': 'b', 'c': 'e', 'f': 'g'}]) + + get_doc_swap = self.swap( + gae_search_services, 'get_document_from_index', mock_get_doc) + + add_docs_counter = test_utils.CallCounter(mock_add_docs) + add_docs_swap = self.swap( + gae_search_services, 'add_documents_to_index', add_docs_counter) + + with get_doc_swap, add_docs_swap: + patch = {'c': 'e', 'f': 'g'} + search_services.patch_collection_search_document( + self.COLLECTION_ID, patch) + + self.assertEqual(add_docs_counter.times_called, 1) + + def test_update_private_collection_status_in_search(self): + + def mock_delete_docs(ids, index): + self.assertEqual(ids, [self.COLLECTION_ID]) + self.assertEqual( + index, search_services.SEARCH_INDEX_COLLECTIONS) + + def mock_get_rights(unused_collection_id): + return rights_manager.ActivityRights( + self.COLLECTION_ID, + [self.owner_id], [self.editor_id], [self.viewer_id], + status=rights_manager.ACTIVITY_STATUS_PRIVATE + ) + + delete_docs_counter = test_utils.CallCounter(mock_delete_docs) + + delete_docs_swap = self.swap( + gae_search_services, 'delete_documents_from_index', + delete_docs_counter) + get_rights_swap = self.swap( + rights_manager, 'get_collection_rights', mock_get_rights) + + with get_rights_swap, delete_docs_swap: + search_services.update_collection_status_in_search( + self.COLLECTION_ID) + + self.assertEqual(delete_docs_counter.times_called, 1) + + def test_demo_collections_are_added_to_search_index(self): + results = search_services.search_collections('Welcome', 2)[0] + self.assertEqual(results, []) + + collection_services.load_demo('0') + results = search_services.search_collections('Welcome', 2)[0] + self.assertEqual(results, ['0']) + + def test_demo_explorations_are_added_to_search_index(self): + results, _ = search_services.search_explorations('Welcome', 2) + self.assertEqual(results, []) + + exp_services.load_demo('0') + results, _ = search_services.search_explorations('Welcome', 2) + self.assertEqual(results, ['0']) + + def test_update_private_exploration_status_in_search(self): + + def mock_delete_docs(ids, index): + self.assertEqual(ids, [self.EXP_ID]) + self.assertEqual(index, search_services.SEARCH_INDEX_EXPLORATIONS) + + def mock_get_rights(unused_exp_id): + return rights_manager.ActivityRights( + self.EXP_ID, + [self.owner_id], [self.editor_id], [self.viewer_id], + status=rights_manager.ACTIVITY_STATUS_PRIVATE + ) + + delete_docs_counter = test_utils.CallCounter(mock_delete_docs) + + delete_docs_swap = self.swap( + gae_search_services, 'delete_documents_from_index', + delete_docs_counter) + get_rights_swap = self.swap( + rights_manager, 'get_exploration_rights', mock_get_rights) + + with get_rights_swap, delete_docs_swap: + search_services.update_exploration_status_in_search(self.EXP_ID) + + self.assertEqual(delete_docs_counter.times_called, 1) diff --git a/core/domain/summary_services.py b/core/domain/summary_services.py index 1c4a3e797a36..67de3ff6c9ae 100644 --- a/core/domain/summary_services.py +++ b/core/domain/summary_services.py @@ -21,6 +21,7 @@ from core.domain import collection_services from core.domain import exp_services from core.domain import rights_manager +from core.domain import search_services from core.domain import stats_jobs_continuous from core.domain import user_services import utils @@ -497,7 +498,7 @@ def _generate_query(categories): all_collection_ids = [] header_id_to_collection_ids = {} for group in _LIBRARY_INDEX_GROUPS: - collection_ids = collection_services.search_collections( + collection_ids = search_services.search_collections( _generate_query(group['search_categories']), 8)[0] header_id_to_collection_ids[group['header_i18n_id']] = collection_ids all_collection_ids += collection_ids @@ -518,7 +519,7 @@ def _generate_query(categories): all_exp_ids = [] header_to_exp_ids = {} for group in _LIBRARY_INDEX_GROUPS: - exp_ids = exp_services.search_explorations( + exp_ids = search_services.search_explorations( _generate_query(group['search_categories']), 8)[0] header_to_exp_ids[group['header_i18n_id']] = exp_ids all_exp_ids += exp_ids diff --git a/core/jobs_registry.py b/core/jobs_registry.py index 0da989fce671..24a9813b3cde 100644 --- a/core/jobs_registry.py +++ b/core/jobs_registry.py @@ -16,6 +16,7 @@ """Job registries.""" +from core.domain import activity_jobs_one_off from core.domain import collection_jobs_one_off from core.domain import exp_jobs_one_off from core.domain import feedback_jobs_one_off @@ -30,6 +31,7 @@ # List of all manager classes for one-off batch jobs for which to show controls # on the admin dashboard. ONE_OFF_JOB_MANAGERS = [ + activity_jobs_one_off.IndexAllActivitiesJobManager, collection_jobs_one_off.CollectionMigrationJob, email_jobs_one_off.EmailHashRegenerationOneOffJob, exp_jobs_one_off.ExpSummariesContributorsOneOffJob, @@ -38,7 +40,6 @@ exp_jobs_one_off.ExplorationFirstPublishedOneOffJob, exp_jobs_one_off.ExplorationMigrationJobManager, exp_jobs_one_off.ExplorationValidityJobManager, - exp_jobs_one_off.IndexAllExplorationsJobManager, exp_jobs_one_off.ItemSelectionInteractionOneOffJob, exp_jobs_one_off.ViewableExplorationsAuditJob, feedback_jobs_one_off.FeedbackThreadMessagesCountOneOffJob, diff --git a/core/tests/test_utils.py b/core/tests/test_utils.py index 17b4c9801065..9463593a263d 100644 --- a/core/tests/test_utils.py +++ b/core/tests/test_utils.py @@ -480,6 +480,23 @@ def save_new_exp_with_states_schema_v0(self, exp_id, user_id, title): 'title': 'title', 'category': 'category', }]) + exp_rights = exp_models.ExplorationRightsModel.get_by_id(exp_id) + exp_summary_model = exp_models.ExpSummaryModel( + id=exp_id, + title=title, + category='category', + objective='Old objective', + language_code='en', + tags=[], + ratings=feconf.get_empty_ratings(), + scaled_average_rating=feconf.EMPTY_SCALED_AVERAGE_RATING, + status=exp_rights.status, + community_owned=exp_rights.community_owned, + owner_ids=exp_rights.owner_ids, + contributor_ids=[], + contributors_summary={}, + ) + exp_summary_model.put() def save_new_default_collection( self, collection_id, owner_id, title='A title', diff --git a/main_cron.py b/main_cron.py index 7ca3aa612a61..a6a918a40d30 100644 --- a/main_cron.py +++ b/main_cron.py @@ -36,7 +36,7 @@ cron.CronExplorationRecommendationsHandler), main.get_redirect_route( r'/cron/explorations/search_rank', - cron.CronExplorationSearchRankHandler), + cron.CronActivitySearchRankHandler), main.get_redirect_route( r'/cron/jobs/cleanup', cron.CronMapreduceCleanupHandler), ]