Skip to content

Commit

Permalink
grangier#188 - test refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Xavier Grangier committed Dec 31, 2014
1 parent b762ea8 commit ea693a9
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 141 deletions.
File renamed without changes.
File renamed without changes.
135 changes: 1 addition & 134 deletions tests/extractors/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,130 +20,11 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import json
from base import TestExtractionBase

from base import BaseMockTests
from base import MockResponseExtractors

from goose import Goose
from goose.configuration import Configuration
from goose.text import StopWordsChinese
from goose.text import StopWordsArabic
from goose.text import StopWordsKorean
from goose.utils import FileHelper


CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))


class TestExtractionBase(BaseMockTests):
    """\
    Extraction test case

    Shared helpers for extractor tests: fixture files are looked up
    from the id of the running test, Goose is run on the URL stored in
    the JSON fixture, and the extracted article is compared with the
    ``expected`` values of that fixture.
    """
    # NOTE(review): presumably consumed by BaseMockTests to serve the
    # mocked HTTP responses -- confirm in the base module.
    callback = MockResponseExtractors

    def _resource_path(self, extension):
        """\
        Return the absolute path of the fixture file of the running test

        The path is
        ``<parent of CURRENT_PATH>/data/<suite>/<module>/<func>.<extension>``
        where suite, module and func are the 2nd, 3rd and 5th
        dot-separated parts of ``self.id()``. The id must have exactly
        five parts, otherwise the unpacking raises ValueError.
        """
        _, suite, module, _, func = self.id().split('.')
        path = os.path.join(
            os.path.dirname(CURRENT_PATH),
            "data",
            suite,
            module,
            "%s.%s" % (func, extension))
        return os.path.abspath(path)

    def getRawHtml(self):
        """\
        Return the content of the ``.html`` fixture of the running test
        """
        return FileHelper.loadResourceFile(self._resource_path("html"))

    def loadData(self):
        """\
        Load the ``.json`` fixture of the running test into ``self.data``
        """
        content = FileHelper.loadResourceFile(self._resource_path("json"))
        self.data = json.loads(content)

    def assert_cleaned_text(self, field, expected_value, result_value):
        """\
        Custom assertion for the ``cleaned_text`` field

        Passes when ``result_value`` is not None and starts with
        ``expected_value``; any text after that prefix is ignored.
        ``field`` is unused here; it is part of the signature that
        runArticleAssertions uses for every ``assert_<field>`` method.
        """
        # TODO : handle verbose level in tests

        # cleaned_text is Null
        msg = u"Resulting article text was NULL!"
        self.assertIsNotNone(result_value, msg=msg)

        # cleaned_text length
        msg = u"Article text was not as long as expected beginning!"
        self.assertLessEqual(len(expected_value), len(result_value), msg=msg)

        # clean_text value: only the leading part is compared
        result_value = result_value[0:len(expected_value)]
        msg = u"The beginning of the article text was not as expected!"
        self.assertEqual(expected_value, result_value, msg=msg)

    def runArticleAssertions(self, article, fields):
        """\
        Compare each of ``fields`` on ``article`` with the fixture

        The expected value is read from ``self.data['expected'][field]``
        and the result from the attribute of the same name on
        ``article`` (None when missing). A method named
        ``assert_<field>`` on the test case, when present, replaces the
        default equality assertion for that field.
        """
        for field in fields:
            expected_value = self.data['expected'][field]
            result_value = getattr(article, field, None)

            # custom assertion for a given field
            assertion = 'assert_%s' % field
            if hasattr(self, assertion):
                getattr(self, assertion)(field, expected_value, result_value)
                continue

            # default assertion
            msg = u"Error %s \nexpected: %s\nresult: %s" % (field, expected_value, result_value)
            self.assertEqual(expected_value, result_value, msg=msg)

    def extract(self, instance):
        """\
        Run ``instance.extract`` on the fixture url and return the article
        """
        article = instance.extract(url=self.data['url'])
        return article

    def getConfig(self):
        """\
        Return a Configuration with image fetching disabled
        """
        config = Configuration()
        config.enable_image_fetching = False
        return config

    def getArticle(self):
        """\
        Load the fixture data, run Goose and return the extracted article

        Also sets ``self.data`` (through loadData) and ``self.parser``.
        """
        # load test case data
        self.loadData()

        # basic configuration
        # no image fetching
        config = self.getConfig()
        self.parser = config.get_parser()

        # target language
        # needed for non english language most of the time
        target_language = self.data.get('target_language')
        if target_language:
            config.target_language = target_language
            config.use_meta_language = False

        # run goose
        g = Goose(config=config)
        return self.extract(g)


class TestExtractions(TestExtractionBase):
Expand Down Expand Up @@ -330,11 +211,6 @@ def test_okaymarketing(self):
fields = ['cleaned_text']
self.runArticleAssertions(article=article, fields=fields)

def test_opengraph(self):
    """\
    Check the ``opengraph`` field of the extracted article
    """
    extracted = self.getArticle()
    self.runArticleAssertions(fields=['opengraph'], article=extracted)

def test_title_opengraph(self):
article = self.getArticle()
fields = ['title']
Expand Down Expand Up @@ -424,15 +300,6 @@ def extract(self, instance):
return article


class TestArticleLinks(TestExtractionBase):
    """\
    Article links test case
    """

    def test_links(self):
        """\
        The number of links on the extracted article must equal the
        ``links`` value in the ``expected`` section of the fixture data
        """
        extracted = self.getArticle()
        # len() is evaluated before the fixture lookup, as arguments are
        # evaluated left to right
        self.assertEqual(
            len(extracted.links),
            self.data['expected']['links'])


class TestArticleAuthor(TestExtractionBase):

def test_author_schema(self):
Expand Down
33 changes: 33 additions & 0 deletions tests/extractors/links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""\
This is a python port of "Goose" originally licensed to Gravity.com
under one or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.
Python port was written by Xavier Grangier for Recrutae
Gravity.com licenses this file
to you under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from base import TestExtractionBase


class TestArticleLinks(TestExtractionBase):
    """\
    Article links test case
    """

    def test_links(self):
        """\
        The number of links on the extracted article must equal the
        ``links`` value in the ``expected`` section of the fixture data
        """
        extracted = self.getArticle()
        # len() is evaluated before the fixture lookup, as arguments are
        # evaluated left to right
        self.assertEqual(
            len(extracted.links),
            self.data['expected']['links'])
32 changes: 32 additions & 0 deletions tests/extractors/opengraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
"""\
This is a python port of "Goose" originally licensed to Gravity.com
under one or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.
Python port was written by Xavier Grangier for Recrutae
Gravity.com licenses this file
to you under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from base import TestExtractionBase


class TestOpenGraph(TestExtractionBase):
    """\
    Open Graph test case
    """

    def test_opengraph(self):
        """\
        Check the ``opengraph`` field of the extracted article
        """
        extracted = self.getArticle()
        self.runArticleAssertions(fields=['opengraph'], article=extracted)
7 changes: 0 additions & 7 deletions tests/extractors/videos.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,8 @@
See the License for the specific language governing permissions and
limitations under the License.
"""
import os

from base import MockResponse
from base import TestExtractionBase

from goose.utils import FileHelper

CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))


class ImageExtractionTests(TestExtractionBase):
"""\
Expand Down

0 comments on commit ea693a9

Please sign in to comment.