[tests]: test metadataproviders in online/offline modes #23
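This PR adds a first test suite for the MetaDataProviders (Amazon and Wikipedia). By default the tests run offline, replaying HTML captures from disk through a faked urllib opener; tests against the live sites are opt-in via DO_ONLINE_TESTS in tests/resources/constants.py, and the captures themselves can be refreshed via UPDATE_HTML_FILES_CAPTURED. Run everything with 'python3 -m unittest discover -s tests -v'.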

Open
wants to merge 1 commit into base: master
3 changes: 2 additions & 1 deletion .gitignore
@@ -5,4 +5,5 @@ venv/
splits/
__pycache__/
.idea
tracks.txt
tests/resources/html_files_captured/
Empty file added MetaDataProviders/__init__.py
Empty file.
Empty file added __init__.py
Empty file.
Empty file added tests/__init__.py
Empty file.
Empty file added tests/resources/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions tests/resources/constants.py
@@ -0,0 +1,49 @@
# constants.py

# UPDATE_HTML_FILES_CAPTURED
# boolean (default: False)
# Set to False to keep the HTML files captured from Amazon and Wikipedia as they are.
# Set to True to refresh them by running 'html_file_downloader.py', which downloads
# the newest HTML pages for use in offline tests and archives the previous ones in a
# datetime-stamped subfolder.
UPDATE_HTML_FILES_CAPTURED = False

# DO_ONLINE_TESTS
# boolean (default: False)
# Set to False to run only offline tests; set to True to also run the online tests.
# Online tests cost you two things:
# 1. your internet bandwidth;
# 2. real requests against Amazon.
DO_ONLINE_TESTS = False

# RESPONSES_IN_HTML_FILES_DIR_PATH
# string (default: './tests/resources/html_files_captured/latest_html_files_captured/')
# Path to the directory holding the *.html responses captured from Amazon and Wikipedia.
# Offline tests replay these files to simulate live responses. The folder contains
# HTML only from the two sites used in the tests (Amazon, Wikipedia).
RESPONSES_IN_HTML_FILES_DIR_PATH = './tests/resources/html_files_captured/latest_html_files_captured/'

# TRACK_FILENAME
# Name of the file that contains the track titles and times.
TRACK_FILENAME = 'tracks.txt'

# AMAZON TEST INFO
# Amazon URLs to be accessed in the tests.
AMAZON_URLS = {
    'with_song_table': 'https://www.amazon.com/Dogs-Eating-blink-182/dp/B00B054FFA',
    'without_song_table': 'https://www.amazon.com/p/feature/rzekmvyjojcp6uc',
    #'with_404': 'https://www.amazon.com/404',
}

# WIKIPEDIA TEST INFO
# Wikipedia URLs to be accessed in the tests.
WIKIPEDIA_URLS = {
    'with_song_table': 'https://en.wikipedia.org/wiki/Dogs_Eating_Dogs',
    'without_song_table': 'https://en.wikipedia.org/wiki/Wikipedia:About',
    #'with_404': '',
}

# Regexes a URL must match to be handled by each provider (the tests assert
# these stay equal to the VALID_URL attribute of each MetaDataProviders module).
VALID_URLS = {
    'amazon': r'https?://(?:\w+\.)?amazon\..*/.*',
    'wikipedia': r'https?://(?:\w+\.)?wikipedia\..*/.*',
}



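For instance, to do a "full" run that first refreshes the captures and then also exercises the live sites, both flags above can be flipped before invoking the suite (a sketch using the flag names defined in this file):

    UPDATE_HTML_FILES_CAPTURED = True   # re-download captures via html_file_downloader.py
    DO_ONLINE_TESTS = True              # also run the live-site assertions
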
35 changes: 35 additions & 0 deletions tests/resources/fake_response.py
@@ -0,0 +1,35 @@
# fake_response.py

import os.path
from io import BytesIO
from urllib.parse import urlparse

from resources import constants


# Return a fake response built from an HTML file stored on disk, chosen by URL.
# The BytesIO stands in for the file-like object a real opener would return.
def fake_requests_get(url):

    # 'www.amazon.com' -> 'amazon', 'en.wikipedia.org' -> 'wikipedia'
    root_domain = urlparse(url).hostname.split('.')[1]
    kind_of_file = ''

    for rd, d in [('amazon', constants.AMAZON_URLS), ('wikipedia', constants.WIKIPEDIA_URLS)]:
        if root_domain == rd:
            for kind, site in d.items():
                if url == site:
                    kind_of_file = kind

    resource_file = os.path.normpath(
        constants.RESPONSES_IN_HTML_FILES_DIR_PATH
        + root_domain
        + '_'
        + kind_of_file
        + '.html')

    with open(resource_file, mode='rb') as f:
        faked_response = BytesIO(f.read())
    return faked_response
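
For a URL like AMAZON_URLS['with_song_table'], the fake resolves to 'amazon_with_song_table.html' inside RESPONSES_IN_HTML_FILES_DIR_PATH. A minimal sketch of how the tests below wire it in (the patch target and side_effect are the ones used in test_Amazon.py):

    from unittest.mock import patch
    from resources import fake_response

    # Anything going through urllib's OpenerDirector.open() now receives a
    # BytesIO backed by captured HTML instead of a live HTTP response.
    with patch('urllib.request.OpenerDirector.open',
               side_effect=fake_response.fake_requests_get):
        pass  # call the code under test here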
48 changes: 48 additions & 0 deletions tests/resources/html_file_downloader.py
@@ -0,0 +1,48 @@
import os
import re
import datetime
from urllib.request import build_opener

from resources import constants


def write_html_file(page_name, data_to_be_saved, iso_date):
    # Save the page twice: once in the 'latest' folder that the offline tests
    # read, and once in a datetime-stamped archive folder.
    for specific_dir in ['latest_html_files_captured', iso_date]:
        filename = os.path.normpath(
            './tests/resources/html_files_captured/'
            + specific_dir
            + '/'
            + page_name
            + '.html')
        dirpath = os.path.dirname(filename)
        if not os.path.exists(dirpath):
            os.makedirs(dirpath)
        print('New HTML file downloaded in: ', filename)
        with open(filename, 'w') as file_html:
            file_html.write(data_to_be_saved)


def access_html_from(url):
    opener = build_opener()
    opener.addheaders = [('User-agent', 'Album-Splitter')]
    page_html = opener.open(url).read()
    return page_html.decode()


def domain_from(url):
    for domain, url_regex in constants.VALID_URLS.items():
        pattern = re.compile(url_regex)
        if pattern.match(url):
            return domain


# Runs at import time: download every configured URL and archive it.
iso_date = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
for d in [constants.AMAZON_URLS, constants.WIKIPEDIA_URLS]:
    for key, url in d.items():
        site_name = domain_from(url)
        if site_name:
            file_name = site_name + '_' + key
            page_html = access_html_from(url)
            write_html_file(file_name, page_html, iso_date)



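Each page therefore lands in two places: 'latest_html_files_captured/<domain>_<key>.html', which fake_response.py reads, and an ISO-datetime-stamped sibling folder such as 'html_files_captured/2019-01-05T12:00:00/amazon_with_song_table.html' (timestamp hypothetical) that preserves the previous captures.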
Empty file added tests/test_MetaDataProviders/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions tests/test_MetaDataProviders/test_Amazon.py
@@ -0,0 +1,50 @@
# test_Amazon.py
# Run the whole suite with: 'python3 -m unittest discover -s tests -v'

import unittest
from unittest.mock import patch

from MetaDataProviders import Amazon
from resources import constants
from resources import fake_response


class TestAmazon(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.module_Amazon = Amazon

    def test_Amazon_VALID_URL(self):
        self.assertEqual(self.module_Amazon, Amazon)
        self.assertEqual(type(self.module_Amazon), type(Amazon))
        self.assertEqual(self.module_Amazon.VALID_URL, constants.VALID_URLS['amazon'])
        self.assertEqual(self.module_Amazon.VALID_URL, Amazon.VALID_URL)

    def test_Amazon_lookup(self):
        with_song_table = constants.AMAZON_URLS['with_song_table']
        without_song_table = constants.AMAZON_URLS['without_song_table']
        #with_404 = constants.AMAZON_URLS['with_404']

        tracks_filename = constants.TRACK_FILENAME

        # Offline: replace the opener so lookup() replays captured HTML.
        with patch('http.client.HTTPResponse.read'):
            with patch('urllib.request.OpenerDirector.open',
                       side_effect=fake_response.fake_requests_get):
                self.assertEqual(self.module_Amazon.lookup(with_song_table, tracks_filename), True)
                self.assertEqual(self.module_Amazon.lookup(without_song_table, tracks_filename), None)
                #self.assertRaises(HTTPError, self.module_Amazon.lookup, with_404, tracks_filename)

        # Online: hit the real site only when explicitly enabled in constants.py.
        if constants.DO_ONLINE_TESTS:
            self.assertEqual(self.module_Amazon.lookup(with_song_table, tracks_filename), True)
            self.assertEqual(self.module_Amazon.lookup(without_song_table, tracks_filename), None)
            #self.assertRaises(HTTPError, self.module_Amazon.lookup, with_404, tracks_filename)


if __name__ == '__main__':
    unittest.main()

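Both patches matter for the offline path: OpenerDirector.open is replaced so no network traffic happens at all, while HTTPResponse.read is also patched, presumably as a safety net so that nothing reads from a real response object. test_Wikipedia.py below follows the same pattern.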
50 changes: 50 additions & 0 deletions tests/test_MetaDataProviders/test_Wikipedia.py
@@ -0,0 +1,50 @@
# test_Wikipedia.py
# Run the whole suite with: 'python3 -m unittest discover -s tests -v'

import unittest
from unittest.mock import patch

from MetaDataProviders import Wikipedia
from resources import constants
from resources import fake_response


class TestWikipedia(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.module_Wikipedia = Wikipedia

    def test_Wikipedia_VALID_URL(self):
        self.assertEqual(self.module_Wikipedia, Wikipedia)
        self.assertEqual(type(self.module_Wikipedia), type(Wikipedia))
        self.assertEqual(self.module_Wikipedia.VALID_URL, constants.VALID_URLS['wikipedia'])
        self.assertEqual(self.module_Wikipedia.VALID_URL, Wikipedia.VALID_URL)

    def test_Wikipedia_lookup(self):
        with_song_table = constants.WIKIPEDIA_URLS['with_song_table']
        without_song_table = constants.WIKIPEDIA_URLS['without_song_table']
        #with_404 = constants.WIKIPEDIA_URLS['with_404']

        tracks_filename = constants.TRACK_FILENAME

        # Offline: replace the opener so lookup() replays captured HTML.
        with patch('http.client.HTTPResponse.read'):
            with patch('urllib.request.OpenerDirector.open',
                       side_effect=fake_response.fake_requests_get):
                self.assertEqual(self.module_Wikipedia.lookup(with_song_table, tracks_filename), True)
                self.assertEqual(self.module_Wikipedia.lookup(without_song_table, tracks_filename), None)
                #self.assertRaises(HTTPError, self.module_Wikipedia.lookup, with_404, tracks_filename)

        # Online: hit the real site only when explicitly enabled in constants.py.
        if constants.DO_ONLINE_TESTS:
            self.assertEqual(self.module_Wikipedia.lookup(with_song_table, tracks_filename), True)
            self.assertEqual(self.module_Wikipedia.lookup(without_song_table, tracks_filename), None)
            #self.assertRaises(HTTPError, self.module_Wikipedia.lookup, with_404, tracks_filename)


if __name__ == '__main__':
    unittest.main()

22 changes: 22 additions & 0 deletions tests/test_split.py
@@ -0,0 +1,22 @@
# test_split.py
# Run the whole suite with: 'python3 -m unittest discover -s tests -v'

import unittest

from tests.resources import constants

# Importing html_file_downloader runs it: fresh HTML captures are downloaded
# as a side effect of the import.
if constants.UPDATE_HTML_FILES_CAPTURED:
    from tests.resources import html_file_downloader


class TestSplit(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        pass

    #def test_compare_compare(self):
    #    pass


if __name__ == '__main__':
    unittest.main()