forked from keras-team/keras
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
get_file() with tar, tgz, tar.bz, zip and sha256, resolves keras-team…
…#5861. (keras-team#5882) * get_file() with tar, tgz, tar.bz, zip and sha256, resolves keras-team#5861. The changes were designed to preserve backwards compatibility while adding support for .tar.gz, .tgz, .tar.bz, and .zip files. sha256 hash is now supported in addition to md5. * get_file() improve large file performance keras-team#5861. * getfile() extract parameter fix (keras-team#5861) * extract_archive() py3 fix (keras-team#5861) * get_file() tarfile fix (keras-team#5861) * data_utils.py and data_utils_test.py updated based on review (keras-team#5861) # This is a combination of 4 commits. # The first commit's message is: get_file() with tar, tgz, tar.bz, zip and sha256, resolves keras-team#5861. The changes were designed to preserve backwards compatibility while adding support for .tar.gz, .tgz, .tar.bz, and .zip files. Adds extract_archive() and hash_file() functions. sha256 hash is now supported in addition to md5. adds data_utils_test.py to test new functionality # This is the 2nd commit message: extract_archive() redundant open (keras-team#5861) # This is the 3rd commit message: data_utils.py and data_utils_test.py updated based on review (keras-team#5861) test creates its own tiny file to download and extract locally. test covers md5 sha256 zip and tar _hash_file() now private _extract_archive() now private # This is the 4th commit message: data_utils.py and data_utils_test.py updated based on review (keras-team#5861) test creates its own tiny file to download and extract locally. test covers md5 sha256 zip and tar _hash_file() now private _extract_archive() now private * data_utils.py and data_utils_test.py updated based on review (keras-team#5861) * data_utils.py get_file() cache_dir docs (keras-team#5861) * data_utils.py address docs comments (keras-team#5861) * get_file() comment link, path, & typo fix
- Loading branch information
Showing
2 changed files
with
206 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
"""Tests for functions in data_utils.py. | ||
""" | ||
import os | ||
import pytest | ||
import tarfile | ||
import zipfile | ||
from six.moves.urllib.request import pathname2url | ||
from six.moves.urllib.parse import urljoin | ||
from keras.utils.data_utils import get_file | ||
from keras.utils.data_utils import validate_file | ||
from keras.utils.data_utils import _hash_file | ||
from keras import activations | ||
from keras import regularizers | ||
|
||
|
||
def test_data_utils():
    """Tests get_file from a url, plus extraction and validation.

    Writes a small text file, packs it into a `.tar.gz` and a `.zip`
    archive in the current working directory, and fetches each archive
    through `get_file` using a `file://` URL. The downloaded copies are
    then checked with `validate_file` against hashes computed by
    `_hash_file`.

    Every file the test creates or downloads is removed in a `finally`
    clause, so a failing assertion or download error does not leave
    stale fixtures behind for later runs.
    """
    dirname = 'data_utils'
    # Files to delete when the test ends, whether it passes or fails.
    # Downloaded copies are appended as soon as their paths are known.
    leftovers = ['test.txt', 'test.tar.gz', 'test.zip']

    try:
        with open('test.txt', 'w') as text_file:
            text_file.write('Float like a butterfly, sting like a bee.')

        with tarfile.open('test.tar.gz', 'w:gz') as tar_file:
            tar_file.add('test.txt')

        with zipfile.ZipFile('test.zip', 'w') as zip_file:
            zip_file.write('test.txt')

        # --- tar.gz archive -------------------------------------------
        origin = urljoin('file://',
                         pathname2url(os.path.abspath('test.tar.gz')))

        path = get_file(dirname, origin, untar=True)
        # The test expects the downloaded archive to sit at the returned
        # path plus the '.tar.gz' suffix (asserted below).
        filepath = path + '.tar.gz'
        leftovers.append(filepath)
        # No algorithm given: relies on `_hash_file`'s default
        # (presumably sha256, going by the variable name -- confirm).
        hashval_sha256 = _hash_file(filepath)
        hashval_md5 = _hash_file(filepath, algorithm='md5')
        # Fetch again through the legacy `md5_hash` argument and through
        # the newer `file_hash` argument.
        get_file(dirname, origin, md5_hash=hashval_md5, untar=True)
        get_file(filepath, origin, file_hash=hashval_sha256, extract=True)
        assert os.path.exists(filepath)
        assert validate_file(filepath, hashval_sha256)
        assert validate_file(filepath, hashval_md5)

        # --- zip archive ----------------------------------------------
        origin = urljoin('file://',
                         pathname2url(os.path.abspath('test.zip')))

        hashval_sha256 = _hash_file('test.zip')
        hashval_md5 = _hash_file('test.zip', algorithm='md5')
        path = get_file(dirname, origin, md5_hash=hashval_md5, extract=True)
        leftovers.append(path)
        path = get_file(dirname, origin, file_hash=hashval_sha256,
                        extract=True)
        leftovers.append(path)
        assert os.path.exists(path)
        assert validate_file(path, hashval_sha256)
        assert validate_file(path, hashval_md5)
    finally:
        # Best-effort cleanup: only remove regular files that exist, so a
        # failure early in the test does not raise a second error here
        # and mask the original one.
        for leftover in leftovers:
            if os.path.isfile(leftover):
                os.remove(leftover)
|
||
# Allow running this test module directly as a script: hands this file's
# own path to pytest.
if __name__ == '__main__':
    pytest.main([__file__])