diff --git a/keras/applications/imagenet_utils.py b/keras/applications/imagenet_utils.py index ab59cd8e238..d7594c348ad 100644 --- a/keras/applications/imagenet_utils.py +++ b/keras/applications/imagenet_utils.py @@ -4,319 +4,7 @@ from __future__ import division from __future__ import print_function -import json -import warnings -import numpy as np +from keras_applications import imagenet_utils -from ..utils.data_utils import get_file -from .. import backend as K - -CLASS_INDEX = None -CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' - -# Global tensor of imagenet mean for preprocessing symbolic inputs -_IMAGENET_MEAN = None - - -def _preprocess_numpy_input(x, data_format, mode): - """Preprocesses a Numpy array encoding a batch of images. - - # Arguments - x: Input array, 3D or 4D. - data_format: Data format of the image array. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed Numpy array. - """ - if not issubclass(x.dtype.type, np.floating): - x = x.astype(K.floatx(), copy=False) - - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - - if mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if x.ndim == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] - else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - # Zero-center by mean pixel - if data_format == 'channels_first': - if x.ndim == 3: - x[0, :, :] -= mean[0] - x[1, :, :] -= mean[1] - x[2, :, :] -= mean[2] - if std is not None: - x[0, :, :] /= std[0] - x[1, :, :] /= std[1] - x[2, :, :] /= std[2] - else: - x[:, 0, :, :] -= mean[0] - x[:, 1, :, :] -= mean[1] - x[:, 2, :, :] -= mean[2] - if std is not None: - x[:, 0, :, :] /= std[0] - x[:, 1, :, :] /= std[1] - x[:, 2, :, :] /= std[2] - else: - x[..., 0] -= mean[0] - x[..., 1] -= mean[1] - x[..., 2] -= mean[2] - if std is not None: - x[..., 0] /= std[0] - x[..., 1] /= std[1] - x[..., 2] /= std[2] - return x - - -def _preprocess_symbolic_input(x, data_format, mode): - """Preprocesses a tensor encoding a batch of images. - - # Arguments - x: Input tensor, 3D or 4D. - data_format: Data format of the image tensor. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed tensor. - """ - global _IMAGENET_MEAN - - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - - if mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if K.ndim(x) == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] - else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - if _IMAGENET_MEAN is None: - _IMAGENET_MEAN = K.constant(-np.array(mean)) - - # Zero-center by mean pixel - if K.dtype(x) != K.dtype(_IMAGENET_MEAN): - x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format) - else: - x = K.bias_add(x, _IMAGENET_MEAN, data_format) - if std is not None: - x /= std - return x - - -def preprocess_input(x, data_format=None, mode='caffe'): - """Preprocesses a tensor or Numpy array encoding a batch of images. - - # Arguments - x: Input Numpy or symbolic tensor, 3D or 4D. - The preprocessed data is written over the input data - if the data types are compatible. To avoid this - behaviour, `numpy.copy(x)` can be used. - data_format: Data format of the image tensor/array. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed tensor or Numpy array. - - # Raises - ValueError: In case of unknown `data_format` argument. - """ - if data_format is None: - data_format = K.image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) - - if isinstance(x, np.ndarray): - return _preprocess_numpy_input(x, data_format=data_format, mode=mode) - else: - return _preprocess_symbolic_input(x, data_format=data_format, - mode=mode) - - -def decode_predictions(preds, top=5): - """Decodes the prediction of an ImageNet model. - - # Arguments - preds: Numpy tensor encoding a batch of predictions. - top: Integer, how many top-guesses to return. - - # Returns - A list of lists of top class prediction tuples - `(class_name, class_description, score)`. - One list of tuples per sample in batch input. - - # Raises - ValueError: In case of invalid shape of the `pred` array - (must be 2D). - """ - global CLASS_INDEX - if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). ' - 'Found array with shape: ' + str(preds.shape)) - if CLASS_INDEX is None: - fpath = get_file('imagenet_class_index.json', - CLASS_INDEX_PATH, - cache_subdir='models', - file_hash='c2c37ea517e94d9795004a39431a14cb') - with open(fpath) as f: - CLASS_INDEX = json.load(f) - results = [] - for pred in preds: - top_indices = pred.argsort()[-top:][::-1] - result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] - result.sort(key=lambda x: x[2], reverse=True) - results.append(result) - return results - - -def _obtain_input_shape(input_shape, - default_size, - min_size, - data_format, - require_flatten, - weights=None): - """Internal utility to compute/validate a model's input shape. - - # Arguments - input_shape: Either None (will return the default network input shape), - or a user-provided shape to be validated. - default_size: Default input width/height for the model. - min_size: Minimum input width/height accepted by the model. - data_format: Image data format to use. - require_flatten: Whether the model is expected to - be linked to a classifier via a Flatten layer. - weights: One of `None` (random initialization) - or 'imagenet' (pre-training on ImageNet). - If weights='imagenet' input channels must be equal to 3. - - # Returns - An integer shape tuple (may include None entries). - - # Raises - ValueError: In case of invalid argument values. - """ - if weights != 'imagenet' and input_shape and len(input_shape) == 3: - if data_format == 'channels_first': - if input_shape[0] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. ' - 'However, it was passed an input_shape with ' + - str(input_shape[0]) + ' input channels.') - default_shape = (input_shape[0], default_size, default_size) - else: - if input_shape[-1] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. ' - 'However, it was passed an input_shape with ' + - str(input_shape[-1]) + ' input channels.') - default_shape = (default_size, default_size, input_shape[-1]) - else: - if data_format == 'channels_first': - default_shape = (3, default_size, default_size) - else: - default_shape = (default_size, default_size, 3) - if weights == 'imagenet' and require_flatten: - if input_shape is not None: - if input_shape != default_shape: - raise ValueError('When setting`include_top=True` ' - 'and loading `imagenet` weights, ' - '`input_shape` should be ' + - str(default_shape) + '.') - return default_shape - if input_shape: - if data_format == 'channels_first': - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError( - '`input_shape` must be a tuple of three integers.') - if input_shape[0] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; got ' - '`input_shape=' + str(input_shape) + '`') - if ((input_shape[1] is not None and input_shape[1] < min_size) or - (input_shape[2] is not None and input_shape[2] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + '; got ' - '`input_shape=' + str(input_shape) + '`') - else: - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError( - '`input_shape` must be a tuple of three integers.') - if input_shape[-1] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; got ' - '`input_shape=' + str(input_shape) + '`') - if ((input_shape[0] is not None and input_shape[0] < min_size) or - (input_shape[1] is not None and input_shape[1] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + '; got ' - '`input_shape=' + str(input_shape) + '`') - else: - if require_flatten: - input_shape = default_shape - else: - if data_format == 'channels_first': - input_shape = (3, None, None) - else: - input_shape = (None, None, 3) - if require_flatten: - if None in input_shape: - raise ValueError('If `include_top` is True, ' - 'you should specify a static `input_shape`. ' - 'Got `input_shape=' + str(input_shape) + '`') - return input_shape +preprocess_input = imagenet_utils.preprocess_input +decode_predictions = imagenet_utils.decode_predictions diff --git a/tests/integration_tests/imagenet_utils_test.py b/tests/integration_tests/imagenet_utils_test.py index 91481203a60..e1f663726ff 100644 --- a/tests/integration_tests/imagenet_utils_test.py +++ b/tests/integration_tests/imagenet_utils_test.py @@ -108,117 +108,5 @@ def test_decode_predictions(): utils.decode_predictions(np.ones((2, 100))) -def test_obtain_input_shape(): - # input_shape and default_size are not identical. - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=(224, 224, 3), - default_size=299, - min_size=139, - data_format='channels_last', - require_flatten=True, - weights='imagenet') - - # Test invalid use cases - for data_format in ['channels_last', 'channels_first']: - - # test warning - shape = (139, 139) - input_shape = shape + (99,) if data_format == 'channels_last' else (99,) + shape - with pytest.warns(UserWarning): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False, - weights='fake_weights') - - # input_shape is smaller than min_size. - shape = (100, 100) - input_shape = shape + (3,) if data_format == 'channels_last' else (3,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # shape is 1D. - shape = (100,) - input_shape = shape + (3,) if data_format == 'channels_last' else (3,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # the number of channels is 5 not 3. - shape = (100, 100) - input_shape = shape + (5,) if data_format == 'channels_last' else (5,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # require_flatten=True with dynamic input shape. - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True) - - # test include top - assert utils._obtain_input_shape( - input_shape=(3, 200, 200), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True) == (3, 200, 200) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (None, None, 3) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False) == (3, None, None) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (None, None, 3) - - assert utils._obtain_input_shape( - input_shape=(150, 150, 3), - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (150, 150, 3) - - assert utils._obtain_input_shape( - input_shape=(3, None, None), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False) == (3, None, None) - - if __name__ == '__main__': pytest.main([__file__])