From 28ac71eaa276524aa39d85a8e4f482ce77e4ac41 Mon Sep 17 00:00:00 2001 From: Yuqi Li Date: Fri, 10 Apr 2020 01:09:53 -0700 Subject: [PATCH] Add CustomModel to be the base class for QA task. PiperOrigin-RevId: 305842035 --- .../core/task/classification_model.py | 107 ++---------- .../core/task/classification_model_test.py | 70 ++++++++ .../model_maker/core/task/custom_model.py | 155 ++++++++++++++++++ .../core/task/custom_model_test.py | 119 ++++++++++++++ .../model_maker/core/task/text_classifier.py | 6 - .../lite/model_maker/core/test_util.py | 34 +++- 6 files changed, 387 insertions(+), 104 deletions(-) create mode 100644 tensorflow_examples/lite/model_maker/core/task/classification_model_test.py create mode 100644 tensorflow_examples/lite/model_maker/core/task/custom_model.py create mode 100644 tensorflow_examples/lite/model_maker/core/task/custom_model_test.py diff --git a/tensorflow_examples/lite/model_maker/core/task/classification_model.py b/tensorflow_examples/lite/model_maker/core/task/classification_model.py index ad3e44a657d..06937984025 100644 --- a/tensorflow_examples/lite/model_maker/core/task/classification_model.py +++ b/tensorflow_examples/lite/model_maker/core/task/classification_model.py @@ -11,35 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Custom model that is already retained by data.""" +"""Custom classification model that is already retrained by data.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import abc -import os -import tempfile import numpy as np -import tensorflow as tf -from tensorflow_examples.lite.model_maker.core import compat +import tensorflow.compat.v2 as tf from tensorflow_examples.lite.model_maker.core import model_export_format as mef +from tensorflow_examples.lite.model_maker.core.task import custom_model -DEFAULT_QUANTIZATION_STEPS = 2000 - -def get_representative_dataset_gen(dataset, num_steps): - - def representative_dataset_gen(): - """Generates representative dataset for quantized.""" - for image, _ in dataset.take(num_steps): - yield [image] - - return representative_dataset_gen - - -class ClassificationModel(abc.ABC): +class ClassificationModel(custom_model.CustomModel): """"The abstract base class that represents a Tensorflow classification model.""" def __init__(self, model_export_format, model_spec, index_to_label, @@ -60,28 +45,11 @@ def __init__(self, model_export_format, model_spec, index_to_label, raise ValueError('Model export format %s is not supported currently.' % str(model_export_format)) - self.model_export_format = model_export_format - self.model_spec = model_spec + super(ClassificationModel, self).__init__(model_export_format, model_spec, + shuffle) self.index_to_label = index_to_label self.num_classes = num_classes - self.shuffle = shuffle self.train_whole_model = train_whole_model - self.model = None - - @abc.abstractmethod - def preprocess(self, sample_data, label): - return - - @abc.abstractmethod - def train(self, train_data, validation_data=None, **kwargs): - return - - @abc.abstractmethod - def export(self, **kwargs): - return - - def summary(self): - self.model.summary() def evaluate(self, data, batch_size=32): """Evaluates the model. 
@@ -122,31 +90,6 @@ def predict_top_k(self, data, k=1, batch_size=32): return label_prob - def _gen_dataset(self, - data, - batch_size=32, - is_training=True, - input_pipeline_context=None): - """Generates training / validation dataset.""" - # The dataset is always sharded by number of hosts. - # num_input_pipelines is the number of hosts rather than number of cores. - ds = data.dataset - if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: - ds = ds.shard(input_pipeline_context.num_input_pipelines, - input_pipeline_context.input_pipeline_id) - - ds = ds.map( - self.preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE) - - if is_training: - if self.shuffle: - ds = ds.shuffle(buffer_size=min(data.size, 100)) - ds = ds.repeat() - - ds = ds.batch(batch_size) - ds = ds.prefetch(tf.data.experimental.AUTOTUNE) - return ds - def _export_tflite(self, tflite_filename, label_filename, @@ -164,41 +107,11 @@ def _export_tflite(self, representative_data: Representative data used for post-training quantization. Used only if `quantized` is True. 
""" - temp_dir = None - if compat.get_tf_behavior() == 1: - temp_dir = tempfile.TemporaryDirectory() - save_path = os.path.join(temp_dir.name, 'saved_model') - self.model.save(save_path, include_optimizer=False, save_format='tf') - converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(save_path) - else: - converter = tf.lite.TFLiteConverter.from_keras_model(self.model) - - if quantized: - if quantization_steps is None: - quantization_steps = DEFAULT_QUANTIZATION_STEPS - if representative_data is None: - raise ValueError( - 'representative_data couldn\'t be None if model is quantized.') - ds = self._gen_dataset( - representative_data, batch_size=1, is_training=False) - converter.representative_dataset = tf.lite.RepresentativeDataset( - get_representative_dataset_gen(ds, quantization_steps)) - - converter.optimizations = [tf.lite.Optimize.DEFAULT] - converter.inference_input_type = tf.uint8 - converter.inference_output_type = tf.uint8 - converter.target_spec.supported_ops = [ - tf.lite.OpsSet.TFLITE_BUILTINS_INT8 - ] - tflite_model = converter.convert() - if temp_dir: - temp_dir.cleanup() - - with tf.io.gfile.GFile(tflite_filename, 'wb') as f: - f.write(tflite_model) + super(ClassificationModel, + self)._export_tflite(tflite_filename, quantized, quantization_steps, + representative_data) with tf.io.gfile.GFile(label_filename, 'w') as f: f.write('\n'.join(self.index_to_label)) - tf.compat.v1.logging.info('Export to tflite model %s, saved labels in %s.', - tflite_filename, label_filename) + tf.compat.v1.logging.info('Saved labels in %s.', label_filename) diff --git a/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py b/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py new file mode 100644 index 00000000000..1666019a35f --- /dev/null +++ b/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py @@ -0,0 +1,70 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v2 as tf +from tensorflow_examples.lite.model_maker.core import model_export_format as mef +from tensorflow_examples.lite.model_maker.core import test_util +from tensorflow_examples.lite.model_maker.core.task import classification_model + + +class MockClassificationModel(classification_model.ClassificationModel): + + def train(self, train_data, validation_data=None, **kwargs): + pass + + def export(self, **kwargs): + pass + + def evaluate(self, data, **kwargs): + pass + + +class ClassificationModelTest(tf.test.TestCase): + + def test_predict_top_k(self): + input_shape = [24, 24, 3] + num_classes = 2 + model = MockClassificationModel( + model_export_format=mef.ModelExportFormat.TFLITE, + model_spec=None, + index_to_label=['pos', 'neg'], + num_classes=2, + train_whole_model=False, + shuffle=False) + model.model = test_util.build_model(input_shape, num_classes) + data = test_util.get_dataloader(2, input_shape, num_classes) + + topk_results = model.predict_top_k(data, k=2, batch_size=1) + for topk_result in topk_results: + top1_result, top2_result = topk_result[0], topk_result[1] + top1_label, top1_prob = top1_result[0], top1_result[1] + top2_label, top2_prob = top2_result[0], top2_result[1] + + self.assertIn(top1_label, model.index_to_label) + self.assertIn(top2_label, 
model.index_to_label) + self.assertNotEqual(top1_label, top2_label) + + self.assertLessEqual(top1_prob, 1) + self.assertGreaterEqual(top1_prob, top2_prob) + self.assertGreaterEqual(top2_prob, 0) + + self.assertEqual(top1_prob + top2_prob, 1.0) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_examples/lite/model_maker/core/task/custom_model.py b/tensorflow_examples/lite/model_maker/core/task/custom_model.py new file mode 100644 index 00000000000..ce70fd42de5 --- /dev/null +++ b/tensorflow_examples/lite/model_maker/core/task/custom_model.py @@ -0,0 +1,155 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Base custom model that is already retrained by data.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import os +import tempfile + +import tensorflow.compat.v2 as tf +from tensorflow_examples.lite.model_maker.core import compat +from tensorflow_examples.lite.model_maker.core import model_export_format as mef + +DEFAULT_QUANTIZATION_STEPS = 2000 + + +def get_representative_dataset_gen(dataset, num_steps): + + def representative_dataset_gen(): + """Generates a representative dataset for quantization.""" + for image, _ in dataset.take(num_steps): + yield [image] + + return representative_dataset_gen + + +class CustomModel(abc.ABC): + """The abstract base class that represents a Tensorflow custom model.""" + + def __init__(self, model_export_format, model_spec, shuffle): + """Initialize an instance with data, deploy mode and other related parameters. + + Args: + model_export_format: Model export format such as saved_model / tflite. + model_spec: Specification for the model. + shuffle: Whether the data should be shuffled. + """ + if model_export_format != mef.ModelExportFormat.TFLITE: + raise ValueError('Model export format %s is not supported currently.' % + str(model_export_format)) + + self.model_export_format = model_export_format + self.model_spec = model_spec + self.shuffle = shuffle + self.model = None + + def preprocess(self, sample_data, label): + """Preprocess the data.""" + # TODO(yuqili): remove this method once preprocess for image classifier is + # also moved to DataLoader part. 
+ return sample_data, label + + @abc.abstractmethod + def train(self, train_data, validation_data=None, **kwargs): + return + + @abc.abstractmethod + def export(self, **kwargs): + return + + def summary(self): + self.model.summary() + + @abc.abstractmethod + def evaluate(self, data, **kwargs): + return + + def _gen_dataset(self, + data, + batch_size=32, + is_training=True, + input_pipeline_context=None): + """Generates training / validation dataset.""" + # The dataset is always sharded by number of hosts. + # num_input_pipelines is the number of hosts rather than number of cores. + ds = data.dataset + if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1: + ds = ds.shard(input_pipeline_context.num_input_pipelines, + input_pipeline_context.input_pipeline_id) + + ds = ds.map( + self.preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE) + + if is_training: + if self.shuffle: + ds = ds.shuffle(buffer_size=min(data.size, 100)) + ds = ds.repeat() + + ds = ds.batch(batch_size) + ds = ds.prefetch(tf.data.experimental.AUTOTUNE) + return ds + + def _export_tflite(self, + tflite_filename, + quantized=False, + quantization_steps=None, + representative_data=None): + """Converts the retrained model to tflite format and saves it. + + Args: + tflite_filename: File name to save tflite model. + quantized: boolean, if True, save quantized model. + quantization_steps: Number of post-training quantization calibration steps + to run. Used only if `quantized` is True. + representative_data: Representative data used for post-training + quantization. Used only if `quantized` is True. 
+ """ + temp_dir = None + if compat.get_tf_behavior() == 1: + temp_dir = tempfile.TemporaryDirectory() + save_path = os.path.join(temp_dir.name, 'saved_model') + self.model.save(save_path, include_optimizer=False, save_format='tf') + converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(save_path) + else: + converter = tf.lite.TFLiteConverter.from_keras_model(self.model) + + if quantized: + if quantization_steps is None: + quantization_steps = DEFAULT_QUANTIZATION_STEPS + if representative_data is None: + raise ValueError( + 'representative_data couldn\'t be None if model is quantized.') + ds = self._gen_dataset( + representative_data, batch_size=1, is_training=False) + converter.representative_dataset = tf.lite.RepresentativeDataset( + get_representative_dataset_gen(ds, quantization_steps)) + + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.inference_input_type = tf.uint8 + converter.inference_output_type = tf.uint8 + converter.target_spec.supported_ops = [ + tf.lite.OpsSet.TFLITE_BUILTINS_INT8 + ] + tflite_model = converter.convert() + if temp_dir: + temp_dir.cleanup() + + with tf.io.gfile.GFile(tflite_filename, 'wb') as f: + f.write(tflite_model) + + tf.compat.v1.logging.info('Export to tflite model in %s.', tflite_filename) diff --git a/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py b/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py new file mode 100644 index 00000000000..4fac96ba2fc --- /dev/null +++ b/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py @@ -0,0 +1,119 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import numpy as np +import tensorflow.compat.v2 as tf +from tensorflow_examples.lite.model_maker.core import model_export_format as mef +from tensorflow_examples.lite.model_maker.core import test_util +from tensorflow_examples.lite.model_maker.core.task import custom_model + + +class MockCustomModel(custom_model.CustomModel): + + def train(self, train_data, validation_data=None, **kwargs): + pass + + def export(self, **kwargs): + pass + + def evaluate(self, data, **kwargs): + pass + + +class CustomModelTest(tf.test.TestCase): + + def setUp(self): + super(CustomModelTest, self).setUp() + self.model = MockCustomModel( + model_export_format=mef.ModelExportFormat.TFLITE, + model_spec=None, + shuffle=False) + + def test_gen_dataset(self): + input_dim = 8 + data = test_util.get_dataloader( + data_size=2, input_shape=[input_dim], num_classes=2) + + ds = self.model._gen_dataset(data, batch_size=1, is_training=False) + expected = list(data.dataset.as_numpy_iterator()) + for i, (feature, label) in enumerate(ds): + expected_feature = [expected[i][0]] + expected_label = [expected[i][1]] + self.assertTrue((feature.numpy() == expected_feature).any()) + self.assertEqual(label.numpy(), expected_label) + + def test_export_tflite(self): + input_dim = 4 + self.model.model = test_util.build_model( + input_shape=[input_dim], num_classes=2) + tflite_file = os.path.join(self.get_temp_dir(), 'model.tflite') + self.model._export_tflite(tflite_file) + 
self._test_tflite(self.model.model, tflite_file, input_dim) + + def test_export_tflite_quantized(self): + input_dim = 4 + num_classes = 2 + max_input_value = 5 + self.model.model = test_util.build_model([input_dim], num_classes) + tflite_file = os.path.join(self.get_temp_dir(), 'model_quantized.tflite') + self.model._export_tflite( + tflite_file, + quantized=True, + quantization_steps=1, + representative_data=test_util.get_dataloader( + data_size=1, + input_shape=[input_dim], + num_classes=num_classes, + max_input_value=max_input_value)) + self._test_tflite( + self.model.model, tflite_file, input_dim, max_input_value, atol=1e-01) + + def _test_tflite(self, + keras_model, + tflite_model_file, + input_dim, + max_input_value=1000, + atol=1e-04): + with tf.io.gfile.GFile(tflite_model_file, 'rb') as f: + tflite_model = f.read() + + random_input = tf.random.uniform( + shape=(1, input_dim), + minval=0, + maxval=max_input_value, + dtype=tf.float32) + + # Gets output from keras model. + keras_output = keras_model.predict(random_input) + + # Gets output from tflite model. 
+ interpreter = tf.lite.Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + interpreter.set_tensor(interpreter.get_input_details()[0]['index'], + random_input) + interpreter.invoke() + lite_output = interpreter.get_tensor( + interpreter.get_output_details()[0]['index']) + + self.assertTrue(np.allclose(lite_output, keras_output, atol=atol)) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow_examples/lite/model_maker/core/task/text_classifier.py b/tensorflow_examples/lite/model_maker/core/task/text_classifier.py index 8ec0133de0a..8fe69b8ede2 100644 --- a/tensorflow_examples/lite/model_maker/core/task/text_classifier.py +++ b/tensorflow_examples/lite/model_maker/core/task/text_classifier.py @@ -90,12 +90,6 @@ def __init__(self, shuffle, train_whole_model=True) - def preprocess(self, raw_text, label): - """Preprocess the text.""" - # TODO(yuqili): remove this method once preprocess for image classifier is - # also moved to DataLoader part. - return raw_text, label - def get_dataset_fn(self, input_data, global_batch_size, is_training): """Gets a closure to create a dataset.""" diff --git a/tensorflow_examples/lite/model_maker/core/test_util.py b/tensorflow_examples/lite/model_maker/core/test_util.py index 2d3e92b4196..149fcb2ce81 100644 --- a/tensorflow_examples/lite/model_maker/core/test_util.py +++ b/tensorflow_examples/lite/model_maker/core/test_util.py @@ -21,8 +21,9 @@ from absl import flags -import tensorflow as tf +import tensorflow.compat.v2 as tf from tensorflow_examples.lite.model_maker.core import compat +from tensorflow_examples.lite.model_maker.core.data_util import dataloader FLAGS = flags.FLAGS @@ -86,3 +87,34 @@ def decorator(*args, **kwargs): fn(*args, **kwargs) return decorator + + +def build_model(input_shape, num_classes): + """Builds a simple model for test.""" + inputs = tf.keras.layers.Input(shape=input_shape) + if len(input_shape) == 3: # Image inputs. 
+ outputs = tf.keras.layers.GlobalAveragePooling2D()(inputs) + outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(outputs) + elif len(input_shape) == 1: # Text inputs. + outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(inputs) + else: + raise ValueError("Model inputs should be 2D tensor or 4D tensor.") + + model = tf.keras.Model(inputs=inputs, outputs=outputs) + return model + + +def get_dataloader(data_size, input_shape, num_classes, max_input_value=1000): + """Gets a simple `DataLoader` object for test.""" + features = tf.random.uniform( + shape=[data_size] + input_shape, + minval=0, + maxval=max_input_value, + dtype=tf.float32) + + labels = tf.random.uniform( + shape=[data_size], minval=0, maxval=num_classes, dtype=tf.int32) + + ds = tf.data.Dataset.from_tensor_slices((features, labels)) + data = dataloader.DataLoader(ds, data_size) + return data