From 28ac71eaa276524aa39d85a8e4f482ce77e4ac41 Mon Sep 17 00:00:00 2001
From: Yuqi Li <yuqili@google.com>
Date: Fri, 10 Apr 2020 01:09:53 -0700
Subject: [PATCH] Add CustomModel to be the base class for QA task.

PiperOrigin-RevId: 305842035
---
 .../core/task/classification_model.py         | 107 ++----------
 .../core/task/classification_model_test.py    |  70 ++++++++
 .../model_maker/core/task/custom_model.py     | 155 ++++++++++++++++++
 .../core/task/custom_model_test.py            | 119 ++++++++++++++
 .../model_maker/core/task/text_classifier.py  |   6 -
 .../lite/model_maker/core/test_util.py        |  34 +++-
 6 files changed, 387 insertions(+), 104 deletions(-)
 create mode 100644 tensorflow_examples/lite/model_maker/core/task/classification_model_test.py
 create mode 100644 tensorflow_examples/lite/model_maker/core/task/custom_model.py
 create mode 100644 tensorflow_examples/lite/model_maker/core/task/custom_model_test.py

diff --git a/tensorflow_examples/lite/model_maker/core/task/classification_model.py b/tensorflow_examples/lite/model_maker/core/task/classification_model.py
index ad3e44a657d..06937984025 100644
--- a/tensorflow_examples/lite/model_maker/core/task/classification_model.py
+++ b/tensorflow_examples/lite/model_maker/core/task/classification_model.py
@@ -11,35 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Custom model that is already retained by data."""
+"""Custom classification model that is already retrained by data."""
 
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import abc
-import os
-import tempfile
 
 import numpy as np
-import tensorflow as tf
-from tensorflow_examples.lite.model_maker.core import compat
+import tensorflow.compat.v2 as tf
 from tensorflow_examples.lite.model_maker.core import model_export_format as mef
+from tensorflow_examples.lite.model_maker.core.task import custom_model
 
-DEFAULT_QUANTIZATION_STEPS = 2000
 
-
-def get_representative_dataset_gen(dataset, num_steps):
-
-  def representative_dataset_gen():
-    """Generates representative dataset for quantized."""
-    for image, _ in dataset.take(num_steps):
-      yield [image]
-
-  return representative_dataset_gen
-
-
-class ClassificationModel(abc.ABC):
+class ClassificationModel(custom_model.CustomModel):
   """"The abstract base class that represents a Tensorflow classification model."""
 
   def __init__(self, model_export_format, model_spec, index_to_label,
@@ -60,28 +45,11 @@ def __init__(self, model_export_format, model_spec, index_to_label,
       raise ValueError('Model export format %s is not supported currently.' %
                        str(model_export_format))
 
-    self.model_export_format = model_export_format
-    self.model_spec = model_spec
+    super(ClassificationModel, self).__init__(model_export_format, model_spec,
+                                              shuffle)
     self.index_to_label = index_to_label
     self.num_classes = num_classes
-    self.shuffle = shuffle
     self.train_whole_model = train_whole_model
-    self.model = None
-
-  @abc.abstractmethod
-  def preprocess(self, sample_data, label):
-    return
-
-  @abc.abstractmethod
-  def train(self, train_data, validation_data=None, **kwargs):
-    return
-
-  @abc.abstractmethod
-  def export(self, **kwargs):
-    return
-
-  def summary(self):
-    self.model.summary()
 
   def evaluate(self, data, batch_size=32):
     """Evaluates the model.
@@ -122,31 +90,6 @@ def predict_top_k(self, data, k=1, batch_size=32):
 
     return label_prob
 
-  def _gen_dataset(self,
-                   data,
-                   batch_size=32,
-                   is_training=True,
-                   input_pipeline_context=None):
-    """Generates training / validation dataset."""
-    # The dataset is always sharded by number of hosts.
-    # num_input_pipelines is the number of hosts rather than number of cores.
-    ds = data.dataset
-    if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
-      ds = ds.shard(input_pipeline_context.num_input_pipelines,
-                    input_pipeline_context.input_pipeline_id)
-
-    ds = ds.map(
-        self.preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
-
-    if is_training:
-      if self.shuffle:
-        ds = ds.shuffle(buffer_size=min(data.size, 100))
-      ds = ds.repeat()
-
-    ds = ds.batch(batch_size)
-    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
-    return ds
-
   def _export_tflite(self,
                      tflite_filename,
                      label_filename,
@@ -164,41 +107,11 @@ def _export_tflite(self,
       representative_data: Representative data used for post-training
         quantization. Used only if `quantized` is True.
     """
-    temp_dir = None
-    if compat.get_tf_behavior() == 1:
-      temp_dir = tempfile.TemporaryDirectory()
-      save_path = os.path.join(temp_dir.name, 'saved_model')
-      self.model.save(save_path, include_optimizer=False, save_format='tf')
-      converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(save_path)
-    else:
-      converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
-
-    if quantized:
-      if quantization_steps is None:
-        quantization_steps = DEFAULT_QUANTIZATION_STEPS
-      if representative_data is None:
-        raise ValueError(
-            'representative_data couldn\'t be None if model is quantized.')
-      ds = self._gen_dataset(
-          representative_data, batch_size=1, is_training=False)
-      converter.representative_dataset = tf.lite.RepresentativeDataset(
-          get_representative_dataset_gen(ds, quantization_steps))
-
-      converter.optimizations = [tf.lite.Optimize.DEFAULT]
-      converter.inference_input_type = tf.uint8
-      converter.inference_output_type = tf.uint8
-      converter.target_spec.supported_ops = [
-          tf.lite.OpsSet.TFLITE_BUILTINS_INT8
-      ]
-    tflite_model = converter.convert()
-    if temp_dir:
-      temp_dir.cleanup()
-
-    with tf.io.gfile.GFile(tflite_filename, 'wb') as f:
-      f.write(tflite_model)
+    super(ClassificationModel,
+          self)._export_tflite(tflite_filename, quantized, quantization_steps,
+                               representative_data)
 
     with tf.io.gfile.GFile(label_filename, 'w') as f:
       f.write('\n'.join(self.index_to_label))
 
-    tf.compat.v1.logging.info('Export to tflite model %s, saved labels in %s.',
-                              tflite_filename, label_filename)
+    tf.compat.v1.logging.info('Saved labels in %s.', label_filename)
diff --git a/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py b/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py
new file mode 100644
index 00000000000..1666019a35f
--- /dev/null
+++ b/tensorflow_examples/lite/model_maker/core/task/classification_model_test.py
@@ -0,0 +1,70 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow.compat.v2 as tf
+from tensorflow_examples.lite.model_maker.core import model_export_format as mef
+from tensorflow_examples.lite.model_maker.core import test_util
+from tensorflow_examples.lite.model_maker.core.task import classification_model
+
+
+class MockClassificationModel(classification_model.ClassificationModel):
+
+  def train(self, train_data, validation_data=None, **kwargs):
+    """No-op stand-in for the abstract `train` hook."""
+
+  def export(self, **kwargs):
+    """No-op stand-in for the abstract `export` hook."""
+
+  def evaluate(self, data, **kwargs):
+    """No-op override of `evaluate`; this test never evaluates the model."""
+
+
+class ClassificationModelTest(tf.test.TestCase):
+
+  def test_predict_top_k(self):
+    """Checks top-k predictions are valid, ordered label/probability pairs."""
+    input_shape, num_classes = [24, 24, 3], 2
+    model = MockClassificationModel(
+        model_export_format=mef.ModelExportFormat.TFLITE,
+        model_spec=None,
+        index_to_label=['pos', 'neg'],
+        num_classes=num_classes,
+        train_whole_model=False,
+        shuffle=False)
+    model.model = test_util.build_model(input_shape, num_classes)
+    data = test_util.get_dataloader(2, input_shape, num_classes)
+
+    topk_results = model.predict_top_k(data, k=2, batch_size=1)
+    for topk_result in topk_results:
+      top1_result, top2_result = topk_result[0], topk_result[1]
+      top1_label, top1_prob = top1_result[0], top1_result[1]
+      top2_label, top2_prob = top2_result[0], top2_result[1]
+
+      self.assertIn(top1_label, model.index_to_label)
+      self.assertIn(top2_label, model.index_to_label)
+      self.assertNotEqual(top1_label, top2_label)
+
+      self.assertLessEqual(top1_prob, 1)
+      self.assertGreaterEqual(top1_prob, top2_prob)
+      self.assertGreaterEqual(top2_prob, 0)
+      # Floating-point probabilities need not sum to exactly 1.0; use a tolerance.
+      self.assertAlmostEqual(top1_prob + top2_prob, 1.0, delta=1e-6)
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow_examples/lite/model_maker/core/task/custom_model.py b/tensorflow_examples/lite/model_maker/core/task/custom_model.py
new file mode 100644
index 00000000000..ce70fd42de5
--- /dev/null
+++ b/tensorflow_examples/lite/model_maker/core/task/custom_model.py
@@ -0,0 +1,155 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Base custom model that is already retrained by data."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import abc
+import os
+import tempfile
+
+import tensorflow.compat.v2 as tf
+from tensorflow_examples.lite.model_maker.core import compat
+from tensorflow_examples.lite.model_maker.core import model_export_format as mef
+
+DEFAULT_QUANTIZATION_STEPS = 2000
+
+
+def get_representative_dataset_gen(dataset, num_steps):
+  """Returns a generator function over the first `num_steps` dataset items."""
+  def representative_dataset_gen():
+    """Yields each item's first element in a list, dropping the second."""
+    for sample, _ in dataset.take(num_steps):
+      yield [sample]
+
+  return representative_dataset_gen
+
+
+class CustomModel(abc.ABC):
+  """The abstract base class that represents a Tensorflow custom model."""
+
+  def __init__(self, model_export_format, model_spec, shuffle):
+    """Stores the model settings; only the TFLITE export format is accepted.
+
+    Args:
+      model_export_format: Model export format such as saved_model / tflite.
+      model_spec: Specification for the model.
+      shuffle: Whether the data should be shuffled.
+    """
+    if model_export_format != mef.ModelExportFormat.TFLITE:
+      raise ValueError('Model export format %s is not supported currently.' %
+                       str(model_export_format))
+
+    self.model_export_format = model_export_format
+    self.model_spec = model_spec
+    self.shuffle = shuffle
+    self.model = None
+
+  def preprocess(self, sample_data, label):
+    """Preprocess the data."""
+    # TODO(yuqili): remove this method once preprocess for image classifier is
+    # also moved to DataLoader part.
+    return sample_data, label
+
+  @abc.abstractmethod
+  def train(self, train_data, validation_data=None, **kwargs):
+    """Trains the model; must be implemented by subclasses."""
+
+  @abc.abstractmethod
+  def export(self, **kwargs):
+    """Exports the model; must be implemented by subclasses."""
+
+  def summary(self):
+    self.model.summary()
+
+  @abc.abstractmethod
+  def evaluate(self, data, **kwargs):
+    """Evaluates the model; must be implemented by subclasses."""
+
+  def _gen_dataset(self,
+                   data,
+                   batch_size=32,
+                   is_training=True,
+                   input_pipeline_context=None):
+    """Generates training / validation dataset."""
+    # The dataset is always sharded by number of hosts.
+    # num_input_pipelines is the number of hosts rather than number of cores.
+    ds = data.dataset
+    if input_pipeline_context and input_pipeline_context.num_input_pipelines > 1:
+      ds = ds.shard(input_pipeline_context.num_input_pipelines,
+                    input_pipeline_context.input_pipeline_id)
+
+    ds = ds.map(
+        self.preprocess, num_parallel_calls=tf.data.experimental.AUTOTUNE)
+
+    if is_training:
+      if self.shuffle:
+        ds = ds.shuffle(buffer_size=min(data.size, 100))
+      ds = ds.repeat()
+
+    ds = ds.batch(batch_size)
+    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
+    return ds
+
+  def _export_tflite(self,
+                     tflite_filename,
+                     quantized=False,
+                     quantization_steps=None,
+                     representative_data=None):
+    """Converts the retrained model to tflite format and saves it.
+
+    Args:
+      tflite_filename: File name to save tflite model.
+      quantized: boolean, if True, save quantized model.
+      quantization_steps: Number of post-training quantization calibration steps
+        to run. Used only if `quantized` is True.
+      representative_data: Representative data used for post-training
+        quantization. Used only if `quantized` is True.
+    """
+    temp_dir = None
+    try:
+      if compat.get_tf_behavior() == 1:
+        temp_dir = tempfile.TemporaryDirectory()
+        save_path = os.path.join(temp_dir.name, 'saved_model')
+        self.model.save(save_path, include_optimizer=False, save_format='tf')
+        converter = tf.compat.v1.lite.TFLiteConverter.from_saved_model(
+            save_path)
+      else:
+        converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
+      if quantized:
+        if quantization_steps is None:
+          quantization_steps = DEFAULT_QUANTIZATION_STEPS
+        if representative_data is None:
+          raise ValueError(
+              'representative_data couldn\'t be None if model is quantized.')
+        ds = self._gen_dataset(
+            representative_data, batch_size=1, is_training=False)
+        converter.representative_dataset = tf.lite.RepresentativeDataset(
+            get_representative_dataset_gen(ds, quantization_steps))
+        converter.optimizations = [tf.lite.Optimize.DEFAULT]
+        converter.inference_input_type = tf.uint8
+        converter.inference_output_type = tf.uint8
+        converter.target_spec.supported_ops = [
+            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
+        ]
+      tflite_model = converter.convert()
+    finally:  # Always remove the temporary SavedModel, even on failure.
+      if temp_dir:
+        temp_dir.cleanup()
+
+    with tf.io.gfile.GFile(tflite_filename, 'wb') as f:
+      f.write(tflite_model)
+    tf.compat.v1.logging.info('Export to tflite model in %s.', tflite_filename)
diff --git a/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py b/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py
new file mode 100644
index 00000000000..4fac96ba2fc
--- /dev/null
+++ b/tensorflow_examples/lite/model_maker/core/task/custom_model_test.py
@@ -0,0 +1,119 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+from tensorflow_examples.lite.model_maker.core import model_export_format as mef
+from tensorflow_examples.lite.model_maker.core import test_util
+from tensorflow_examples.lite.model_maker.core.task import custom_model
+
+
+class MockCustomModel(custom_model.CustomModel):
+
+  def train(self, train_data, validation_data=None, **kwargs):
+    """No-op stand-in for the abstract `train` hook."""
+
+  def export(self, **kwargs):
+    """No-op stand-in for the abstract `export` hook."""
+
+  def evaluate(self, data, **kwargs):
+    """No-op stand-in for the abstract `evaluate` hook."""
+
+
+class CustomModelTest(tf.test.TestCase):
+
+  def setUp(self):
+    super(CustomModelTest, self).setUp()
+    self.model = MockCustomModel(
+        model_export_format=mef.ModelExportFormat.TFLITE,
+        model_spec=None,
+        shuffle=False)
+
+  def test_gen_dataset(self):
+    """Checks `_gen_dataset` yields every sample unchanged and in order."""
+    data = test_util.get_dataloader(
+        data_size=2, input_shape=[8], num_classes=2)
+
+    ds = self.model._gen_dataset(data, batch_size=1, is_training=False)
+    expected = list(data.dataset.as_numpy_iterator())
+    # batch_size=1 adds a leading axis, so each expected value is wrapped too.
+    for i, (feature, label) in enumerate(ds):
+      expected_feature, expected_label = [expected[i][0]], [expected[i][1]]
+      self.assertTrue((feature.numpy() == expected_feature).all())
+      self.assertEqual(label.numpy(), expected_label)
+
+  def test_export_tflite(self):
+    input_dim = 4
+    self.model.model = test_util.build_model(
+        input_shape=[input_dim], num_classes=2)
+    tflite_file = os.path.join(self.get_temp_dir(), 'model.tflite')
+    self.model._export_tflite(tflite_file)
+    self._test_tflite(self.model.model, tflite_file, input_dim)
+
+  def test_export_tflite_quantized(self):
+    input_dim = 4
+    num_classes = 2
+    max_input_value = 5
+    self.model.model = test_util.build_model([input_dim], num_classes)
+    tflite_file = os.path.join(self.get_temp_dir(), 'model_quantized.tflite')
+    self.model._export_tflite(
+        tflite_file,
+        quantized=True,
+        quantization_steps=1,
+        representative_data=test_util.get_dataloader(
+            data_size=1,
+            input_shape=[input_dim],
+            num_classes=num_classes,
+            max_input_value=max_input_value))
+    self._test_tflite(
+        self.model.model, tflite_file, input_dim, max_input_value, atol=1e-01)
+
+  def _test_tflite(self,
+                   keras_model,
+                   tflite_model_file,
+                   input_dim,
+                   max_input_value=1000,
+                   atol=1e-04):
+    """Asserts the tflite and keras models agree on one random input."""
+    with tf.io.gfile.GFile(tflite_model_file, 'rb') as f:
+      tflite_model = f.read()
+    random_input = tf.random.uniform(
+        shape=(1, input_dim),
+        minval=0,
+        maxval=max_input_value,
+        dtype=tf.float32)
+
+    # Gets output from keras model.
+    keras_output = keras_model.predict(random_input)
+
+    # Gets output from tflite model.
+    interpreter = tf.lite.Interpreter(model_content=tflite_model)
+    interpreter.allocate_tensors()
+    interpreter.set_tensor(interpreter.get_input_details()[0]['index'],
+                           random_input)
+    interpreter.invoke()
+    lite_output = interpreter.get_tensor(
+        interpreter.get_output_details()[0]['index'])
+
+    self.assertTrue(np.allclose(lite_output, keras_output, atol=atol))
+
+
+if __name__ == '__main__':
+  tf.test.main()
diff --git a/tensorflow_examples/lite/model_maker/core/task/text_classifier.py b/tensorflow_examples/lite/model_maker/core/task/text_classifier.py
index 8ec0133de0a..8fe69b8ede2 100644
--- a/tensorflow_examples/lite/model_maker/core/task/text_classifier.py
+++ b/tensorflow_examples/lite/model_maker/core/task/text_classifier.py
@@ -90,12 +90,6 @@ def __init__(self,
         shuffle,
         train_whole_model=True)
 
-  def preprocess(self, raw_text, label):
-    """Preprocess the text."""
-    # TODO(yuqili): remove this method once preprocess for image classifier is
-    # also moved to DataLoader part.
-    return raw_text, label
-
   def get_dataset_fn(self, input_data, global_batch_size, is_training):
     """Gets a closure to create a dataset."""
 
diff --git a/tensorflow_examples/lite/model_maker/core/test_util.py b/tensorflow_examples/lite/model_maker/core/test_util.py
index 2d3e92b4196..149fcb2ce81 100644
--- a/tensorflow_examples/lite/model_maker/core/test_util.py
+++ b/tensorflow_examples/lite/model_maker/core/test_util.py
@@ -21,8 +21,9 @@
 
 from absl import flags
 
-import tensorflow as tf
+import tensorflow.compat.v2 as tf
 from tensorflow_examples.lite.model_maker.core import compat
+from tensorflow_examples.lite.model_maker.core.data_util import dataloader
 
 FLAGS = flags.FLAGS
 
@@ -86,3 +87,34 @@ def decorator(*args, **kwargs):
     fn(*args, **kwargs)
 
   return decorator
+
+
+def build_model(input_shape, num_classes):
+  """Creates a tiny softmax classifier for rank-3 or rank-1 input shapes."""
+  inputs = tf.keras.layers.Input(shape=input_shape)
+  rank = len(input_shape)
+  if rank not in (1, 3):
+    raise ValueError("Model inputs should be 2D tensor or 4D tensor.")
+
+  # Rank-3 (image) inputs are average-pooled first; rank-1 (text) go straight in.
+  features = inputs
+  if rank == 3:
+    features = tf.keras.layers.GlobalAveragePooling2D()(inputs)
+  outputs = tf.keras.layers.Dense(num_classes, activation="softmax")(features)
+  return tf.keras.Model(inputs=inputs, outputs=outputs)
+
+
+def get_dataloader(data_size, input_shape, num_classes, max_input_value=1000):
+  """Builds a `DataLoader` of `data_size` random (feature, label) samples.
+
+  Features are float32 drawn from [0, max_input_value); labels are int32 drawn
+  from [0, num_classes).
+  """
+  feature_shape = [data_size] + input_shape
+  features = tf.random.uniform(
+      shape=feature_shape, minval=0, maxval=max_input_value, dtype=tf.float32)
+  labels = tf.random.uniform(
+      shape=[data_size], minval=0, maxval=num_classes, dtype=tf.int32)
+
+  samples = tf.data.Dataset.from_tensor_slices((features, labels))
+  return dataloader.DataLoader(samples, data_size)