Add compatibility with Python 3 and TF2. Provide a Dockerfile for training the model on GPU.

Edit run_local_gpu.sh with the location of your dataset. PiperOrigin-RevId: 291449218
Teora · Jan 24, 2020 · 3d912f3 · 3d912f3
1 parent 7157071
commit 3d912f3
Show file tree

Hide file tree

Showing 12 changed files with 86 additions and 74 deletions.
diff --git a/capsule_em/DockerfileGPU b/capsule_em/DockerfileGPU
@@ -0,0 +1,27 @@
+# --- Base image: CUDA 10.0 + cuDNN 7 runtime
+FROM nvidia/cuda:10.0-cudnn7-runtime
+
+# --- System packages: Python 3 and pip (not in nvidia image)
+RUN apt-get update
+RUN apt-get -y install python3
+RUN apt-get -y install python3-pip
+
+# --- Work from /root, presumably so "python3 -m capsule_em.experiment" resolves /root/capsule_em
+WORKDIR /root
+
+# --- Python packages (pip itself is upgraded first)
+RUN pip3 install --upgrade pip
+RUN pip3 install numpy
+RUN pip3 install matplotlib
+RUN pip3 install absl-py
+RUN pip3 install tqdm
+RUN pip3 install tensorflow-gpu==2.0.0-alpha0
+
+# --- Copies the source code: top-level modules plus the norb/ and mnist/ directories
+RUN mkdir /root/capsule_em
+COPY ./*.py /root/capsule_em/
+COPY ./norb/ /root/capsule_em/norb/
+COPY ./mnist/ /root/capsule_em/mnist/
+
+# --- Entry point: runs the experiment module; "docker run" arguments are appended as flags
+ENTRYPOINT ["python3", "-m", "capsule_em.experiment"]
diff --git a/capsule_em/README.md b/capsule_em/README.md
@@ -23,3 +23,5 @@ To get 1.3 (1.4 in the paper), enable patching:
 ```
 python -m capsule_em.experiment  --train=0 --eval_once=1 --eval_size=24300 --ckpnt=$HOME/model.ckpt-1 --final_beta=0.01 --norb_data_dir=$HOME/smallNORB/ --patching=True
 ```
+
+A Dockerfile (`DockerfileGPU`) is now included in the repository. To train on a GPU, install Docker with NVIDIA support, then edit `run_local_gpu.sh` so that the `-v` option points to your smallNORB directory. Running `./run_local_gpu.sh` builds the image and starts training the model.
diff --git a/capsule_em/em_layers.py b/capsule_em/em_layers.py
@@ -302,12 +302,12 @@ def connector_capsule_mat(input_tensor,
           input_dim, input_shape[0], input_shape[3], input_shape[4],
           num_out_atoms, output_dim
       ])
-      wx_trans.set_shape((input_dim, None, input_tensor.get_shape()[3].value,
-                          input_tensor.get_shape()[4].value, num_out_atoms,
+      wx_trans.set_shape((input_dim, None, input_tensor.get_shape()[3],
+                          input_tensor.get_shape()[4], num_out_atoms,
                           output_dim))
       h, w, _ = position_grid.get_shape()
-      height = h.value
-      width = w.value
+      height = h
+      width = w
       # t_pose = tf.transpose(position_grid, [2, 0, 1])
       # t_pose_exp = tf.scatter_nd([[sqr_num_out_atoms -1],
       #   [2 * sqr_num_out_atoms - 1]], t_pose, [num_out_atoms, height, width])
@@ -387,13 +387,13 @@ def conv_capsule_mat(input_tensor,
           input_shape[0] * input_dim * in_atom_sq, input_shape[3],
           input_shape[4], 1
       ])
-      input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3].value,
-                                       input_tensor.get_shape()[4].value, 1))
+      input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3],
+                                       input_tensor.get_shape()[4], 1))
       input_act_reshaped = tf.reshape(
           input_activation,
           [input_shape[0] * input_dim, input_shape[3], input_shape[4], 1])
-      input_act_reshaped.set_shape((None, input_tensor.get_shape()[3].value,
-                                    input_tensor.get_shape()[4].value, 1))
+      input_act_reshaped.set_shape((None, input_tensor.get_shape()[3],
+                                    input_tensor.get_shape()[4], 1))
       print(input_tensor_reshaped.get_shape())
       # conv: [x*128,out*out_at, c3,c4]
       conv_patches = tf.extract_image_patches(
@@ -410,8 +410,8 @@ def conv_capsule_mat(input_tensor,
           rates=[1, 1, 1, 1],
           padding='VALID',
       )
-      o_height = (in_height.value - kernel_size) // stride + 1
-      o_width = (in_width.value - kernel_size) // stride + 1
+      o_height = (in_height - kernel_size) // stride + 1
+      o_width = (in_width - kernel_size) // stride + 1
       patches = tf.reshape(conv_patches,
                            (input_shape[0], input_dim, in_atom_sq, o_height,
                             o_width, kernel_size, kernel_size))
@@ -490,8 +490,8 @@ def primary_caps(conv, conv_dim, output_dim, out_atoms):
     conv_reshaped = tf.reshape(conv_caps, [
         conv_shape[0], output_dim, out_atoms + 1, conv_shape[2], conv_shape[3]
     ])
-    conv_reshaped.set_shape((None, output_dim, out_atoms + 1, c_height.value,
-                             c_width.value))
+    conv_reshaped.set_shape((None, output_dim, out_atoms + 1, c_height,
+                             c_width))
     conv_caps_center, conv_caps_logit = tf.split(
         conv_reshaped, [out_atoms, 1], axis=2)
     conv_caps_activation = tf.sigmoid(conv_caps_logit - 1.0)

diff --git a/capsule_em/em_model.py b/capsule_em/em_model.py
@@ -19,11 +19,10 @@
 from __future__ import division
 from __future__ import print_function
 import math
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from capsule_em import em_layers
 from capsule_em import simple_model
 from capsule_em import utils
-from tensorflow.contrib import layers as contrib_layers
 FLAGS = tf.app.flags.FLAGS
 
 
@@ -65,7 +64,7 @@ def _build_capsule(input_tensor, input_atom, position_grid, num_classes):
       print(conv_caps_center.get_shape())
       print(conv_caps_act.get_shape())
 
-  capsule1_act = contrib_layers.flatten(conv_caps_act)
+  capsule1_act = tf.layers.flatten(conv_caps_act)
 
   position_grid = tf.squeeze(position_grid, axis=[0])
   position_grid = tf.transpose(position_grid, [1, 2, 0])

diff --git a/capsule_em/experiment.py b/capsule_em/experiment.py
@@ -18,17 +18,15 @@
 from __future__ import division
 from __future__ import print_function
 import os
-import sys
 import time
 import numpy as np
 
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from capsule_em import model as f_model
 from capsule_em.mnist \
   import mnist_record
 from capsule_em.norb \
   import norb_record
-from tensorflow.contrib import tfprof as contrib_tfprof
 from tensorflow.python import debug as tf_debug
 
 FLAGS = tf.app.flags.FLAGS
@@ -136,7 +134,7 @@
 tf.app.flags.DEFINE_string('data_set', 'norb', 'the data set to use.')
 tf.app.flags.DEFINE_string('cifar_data_dir', '/tmp/cifar10_data',
                            """Path to the CIFAR-10 data directory.""")
-tf.app.flags.DEFINE_string('norb_data_dir', '/tmp/smallNORB/',
+tf.app.flags.DEFINE_string('norb_data_dir', '/root/datasets/smallNORB/',
                            """Path to the norb data directory.""")
 tf.app.flags.DEFINE_string('affnist_data_dir', '/tmp/affnist_data',
                            """Path to the affnist data directory.""")
@@ -159,7 +157,7 @@ def get_features(train, total_batch):
   batch_size = total_batch // max(1, FLAGS.num_gpus)
   split = 'train' if train else 'test'
   features = []
-  for i in xrange(FLAGS.num_gpus):
+  for i in range(FLAGS.num_gpus):
     with tf.device('/cpu:0'):
       with tf.name_scope('input_tower_%d' % (i)):
         if FLAGS.data_set == 'norb':
@@ -222,15 +220,7 @@ def run_training():
     model = f_model.multi_gpu_model
     print('so far so good!')
     result = model(features)
-    param_stats = contrib_tfprof.model_analyzer.print_model_analysis(
-        tf.get_default_graph(),
-        tfprof_options=contrib_tfprof.model_analyzer
-        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
-    sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
-
-    contrib_tfprof.model_analyzer.print_model_analysis(
-        tf.get_default_graph(),
-        tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
+
     merged = result['summary']
     train_step = result['train']
     # test_writer = tf.summary.FileWriter(FLAGS.summary_dir + '/test')

diff --git a/capsule_em/layers.py b/capsule_em/layers.py
@@ -20,7 +20,6 @@
 from __future__ import division
 from __future__ import print_function
 import tensorflow.compat.v1 as tf
-from tensorflow.contrib import metrics as contrib_metrics
 
 
 def margin_loss(labels, raw_logits, margin=0.4, downweight=0.5):
@@ -68,8 +67,7 @@ def optimizer(logits, labels, multi, scope, softmax, rate=1.0, step=0.0):
     with tf.name_scope('correct_prediction'):
       _, classes = tf.nn.top_k(labels, k=2 if multi else 1)
       _, preds = tf.nn.top_k(logits, k=2 if multi else 1)
-      wrong = contrib_metrics.set_size(
-          contrib_metrics.set_difference(classes, preds))
+      wrong = tf.sets.size(tf.sets.difference(classes, preds))
       correct_prediction = tf.equal(wrong, 0)
       almost_correct = tf.less(wrong, 2)
       correct_prediction_sum = tf.reduce_sum(

diff --git a/capsule_em/mnist/mnist_record.py b/capsule_em/mnist/mnist_record.py
@@ -22,9 +22,7 @@
 
 
 import os.path
-import random
-import tensorflow as tf
-from tensorflow.contrib import image as contrib_image
+import tensorflow.compat.v1 as tf
 
 
 def _read_and_decode(filename_queue, image_pixel=28, distort=0):
@@ -61,8 +59,8 @@ def _read_and_decode(filename_queue, image_pixel=28, distort=0):
     image = tf.reshape(image, [28, 28])
     image = tf.random_crop(image, [24, 24])
     # 0.26179938779 is 15 degrees in radians
-    image = contrib_image.rotate(image,
-                                 random.uniform(-0.26179938779, 0.26179938779))
+    # image = contrib_image.rotate(image,
+    #                             random.uniform(-0.26179938779, 0.26179938779))
     image = tf.reshape(image, [24, 24, 1])
   elif distort == 2:
     image = tf.reshape(image, [28, 28])

diff --git a/capsule_em/model.py b/capsule_em/model.py
@@ -19,7 +19,7 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from capsule_em import em_model
 from capsule_em import layers
 from capsule_em import simple_model
@@ -112,7 +112,7 @@ def multi_gpu_model(features):
     almosts = []
     result = {}
     with tf.variable_scope(tf.get_variable_scope()):
-      for i in xrange(FLAGS.num_gpus):
+      for i in range(FLAGS.num_gpus):
         with tf.device('/gpu:%d' % i):
           with tf.name_scope('tower_%d' % (i)) as scope:
             label_ = features[i]['labels']

diff --git a/capsule_em/norb/norb_record.py b/capsule_em/norb/norb_record.py
@@ -50,7 +50,6 @@ def _read_and_decode(filename_queue, image_pixel=96, distort=0):
   image = tf.reshape(image, tf.stack([depth, height, height]))
   image = tf.transpose(image, [1, 2, 0])
   image = tf.cast(image, tf.float32)
-  print(image.get_shape()[0].value)
   if image_pixel < 96:
     print('image resizing to {}'.format(image_pixel))
     image = tf.image.resize_images(image, [image_pixel, image_pixel])
@@ -123,12 +122,16 @@ def inputs(train_dir,
           capacity=2000 + 3 * batch_size,
           # Ensures a minimum amount of shuffling of examples.
           min_after_dequeue=2000)
+      cc_images = images
+      cc_labels = sparse_labels
     else:
       images, sparse_labels, orig_images = tf.train.batch(
           [image, label, orig_image],
           batch_size=batch_size,
           num_threads=1,
           capacity=1000 + 3 * batch_size)
+      cc_images = images
+      cc_labels = sparse_labels
       if patching:
         t_images = tf.tile(orig_images, [4, 1, 1, 1])
         c_images = tf.image.extract_glimpse(

diff --git a/capsule_em/run_local_gpu.sh b/capsule_em/run_local_gpu.sh
@@ -0,0 +1,22 @@
+# Copyright 2020 The Google Research Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+JOB_NAME="norbcapsule_`date +"%b%d_%H%M%S"`"
+TAG="norbcapsule_local_gpu"
+
+docker build -f DockerfileGPU -t $TAG $PWD
+docker  run -v \
+  $HOME/datasets/smallNORB:/root/datasets/smallNORB \
+  --runtime=nvidia $TAG \
+  --job_name $JOB_NAME
diff --git a/capsule_em/simple_model.py b/capsule_em/simple_model.py
@@ -19,9 +19,8 @@
 from __future__ import division
 from __future__ import print_function
 import numpy as np
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 from capsule_em import utils
-from tensorflow.contrib import layers as contrib_layers
 FLAGS = tf.app.flags.FLAGS
 
 
@@ -84,8 +83,8 @@ def add_convs(features):
     if FLAGS.verbose:
       tf.summary.histogram('activation', conv1)
     if FLAGS.pooling:
-      pool1 = contrib_layers.max_pool2d(
-          conv1, kernel_size=2, stride=2, data_format='NCHW', padding='SAME')
+      pool1 = tf.nn.max_pool2d(
+          conv1, ksize=2, strides=2, data_format='NCHW', padding='SAME')
       convs = [pool1]
     else:
       convs = [conv1]
@@ -120,10 +119,10 @@ def add_convs(features):
       cur_conv = tf.nn.relu(pre_activation, name=scope.name)
       if FLAGS.pooling:
         convs += [
-            contrib_layers.max_pool2d(
+            tf.nn.max_pool2d(
                 cur_conv,
-                kernel_size=2,
-                stride=2,
+                ksize=2,
+                strides=2,
                 data_format='NCHW',
                 padding='SAME')
         ]
@@ -132,29 +131,3 @@ def add_convs(features):
       if FLAGS.verbose:
         tf.summary.histogram('activation', convs[-1])
   return convs[-1], conv_outputs[-1], position_grid
-
-
-def conv_inference(features):
-  """Inference for a CNN. Conv + FC."""
-  conv, _, _ = add_convs(features)
-  hidden1 = contrib_layers.flatten(conv)
-  if FLAGS.extra_fc > 0:
-    hidden = contrib_layers.fully_connected(
-        hidden1,
-        FLAGS.extra_fc,
-        activation_fn=tf.nn.relu,
-        weights_initializer=tf.truncated_normal_initializer(
-            stddev=0.1, dtype=tf.float32),
-        biases_initializer=tf.constant_initializer(0.1))
-    if FLAGS.dropout and FLAGS.train:
-      hidden = tf.nn.dropout(hidden, 0.5)
-  else:
-    hidden = hidden1
-  logits = contrib_layers.fully_connected(
-      hidden,
-      features['num_classes'],
-      activation_fn=None,
-      weights_initializer=tf.truncated_normal_initializer(
-          stddev=0.1, dtype=tf.float32),
-      biases_initializer=tf.constant_initializer(0.1))
-  return logits, None, None
diff --git a/capsule_em/utils.py b/capsule_em/utils.py
@@ -23,7 +23,7 @@
 from __future__ import division
 from __future__ import print_function
 
-import tensorflow as tf
+import tensorflow.compat.v1 as tf
 
 FLAGS = tf.app.flags.FLAGS
 tf.app.flags.DEFINE_bool('verbose', False, 'If true, adds summary.')
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,3 +23,5 @@ To get 1.3 (1.4 in the paper), enable patching: @@
     ```
     python -m capsule_em.experiment  --train=0 --eval_once=1 --eval_size=24300 --ckpnt=$HOME/model.ckpt-1 --final_beta=0.01 --norb_data_dir=$HOME/smallNORB/ --patching=True
     ```
+    A Dockerfile (`DockerfileGPU`) is now included in the repository. To train on a GPU, install Docker with NVIDIA support, then edit `run_local_gpu.sh` so that the `-v` option points to your smallNORB directory. Running `./run_local_gpu.sh` builds the image and starts training the model.