Update interleave hyperparameters

PiperOrigin-RevId: 265780130
hz7715 · Aug 27, 2019 · 85956b1 · 85956b1
1 parent 0fa5ff2
commit 85956b1
Showing 1 changed file with 5 additions and 1 deletion.
diff --git a/official/bert/input_pipeline.py b/official/bert/input_pipeline.py
@@ -94,8 +94,12 @@ def create_pretrain_dataset(file_paths,
   dataset = dataset.shuffle(len(file_paths))
 
   # In parallel, create tf record dataset for each train files.
+  # cycle_length = 8 means that records are interleaved from up to 8 files at a
+  # time, with num_parallel_calls (AUTOTUNE) deciding how many are read in
+  # parallel. Consider raising cycle_length if you have many CPU cores.
   dataset = dataset.interleave(
-      tf.data.TFRecordDataset, cycle_length=tf.data.experimental.AUTOTUNE)
+      tf.data.TFRecordDataset, cycle_length=8,
+      num_parallel_calls=tf.data.experimental.AUTOTUNE)
 
   decode_fn = lambda record: decode_record(record, name_to_features)
   dataset = dataset.map(