Skip to content

Commit

Permalink
Merge branch 'fix_model_dump_without_infer_graph' into 'master'
Browse files Browse the repository at this point in the history
fix model dump

See merge request data/monolith!2128

GitOrigin-RevId: 2448868d25c75c3063556ea933b09ee8016dd9f1
  • Loading branch information
李博 authored and monolith committed Sep 14, 2023
1 parent c9ffb37 commit 26cba68
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
2 changes: 2 additions & 0 deletions monolith/native_training/cpu_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -1190,6 +1190,8 @@ def create_saver():
# should_do_first_save = self.config.partial_recovery and ckpt_state is None
# Here we just make it false because there are issues with uninitialized iterator.
should_do_first_save = False
if self.config.enable_model_dump:
save_utils.NoFirstSaveCheckpointSaverHook._in_model_dump_mode = True
saver_hook = save_utils.NoFirstSaveCheckpointSaverHook(
model_dir,
save_secs=save_checkpoints_secs,
Expand Down
5 changes: 3 additions & 2 deletions monolith/native_training/save_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ class NoFirstSaveCheckpointSaverHook(tf.estimator.CheckpointSaverHook):
"""A saver hook which won't perform the first save (which happened on after_create_session)."""

_has_dense_only: bool = False
_in_model_dump_mode: bool = False
_last_triggered_step: int = 0

def __init__(self,
Expand Down Expand Up @@ -416,11 +417,11 @@ def _create_or_update_monolith_ckpt_state(self, do_update=False):
logging.info("monolith ckpt state saved")

def end(self, session):
last_step = session.run(self._global_step_tensor)
if self._is_dense_only:
pass
- elif self._has_dense_only:
+ elif self._has_dense_only or self._in_model_dump_mode:
# force save
last_step = session.run(self._global_step_tensor)
self._timer.update_last_triggered_step(last_step)
super()._save(session, last_step)
for l in self._listeners:
Expand Down

0 comments on commit 26cba68

Please sign in to comment.