Skip to content

Commit

Permalink
Update packages and tests in the straight dope nightly (apache#12744)
Browse files Browse the repository at this point in the history
* [apache#12345] Enabling two tests in the Straight Dope Nightly.

Two Straight Dope notebook tests had been disabled because they timed out.
I've updated one of the notebooks (rnn-gluon) to use the
gpu instead of the cpu so it takes ~ 5 minutes on a p3.2xl, and verified
the other notebook takes a minute and was a false alarm (visual-qa). The
PR in the Straight Dope is:
zackchase/mxnet-the-straight-dope#540

* Add dependency for IPython update.

* Detect errors in notebook execution failure.

* Clean up of naming in retry code.
  • Loading branch information
vishaalkapoor authored and marcoabreu committed Oct 6, 2018
1 parent c2bb012 commit c993ef1
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 10 deletions.
4 changes: 2 additions & 2 deletions ci/docker/install/ubuntu_nightly_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ apt-get -y install time
apt-get install -y subversion maven -y #>/dev/null

# Packages needed for the Straight Dope Nightly tests.
pip2 install pandas scikit-image
pip3 install pandas scikit-image
pip2 install pandas scikit-image prompt_toolkit
pip3 install pandas scikit-image prompt_toolkit
9 changes: 7 additions & 2 deletions tests/nightly/straight_dope/test_notebooks_single_gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@
'chapter02_supervised-learning/environment',
'chapter03_deep-neural-networks/kaggle-gluon-kfold',
'chapter04_convolutional-neural-networks/deep-cnns-alexnet', # > 10 mins.
'chapter05_recurrent-neural-networks/rnns-gluon', # > 10 mins.
'chapter06_optimization/gd-sgd-scratch', # Overflow warning is intended.
'chapter06_optimization/gd-sgd-gluon', # Overflow warning is intended.
'chapter07_distributed-learning/multiple-gpus-scratch',
'chapter07_distributed-learning/multiple-gpus-gluon',
'chapter07_distributed-learning/training-with-multiple-machines',
'chapter08_computer-vision/visual-question-answer', # > 10 mins.
'chapter11_recommender-systems/intro-recommender-systems', # Early draft, non-working.
'chapter12_time-series/intro-forecasting-gluon',
'chapter12_time-series/intro-forecasting-2-gluon',
Expand Down Expand Up @@ -178,6 +176,9 @@ def test_lstm_scratch(self):
# Chapter 5: passes only if `_test_notebook` returns a truthy value for the
# gru-scratch notebook. `_test_notebook` is defined outside this view;
# presumably it executes the notebook and reports success -- confirm.
def test_gru_scratch(self):
assert _test_notebook('chapter05_recurrent-neural-networks/gru-scratch')

# Chapter 5: passes only if `_test_notebook` returns a truthy value for the
# rnns-gluon notebook. `_test_notebook` is defined outside this view;
# presumably it executes the notebook and reports success -- confirm.
def test_rnn_gluon(self):
assert _test_notebook('chapter05_recurrent-neural-networks/rnns-gluon')

# Chapter 6

def test_optimization_intro(self):
Expand Down Expand Up @@ -227,6 +228,10 @@ def test_object_detection(self):
# Chapter 8: passes only if `_test_notebook` returns a truthy value for the
# fine-tuning notebook. `_test_notebook` is defined outside this view;
# presumably it executes the notebook and reports success -- confirm.
def test_fine_tuning(self):
assert _test_notebook('chapter08_computer-vision/fine-tuning')

# Chapter 8: passes only if `_test_notebook` returns a truthy value for the
# visual-question-answer notebook. `_test_notebook` is defined outside this
# view; presumably it executes the notebook and reports success -- confirm.
def test_visual_qa(self):
assert _test_notebook('chapter08_computer-vision/visual-question-answer')


# Chapter 9

def test_tree_lstm(self):
Expand Down
17 changes: 11 additions & 6 deletions tests/utils/notebook_test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

IPYTHON_VERSION = 4 # Pin to ipython version 4.
TIME_OUT = 10*60 # Maximum 10 mins/test. Reaching timeout causes test failure.
RETRIES = 8
ATTEMPTS = 8
KERNEL_ERROR_MSG = 'Kernel died before replying to kernel_info'


Expand Down Expand Up @@ -80,23 +80,28 @@ def run_notebook(notebook, notebook_dir, kernel=None, no_cache=False, temp_dir='
else:
eprocessor = ExecutePreprocessor(timeout=TIME_OUT)

success = False
# There is a low (< 1%) chance that starting a notebook executor will fail due to the kernel
# taking too long to start, or a port collision, etc.
for i in range(RETRIES):
for i in range(ATTEMPTS):
try:
nb, _ = eprocessor.preprocess(notebook, {'metadata': {'path': working_dir}})
success = True
except RuntimeError as rte:
# We check if the exception has to do with the Jupyter kernel failing to start. If
# not, we rethrow to prevent the notebook from erring RETRIES times. It is not ideal
# to inspect the exception message, but necessary for retry logic, as Jupyter client
# throws the generic RuntimeError that can be confused with other Runtime errors.
# not, we rethrow to prevent the notebook from erring ATTEMPTS times. It is not
# ideal to inspect the exception message, but necessary for retry logic, as Jupyter
# client throws the generic RuntimeError that can be confused with other Runtime
# errors.
if str(rte) != KERNEL_ERROR_MSG:
raise rte

logging.info("Error starting preprocessor: {}. Attempt {}/{}".format(str(rte), i+1, RETRIES))
logging.info("Error starting preprocessor: {}. Attempt {}/{}".format(str(rte), i+1, ATTEMPTS))
time.sleep(1)
continue
break
if not success:
errors.append("Error: Notebook failed to run after {} attempts.".format(ATTEMPTS))
except Exception as err:
err_msg = str(err)
errors.append(err_msg)
Expand Down

0 comments on commit c993ef1

Please sign in to comment.