Merge pull request RasaHQ#4951 from RasaHQ/3549_no_plot

degiz · web-flow · commit f950f7c734e2 · 2019-12-12T23:54:31.000+01:00
Add a flag to disable plotting in rasa test
diff --git a/changelog/3549.improvement.rst b/changelog/3549.improvement.rst
@@ -0,0 +1 @@
+Added a ``--no-plot`` option to the ``rasa test`` command, which disables rendering of the confusion matrix and histogram. By default, plots are rendered.
diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
@@ -15,6 +15,7 @@
 
 def set_test_arguments(parser: argparse.ArgumentParser):
     add_model_param(parser, add_positional_arg=False)
+    add_no_plot_param(parser)
 
     core_arguments = parser.add_argument_group("Core Test Arguments")
     add_test_core_argument_group(core_arguments)
@@ -79,6 +80,7 @@ def add_test_core_argument_group(
         "All models in the provided directory are evaluated "
         "and compared against each other.",
     )
+    add_no_plot_param(parser)
 
 
 def add_test_nlu_argument_group(
@@ -162,6 +164,8 @@ def add_test_nlu_argument_group(
         help="Percentages of training data to exclude during comparison.",
     )
 
+    add_no_plot_param(parser)
+
 
 def add_test_core_model_param(parser: argparse.ArgumentParser):
     default_path = get_latest_model(DEFAULT_MODELS_PATH)
@@ -175,3 +179,24 @@ def add_test_core_model_param(parser: argparse.ArgumentParser):
         "will be used (exception: '--evaluate-model-directory' flag is set). If multiple "
         "'tar.gz' files are provided, all those models will be compared.",
     )
+
+
+def add_no_plot_param(
+    parser: argparse.ArgumentParser, default: bool = False, required: bool = False,
+) -> None:
+    """Add the ``--no-plot`` switch to ``parser``.
+
+    The switch is a ``store_true`` argument stored as ``disable_plotting``.
+
+    :param parser: parser the argument is added to
+    :param default: value of ``disable_plotting`` when the switch is absent
+    :param required: forwarded to ``add_argument`` as ``required``
+    """
+    parser.add_argument(
+        "--no-plot",
+        dest="disable_plotting",
+        action="store_true",
+        default=default,
+        help="Don't render evaluation plots",
+        required=required,
+    )
diff --git a/rasa/core/test.py b/rasa/core/test.py
@@ -488,6 +488,7 @@ async def test(
     out_directory: Optional[Text] = None,
     fail_on_prediction_errors: bool = False,
     e2e: bool = False,
+    disable_plotting: bool = False,
 ):
     """Run the evaluation of the stories, optionally plot the results."""
     from rasa.nlu.test import get_evaluation_metrics
@@ -518,6 +519,7 @@ async def test(
             accuracy,
             story_evaluation.in_training_data_fraction,
             out_directory,
+            disable_plotting,
         )
 
     log_failed_stories(story_evaluation.failed_stories, out_directory)
@@ -566,6 +568,7 @@ def plot_story_evaluation(
     accuracy,
     in_training_data_fraction,
     out_directory,
+    disable_plotting,
 ):
     """Plot the results of story evaluation"""
     from sklearn.metrics import confusion_matrix
@@ -584,6 +587,9 @@ def plot_story_evaluation(
         include_report=True,
     )
 
+    if disable_plotting:
+        return
+
     cnf_matrix = confusion_matrix(test_y, predictions)
 
     plot_confusion_matrix(
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
@@ -441,6 +441,7 @@ def evaluate_intents(
     errors: bool,
     confmat_filename: Optional[Text],
     intent_hist_filename: Optional[Text],
+    disable_plotting: bool,
 ) -> Dict:  # pragma: no cover
     """Creates a confusion matrix and summary statistics for intent predictions.
 
@@ -504,26 +505,13 @@ def evaluate_intents(
         # log and save misclassified samples to file for debugging
         collect_nlu_errors(intent_results, errors_filename)
 
-    if confmat_filename:
-        import matplotlib.pyplot as plt
-
-        if output_directory:
-            confmat_filename = os.path.join(output_directory, confmat_filename)
-            intent_hist_filename = os.path.join(output_directory, intent_hist_filename)
-
-        plot_confusion_matrix(
-            cnf_matrix,
-            classes=labels,
-            title="Intent Confusion matrix",
-            out=confmat_filename,
-        )
-        plt.show(block=False)
-
-        plot_attribute_confidences(
-            intent_results, intent_hist_filename, "intent_target", "intent_prediction"
-        )
-
-        plt.show(block=False)
+    if not disable_plotting:
+        if confmat_filename:
+            _plot_confusion_matrix(
+                output_directory, confmat_filename, cnf_matrix, labels
+            )
+        if intent_hist_filename:
+            _plot_histogram(output_directory, intent_hist_filename, intent_results)
 
     predictions = [
         {
@@ -544,6 +532,35 @@ def evaluate_intents(
     }
 
 
+def _plot_confusion_matrix(
+    output_directory: Optional[Text],
+    confmat_filename: Optional[Text],
+    cnf_matrix: np.array,
+    labels: Collection[Text],
+) -> None:
+    if output_directory:
+        confmat_filename = os.path.join(output_directory, confmat_filename)
+
+    plot_confusion_matrix(
+        cnf_matrix,
+        classes=labels,
+        title="Intent Confusion matrix",
+        out=confmat_filename,
+    )
+
+
+def _plot_histogram(
+    output_directory: Optional[Text],
+    intent_hist_filename: Optional[Text],
+    intent_results: List[IntentEvaluationResult],
+) -> None:
+    if output_directory:  # the directory only decides where the file is written
+        intent_hist_filename = os.path.join(output_directory, intent_hist_filename)
+    plot_attribute_confidences(  # always plot, as _plot_confusion_matrix does
+        intent_results, intent_hist_filename, "intent_target", "intent_prediction"
+    )
+
+
 def merge_labels(
     aligned_predictions: List[Dict], extractor: Optional[Text] = None
 ) -> np.array:
@@ -1037,6 +1054,7 @@ def run_evaluation(
     confmat: Optional[Text] = None,
     histogram: Optional[Text] = None,
     component_builder: Optional[ComponentBuilder] = None,
+    disable_plotting: bool = False,
 ) -> Dict:  # pragma: no cover
     """
     Evaluate intent classification, response selection and entity extraction.
@@ -1049,6 +1067,7 @@ def run_evaluation(
     :param confmat: path to file that will show the confusion matrix
     :param histogram: path fo file that will show a histogram
     :param component_builder: component builder
+    :param disable_plotting: if true confusion matrix and histogram will not be rendered
 
     :return: dictionary containing evaluation results
     """
@@ -1075,7 +1094,13 @@ def run_evaluation(
     if intent_results:
         logger.info("Intent evaluation results:")
         result["intent_evaluation"] = evaluate_intents(
-            intent_results, output_directory, successes, errors, confmat, histogram
+            intent_results,
+            output_directory,
+            successes,
+            errors,
+            confmat,
+            histogram,
+            disable_plotting,
         )
 
     if response_selection_results:
@@ -1168,6 +1193,7 @@ def cross_validate(
     errors: bool = False,
     confmat: Optional[Text] = None,
     histogram: Optional[Text] = None,
+    disable_plotting: bool = False,
 ) -> Tuple[CVEvaluationResult, CVEvaluationResult]:
     """Stratified cross validation on data.
 
@@ -1230,7 +1256,13 @@ def cross_validate(
     if intent_classifier_present:
         logger.info("Accumulated test folds intent evaluation results:")
         evaluate_intents(
-            intent_test_results, output, successes, errors, confmat, histogram
+            intent_test_results,
+            output,
+            successes,
+            errors,
+            confmat,
+            histogram,
+            disable_plotting,
         )
 
     if extractors:
diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
@@ -12,6 +12,12 @@ def test_test_core(run_in_default_project: Callable[..., RunResult]):
     assert os.path.exists("results")
 
 
+def test_test_core_no_plot(run_in_default_project: Callable[..., RunResult]):
+    run_in_default_project("test", "core", "--no-plot")
+
+    assert not os.path.exists("results/story_confmat.pdf")
+
+
 def test_test(run_in_default_project: Callable[..., RunResult]):
     run_in_default_project("test")
 
@@ -20,6 +26,14 @@ def test_test(run_in_default_project: Callable[..., RunResult]):
     assert os.path.exists("results/confmat.png")
 
 
+def test_test_no_plot(run_in_default_project: Callable[..., RunResult]):
+    run_in_default_project("test", "--no-plot")
+
+    assert not os.path.exists("results/hist.png")
+    assert not os.path.exists("results/confmat.png")
+    assert not os.path.exists("results/story_confmat.pdf")
+
+
 def test_test_nlu(run_in_default_project: Callable[..., RunResult]):
     run_in_default_project("test", "nlu", "--nlu", "data", "--successes")
 
@@ -28,6 +42,13 @@ def test_test_nlu(run_in_default_project: Callable[..., RunResult]):
     assert os.path.exists("results/intent_successes.json")
 
 
+def test_test_nlu_no_plot(run_in_default_project: Callable[..., RunResult]):
+    run_in_default_project("test", "nlu", "--no-plot")
+
+    assert not os.path.exists("results/confmat.png")
+    assert not os.path.exists("results/hist.png")
+
+
 def test_test_nlu_cross_validation(run_in_default_project: Callable[..., RunResult]):
     run_in_default_project(
         "test", "nlu", "--cross-validation", "-c", "config.yml", "-f", "2"
@@ -134,7 +155,7 @@ def test_test_help(run: Callable[..., RunResult]):
                  [--successes] [--no-errors] [--histogram HISTOGRAM]
                  [--confmat CONFMAT] [-c CONFIG [CONFIG ...]]
                  [--cross-validation] [-f FOLDS] [-r RUNS]
-                 [-p PERCENTAGES [PERCENTAGES ...]]
+                 [-p PERCENTAGES [PERCENTAGES ...]] [--no-plot]
                  {core,nlu} ..."""
 
     lines = help_text.split("\n")
@@ -150,7 +171,7 @@ def test_test_nlu_help(run: Callable[..., RunResult]):
                      [--successes] [--no-errors] [--histogram HISTOGRAM]
                      [--confmat CONFMAT] [-c CONFIG [CONFIG ...]]
                      [--cross-validation] [-f FOLDS] [-r RUNS]
-                     [-p PERCENTAGES [PERCENTAGES ...]]"""
+                     [-p PERCENTAGES [PERCENTAGES ...]] [--no-plot]"""
 
     lines = help_text.split("\n")
 
@@ -165,7 +186,7 @@ def test_test_core_help(run: Callable[..., RunResult]):
                       [-s STORIES] [--max-stories MAX_STORIES] [--out OUT]
                       [--e2e] [--endpoints ENDPOINTS]
                       [--fail-on-prediction-errors] [--url URL]
-                      [--evaluate-model-directory]"""
+                      [--evaluate-model-directory] [--no-plot]"""
 
     lines = help_text.split("\n")
 
diff --git a/tests/nlu/base/test_evaluation.py b/tests/nlu/base/test_evaluation.py
@@ -305,6 +305,7 @@ def test_intent_evaluation_report(tmpdir_factory):
         errors=False,
         confmat_filename=None,
         intent_hist_filename=None,
+        disable_plotting=False,
     )
 
     report = json.loads(rasa.utils.io.read_file(report_filename))
@@ -357,6 +358,7 @@ def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:
         errors=False,
         confmat_filename=None,
         intent_hist_filename=None,
+        disable_plotting=False,
     )
 
     report = json.loads(rasa.utils.io.read_file(str(report_filename)))

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+Added a ``--no-plot`` option to the ``rasa test`` command, which disables rendering of the confusion matrix and histogram. By default, plots are rendered.
Original file line number	Diff line number	Diff line change
`@@ -305,6 +305,7 @@ def test_intent_evaluation_report(tmpdir_factory):`
`305`	`305`	`errors=False,`
`306`	`306`	`confmat_filename=None,`
`307`	`307`	`intent_hist_filename=None,`
	`308`	`+ disable_plotting=False,`
`308`	`309`	`)`
`309`	`310`
`310`	`311`	`report = json.loads(rasa.utils.io.read_file(report_filename))`
`@@ -357,6 +358,7 @@ def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:`
`357`	`358`	`errors=False,`
`358`	`359`	`confmat_filename=None,`
`359`	`360`	`intent_hist_filename=None,`
	`361`	`+ disable_plotting=False,`
`360`	`362`	`)`
`361`	`363`
`362`	`364`	`report = json.loads(rasa.utils.io.read_file(str(report_filename)))`