Use sklearn's output_dict option for the classification report

MetcalfeTom · MetcalfeTom · commit 6b89d13f9c5c · 2019-01-23T10:48:00.000+01:00
diff --git a/alt_requirements/requirements_spacy_sklearn.txt b/alt_requirements/requirements_spacy_sklearn.txt
@@ -2,6 +2,6 @@
 -r requirements_bare.txt
 
 spacy==2.0.18
-scikit-learn==0.19.1
+scikit-learn==0.20.2
 scipy==1.1.0
 sklearn-crfsuite==0.3.6
diff --git a/alt_requirements/requirements_tensorflow_sklearn.txt b/alt_requirements/requirements_tensorflow_sklearn.txt
@@ -1,7 +1,7 @@
 # Minimum Install Requirements
 -r requirements_bare.txt
 
-scikit-learn==0.19.1
+scikit-learn==0.20.2
 tensorflow==1.12.0
 scipy==1.1.0
 sklearn-crfsuite==0.3.6
diff --git a/rasa_nlu/classifiers/sklearn_intent_classifier.py b/rasa_nlu/classifiers/sklearn_intent_classifier.py
@@ -178,10 +178,10 @@ def process(self, message, **kwargs):
         else:
             X = message.get("text_features").reshape(1, -1)
             intent_ids, probabilities = self.predict(X)
-            intents = self.transform_labels_num2str(intent_ids)
+            intents = self.transform_labels_num2str(np.ravel(intent_ids))
             # `predict` returns a matrix as it is supposed
             # to work for multiple examples as well, hence we need to flatten
-            intents, probabilities = intents.flatten(), probabilities.flatten()
+            probabilities = probabilities.flatten()
 
             if intents.size > 0 and probabilities.size > 0:
                 ranking = list(zip(list(intents),
diff --git a/rasa_nlu/evaluate.py b/rasa_nlu/evaluate.py
@@ -171,14 +171,15 @@ def log_evaluation_table(report,  # type: Text
     logger.info("Classification report: \n{}".format(report))
 
 
-def get_evaluation_metrics(targets, predictions):  # pragma: no cover
+def get_evaluation_metrics(targets, predictions, output_dict=False):  # pragma: no cover
     """Compute the f1, precision, accuracy and summary report from sklearn."""
     from sklearn import metrics
 
     targets = clean_intent_labels(targets)
     predictions = clean_intent_labels(predictions)
 
-    report = metrics.classification_report(targets, predictions)
+    report = metrics.classification_report(targets, predictions,
+                                           output_dict=output_dict)
     precision = metrics.precision_score(targets, predictions,
                                         average='weighted')
     f1 = metrics.f1_score(targets, predictions, average='weighted')
@@ -187,43 +188,6 @@ def get_evaluation_metrics(targets, predictions):  # pragma: no cover
     return report, precision, f1, accuracy
 
 
-def report_to_dict(report, f1, precision, accuracy):
-    """Convert sklearn metrics report into dict"""
-
-    report_dict = {
-        'f1': f1,
-        'precision': precision,
-        'accuracy': accuracy,
-        'intents': []
-    }
-
-    lines = list(filter(None, report.split('\n')))
-    labels = lines[0].split()
-
-    report_dict['intents'] = report_row_to_dict(labels, lines[1:-1])
-
-    return report_dict
-
-
-def report_row_to_dict(labels, lines):
-    """Convert sklearn metrics report row to dict"""
-    import re
-
-    array = []
-    for line in lines:
-        row_data = re.split('\s{2,}', line.strip())
-        name = row_data[0]
-        values = row_data[1:]
-        r = {
-            'name': name
-        }
-        for i in range(len(values)):
-            r[labels[i]] = values[i]
-        array.append(r)
-
-    return array
-
-
 def remove_empty_intent_examples(intent_results):
     """Remove those examples without an intent."""
 
@@ -343,16 +307,21 @@ def evaluate_intents(intent_results,
                 "of {} examples".format(len(intent_results), num_examples))
 
     targets, predictions = _targets_predictions_from(intent_results)
-    report, precision, f1, accuracy = get_evaluation_metrics(targets,
-                                                             predictions)
-
-    log_evaluation_table(report, precision, f1, accuracy)
 
     if report_filename:
-        save_json(report_to_dict(report, f1, precision, accuracy), report_filename)
+        report, precision, f1, accuracy = get_evaluation_metrics(targets,
+                                                                 predictions,
+                                                                 output_dict=True)
+
+        save_json(report, report_filename)
         logger.info("Classification report saved to {}."
                     .format(report_filename))
 
+    else:
+        report, precision, f1, accuracy = get_evaluation_metrics(targets,
+                                                                 predictions)
+        log_evaluation_table(report, precision, f1, accuracy)
+
     if successes_filename:
         # save classified samples to file for debugging
         collect_nlu_successes(intent_results, successes_filename)