Bug 1642492 [wpt PR 23902] - [wptrunner/chromium] Produce full logs and screenshots, a=testonly

Automatic update from web-platform-tests
[wptrunner/chromium] Produce full logs and screenshots

1. artifacts["log"] now contains the full logs, including the harness
   status and passing subtests.
2. Base64-encoded reftest screenshots, if present, are now stored in
   artifacts["screenshots"].

R=lpz

Change-Id: I6da1ba9e87f0dfbf1b1579d25f173d057c4e79e8
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2225741
Commit-Queue: Robert Ma <[email protected]>
Reviewed-by: Luke Z <[email protected]>
Cr-Commit-Position: refs/heads/master@{#776739}

--

wpt-commits: 090f052c18cb3a0cb919f921749801f30f9f62a8
wpt-pr: 23902
Hexcles authored and moz-wptsync-bot committed Jun 12, 2020
1 parent 1acb0a5 commit 790c6e1
Showing 2 changed files with 109 additions and 45 deletions.
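
For illustration, a hypothetical per-test entry in the Chromium JSON Test Results output could look like the sketch below, written as the Python dict the formatter builds before serialization. The test name, messages, and base64 payloads are invented, and a real reftest would not normally have subtests; this is only a contrived sketch of the two artifact kinds described in the commit message.

    example_leaf = {
        "actual": "FAIL",
        "expected": "PASS",
        "artifacts": {
            # Full log: one entry per subtest plus one for the harness status.
            "log": [
                "[subtest_a] [FAIL expected PASS] assert_equals: expected 1 but got 2",
                "[] [FAIL expected PASS] ",
            ],
            # Reftest screenshots, when present, in "url: base64" form.
            "screenshots": [
                "foo.html: iVBORw0KGgo...",
                "foo-ref.html: iVBORw0KGgo...",
            ],
        },
    }
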
@@ -1,5 +1,6 @@
import json
import time
import six

from collections import defaultdict
from mozlog.formatters import base
@@ -8,6 +9,12 @@
class ChromiumFormatter(base.BaseFormatter):
"""Formatter to produce results matching the Chromium JSON Test Results format.
https://chromium.googlesource.com/chromium/src/+/master/docs/testing/json_test_results_format.md
Notably, each test has an "artifacts" field that is a dict consisting of
"log": a list of strings (one per subtest + one for harness status, see
_append_test_message for the format)
"screenshots": a list of strings in the format of "url: base64"
"""

def __init__(self):
@@ -24,9 +31,9 @@ def __init__(self):
# the trie and the leaf contains the dict of per-test data.
self.tests = {}

# Message dictionary, keyed by test name. Value is a concatenation of
# the subtest messages for this test.
self.messages = defaultdict(str)
# Message dictionary, keyed by test name. Value is a list of strings:
# see _append_test_message for the format.
self.messages = defaultdict(list)

# List of tests that have failing subtests.
self.tests_with_subtest_fails = set()
@@ -40,20 +47,11 @@ def _append_test_message(self, test, subtest, status, expected, message):
:param str expected: the expected subtest statuses
:param str message: the string to append to the message for this test
Here's an example of a message:
[TIMEOUT expected FAIL] Test Name foo: assert_equals: expected 1 but got 2
Example:
[subtest foo] [FAIL expected PASS] message
"""
if not message:
return
# Add the prefix, with the test status and subtest name (if available)
prefix = "[%s" % status
if expected and status not in expected:
prefix += " expected %s] " % expected
else:
prefix += "] "
if subtest:
prefix += "%s: " % subtest
self.messages[test] += prefix + message + "\n"
self.messages[test].append("[%s] [%s expected %s] %s" %
(subtest, status, expected, message))

def _append_artifact(self, cur_dict, artifact_name, artifact_value):
"""
@@ -62,21 +60,22 @@ def _append_artifact(self, cur_dict, artifact_name, artifact_value):
:param str artifact_name: the name of the artifact
:param str artifact_value: the value of the artifact
"""
assert isinstance(artifact_value, six.string_types), "artifact_value must be a str"
if "artifacts" not in cur_dict.keys():
cur_dict["artifacts"] = {}
# Artifacts are all expected to be lists, so even though we only have a
# single |artifact_value| we still put it in a list.
cur_dict["artifacts"][artifact_name] = [artifact_value]
cur_dict["artifacts"] = defaultdict(list)
cur_dict["artifacts"][artifact_name].append(artifact_value)

def _store_test_result(self, name, actual, expected, message, wpt_actual, subtest_failure):
def _store_test_result(self, name, actual, expected, messages, wpt_actual,
subtest_failure, reftest_screenshots):
"""
Stores the result of a single test in |self.tests|
:param str name: name of the test.
:param str actual: actual status of the test.
:param str expected: expected statuses of the test.
:param str message: test output, such as status, subtest, errors etc.
:param list messages: a list of test messages.
:param str wpt_actual: actual status reported by wpt, may differ from |actual|.
:param bool subtest_failure: whether this test failed because of subtests
:param bool subtest_failure: whether this test failed because of subtests.
:param Optional[list] reftest_screenshots: see executors/base.py for definition.
"""
# The test name can contain a leading / which will produce an empty
# string in the first position of the list returned by split. We use
@@ -91,9 +90,17 @@ def _store_test_result(self, name, actual, expected, message, wpt_actual, subtes
self._append_artifact(cur_dict, "wpt_subtest_failure", "true")
if wpt_actual != actual:
self._append_artifact(cur_dict, "wpt_actual_status", wpt_actual)
if message != "":
for message in messages:
self._append_artifact(cur_dict, "log", message)

# Store screenshots (if any).
for item in reftest_screenshots or []:
if not isinstance(item, dict):
# Skip the relation string.
continue
data = "%s: %s" % (item["url"], item["screenshot"])
self._append_artifact(cur_dict, "screenshots", data)

# Figure out if there was a regression or unexpected status. This only
# happens for tests that were run
if actual != "SKIP":
@@ -172,13 +179,14 @@ def test_status(self, data):
is_unexpected = actual_status not in expected_statuses
if is_unexpected and test_name not in self.tests_with_subtest_fails:
self.tests_with_subtest_fails.add(test_name)
if "message" in data:
self._append_test_message(test_name, data["subtest"], actual_status, expected_statuses, data["message"])
self._append_test_message(test_name, data.get("subtest", ""),
actual_status, expected_statuses,
data.get("message", ""))

def test_end(self, data):
test_name = data["test"]
# Save the status reported by WPT since we might change it when reporting
# to Chromium.
# Save the status reported by WPT since we might change it when
# reporting to Chromium.
wpt_actual_status = data["status"]
actual_status = self._map_status_name(wpt_actual_status)
expected_statuses = self._get_expected_status_from_data(actual_status, data)
@@ -193,12 +201,16 @@
if actual_status == "PASS":
actual_status = "FAIL"

if "message" in data:
self._append_test_message(test_name, None, actual_status,
expected_statuses, data["message"])
self._store_test_result(test_name, actual_status, expected_statuses,
self.messages[test_name], wpt_actual_status,
subtest_failure)
self._append_test_message(test_name, "",
actual_status, expected_statuses,
data.get("message", ""))
self._store_test_result(test_name,
actual_status,
expected_statuses,
self.messages[test_name],
wpt_actual_status,
subtest_failure,
data.get("extra", {}).get("reftest_screenshots"))

# Remove the test from messages dict to avoid accumulating too many.
self.messages.pop(test_name)
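
A minimal sketch (not part of the diff) of the accumulation behavior the switch to defaultdict(list) in _append_artifact gives: artifacts stay lists of strings, and repeated appends under the same artifact name accumulate rather than overwrite. The helper below mirrors the diff's logic in Python 3 form; the example values are invented.

    from collections import defaultdict

    def append_artifact(cur_dict, artifact_name, artifact_value):
        # Mirrors ChromiumFormatter._append_artifact: artifacts are always
        # lists of strings, so each call appends instead of replacing.
        assert isinstance(artifact_value, str), "artifact_value must be a str"
        if "artifacts" not in cur_dict:
            cur_dict["artifacts"] = defaultdict(list)
        cur_dict["artifacts"][artifact_name].append(artifact_value)

    leaf = {}
    append_artifact(leaf, "log", "[t1_a] [FAIL expected PASS] assert_equals: expected 1 but got 2")
    append_artifact(leaf, "log", "[] [FAIL expected PASS] ")
    append_artifact(leaf, "screenshots", "foo.html: aGVsbG8=")
    # leaf["artifacts"]["log"] now holds two entries; "screenshots" holds one.
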
@@ -139,7 +139,7 @@ def test_subtest_messages(capfd):
message="t1_b_message")
logger.test_end("t1", status="PASS", expected="PASS")
logger.test_start("t2")
# Currently, subtests with empty messages will be ignored
# Subtests with empty messages should not be ignored.
logger.test_status("t2", status="PASS", subtest="t2_a")
# A test-level message will also be appended
logger.test_end("t2", status="TIMEOUT", expected="PASS",
@@ -157,11 +157,17 @@
output_json = json.load(output)

t1_artifacts = output_json["tests"]["t1"]["artifacts"]
assert t1_artifacts["log"] == ["[FAIL expected PASS] t1_a: t1_a_message\n"
"[PASS] t1_b: t1_b_message\n"]
assert t1_artifacts["log"] == [
"[t1_a] [FAIL expected PASS] t1_a_message",
"[t1_b] [PASS expected PASS] t1_b_message",
"[] [FAIL expected PASS] ",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
t2_artifacts = output_json["tests"]["t2"]["artifacts"]
assert t2_artifacts["log"] == ["[TIMEOUT expected PASS] t2_message\n"]
assert t2_artifacts["log"] == [
"[t2_a] [PASS expected PASS] ",
"[] [TIMEOUT expected PASS] t2_message",
]
assert "wpt_subtest_failure" not in t2_artifacts.keys()


@@ -204,9 +210,12 @@ def test_subtest_failure(capfd):

test_obj = output_json["tests"]["t1"]
t1_artifacts = test_obj["artifacts"]
assert t1_artifacts["log"] == ["[FAIL expected PASS] t1_a: t1_a_message\n"
"[PASS] t1_b: t1_b_message\n"
"[TIMEOUT expected PASS] t1_c: t1_c_message\n"]
assert t1_artifacts["log"] == [
"[t1_a] [FAIL expected PASS] t1_a_message",
"[t1_b] [PASS expected PASS] t1_b_message",
"[t1_c] [TIMEOUT expected PASS] t1_c_message",
"[] [FAIL expected PASS] ",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
# The status of the test in the output is a failure because subtests failed,
# despite the harness reporting that the test passed. But the harness status
@@ -258,9 +267,12 @@ def test_expected_subtest_failure(capfd):

test_obj = output_json["tests"]["t1"]
t1_log = test_obj["artifacts"]["log"]
assert t1_log == ["[FAIL] t1_a: t1_a_message\n"
"[PASS] t1_b: t1_b_message\n"
"[TIMEOUT] t1_c: t1_c_message\n"]
assert t1_log == [
"[t1_a] [FAIL expected FAIL] t1_a_message",
"[t1_b] [PASS expected PASS] t1_b_message",
"[t1_c] [TIMEOUT expected TIMEOUT] t1_c_message",
"[] [PASS expected PASS] ",
]
# The status of the test in the output is a pass because the subtest
# failures were all expected.
assert test_obj["actual"] == "PASS"
@@ -303,7 +315,10 @@ def test_unexpected_subtest_pass(capfd):

test_obj = output_json["tests"]["t1"]
t1_artifacts = test_obj["artifacts"]
assert t1_artifacts["log"] == ["[PASS expected FAIL] t1_a: t1_a_message\n"]
assert t1_artifacts["log"] == [
"[t1_a] [PASS expected FAIL] t1_a_message",
"[] [FAIL expected PASS] ",
]
assert t1_artifacts["wpt_subtest_failure"] == ["true"]
# Since the subtest status is unexpected, we fail the test. But we report
# wpt_actual_status as an artifact
@@ -518,3 +533,40 @@ def test_known_intermittent_empty(capfd):
# anywhere.
assert test_obj["actual"] == "PASS"
assert test_obj["expected"] == "PASS"


def test_reftest_screenshots(capfd):
# reftest_screenshots, if present, should be plumbed into artifacts.

# Set up the handler.
output = StringIO()
logger = structuredlog.StructuredLogger("test_a")
logger.add_handler(handlers.StreamHandler(output, ChromiumFormatter()))

# Run a reftest with reftest_screenshots.
logger.suite_start(["t1"], run_info={}, time=123)
logger.test_start("t1")
logger.test_end("t1", status="FAIL", expected="PASS", extra={
"reftest_screenshots": [
{"url": "foo.html", "hash": "HASH1", "screenshot": "DATA1"},
"!=",
{"url": "foo-ref.html", "hash": "HASH2", "screenshot": "DATA2"},
]
})
logger.suite_end()

# check nothing got output to stdout/stderr
# (note that mozlog outputs exceptions during handling to stderr!)
captured = capfd.readouterr()
assert captured.out == ""
assert captured.err == ""

# check the actual output of the formatter
output.seek(0)
output_json = json.load(output)

test_obj = output_json["tests"]["t1"]
assert test_obj["artifacts"]["screenshots"] == [
"foo.html: DATA1",
"foo-ref.html: DATA2",
]
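
Not part of this change, but as a consumer-side sketch: an entry in artifacts["screenshots"] can be split back into its URL and raw image bytes, assuming the "url: base64" format produced by _store_test_result above. The payload here is a stand-in string, not a real screenshot.

    import base64

    def split_screenshot_artifact(entry):
        # "<url>: <base64>" -> (url, decoded bytes); the ": " separator
        # matches the "%s: %s" formatting used in _store_test_result.
        url, encoded = entry.split(": ", 1)
        return url, base64.b64decode(encoded)

    url, image_bytes = split_screenshot_artifact("foo.html: aGVsbG8=")
    assert url == "foo.html" and image_bytes == b"hello"
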
