Loosen bounds on individual tests (semgrep#3025)

* Loosen bounds on individual tests * Don't compute json-time in earlier version * Fix json-time syntax
xiaolushuo · Apr 30, 2021 · a34a96e · a34a96e
1 parent fff6dd7
commit a34a96e
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 12 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -178,9 +178,9 @@ jobs:
           python3 -m semgrep --version
           export PATH=/github/home/.local/bin:$PATH
 
-          python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing1.txt
+          python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing1.txt --no-time
           cat semgrep/baseline_timing1.txt
-          python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing2.txt
+          python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing2.txt --no-time
           cat semgrep/baseline_timing2.txt
           pip3 uninstall -y semgrep
       - name: Download artifacts

diff --git a/perf/run-benchmarks b/perf/run-benchmarks
@@ -253,7 +253,7 @@ def upload_result(
     metric_url = f"{DASHBOARD_URL}/api/metric/{metric_name}"
     upload(metric_url, str(value))
 
-    if variant_name == STD:
+    if variant_name == STD and timings is not None:
         # Compute bps from semgrep timing data
         assert "rules" in timings
         assert "total_time" in timings
@@ -288,21 +288,21 @@ def upload_result(
         print(r.content)
 
 
-def standardize_findings(findings: dict) -> Tuple[dict, dict]:
+def standardize_findings(findings: dict, include_time: bool) -> Tuple[dict, dict]:
     if "errors" not in findings:
         msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'errors'"
         raise Exception(msg)
     if "results" not in findings:
         msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'results'"
         raise Exception(msg)
-    if "time" not in findings:
+    if include_time and "time" not in findings:
         msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'time'"
         raise Exception(msg)
     results = {
         "errors": findings["errors"],
         "results": {SemgrepResult(i) for i in findings["results"]},
     }
-    timings = findings["time"]
+    timings = findings["time"] if include_time else None
     return results, timings
 
 
@@ -325,13 +325,12 @@ def output_differences(
 
 
 def run_semgrep(
-    docker: str, corpus: Corpus, variant: SemgrepVariant
+    docker: str, corpus: Corpus, variant: SemgrepVariant, include_time: bool
 ) -> Tuple[float, bytes]:
     args = []
     common_args = [
         "--strict",
         "--json",
-        "--json-time",
         "--timeout",
         "0",
         "--verbose",
@@ -365,6 +364,8 @@ def run_semgrep(
     args.extend(common_args)
     if variant.semgrep_extra != "":
         args.extend([variant.semgrep_extra])
+    if include_time:
+        args.extend(["--json-time"])
 
     print(f"current directory: {os.getcwd()}")
     print("semgrep command: {}".format(" ".join(args)))
@@ -402,6 +403,7 @@ def run_benchmarks(
     filter_variant: str,
     plot_benchmarks: bool,
     upload: bool,
+    include_time: bool,
     summary_file_path: str,
     called_dir: str,
 ) -> None:
@@ -438,7 +440,9 @@ def run_benchmarks(
                 name = ".".join(["semgrep", "bench", corpus.name, variant.name])
                 metric_name = ".".join([name, "duration"])
                 print(f"------ {name} ------")
-                duration, findings_bytes = run_semgrep(docker, corpus, variant)
+                duration, findings_bytes = run_semgrep(
+                    docker, corpus, variant, include_time
+                )
 
                 # Report results
                 msg = f"{metric_name} = {duration:.3f} s"
@@ -447,7 +451,9 @@ def run_benchmarks(
                 durations += f"{duration:.3f}\n"
                 results[variant.name].append(duration)
 
-                findings, timings = standardize_findings(json.loads(findings_bytes))
+                findings, timings = standardize_findings(
+                    json.loads(findings_bytes), include_time
+                )
 
                 if upload:
                     upload_result(variant.name, metric_name, duration, timings)
@@ -548,6 +554,7 @@ def main() -> None:
     parser.add_argument(
         "--semgrep-core", help="run semgrep-core benchmarks", action="store_true"
     )
+    parser.add_argument("--no-time", help="disable time-checking", action="store_true")
     args = parser.parse_args()
 
     cur_dir = os.path.dirname(os.path.abspath(__file__))
@@ -567,6 +574,7 @@ def main() -> None:
                 args.filter_variant,
                 args.plot_benchmarks,
                 args.upload,
+                not args.no_time,
                 args.save_to,
                 called_dir,
             )

diff --git a/scripts/compare_perf.py b/scripts/compare_perf.py
@@ -26,12 +26,19 @@ def main() -> None:
     )
     latest_times = [min(t1, t2) for t1, t2 in latest_times]
 
+    total_baseline = 0.0
+    total_latest = 0.0
     for baseline_time, latest_time in zip(baseline_times, latest_times):
         print(f"Baseline: {baseline_time}, Latest: {latest_time}")
 
-        # Assert latest time is not more than 6% slower than baseline
+        # Assert latest time is not more than 20% slower than baseline
         # or is within a fixed "probably environmental" range
-        assert latest_time < baseline_time * 1.06 or latest_time - baseline_time < 0.5
+        assert latest_time < baseline_time * 1.2 or latest_time - baseline_time < 1.0
+        total_baseline += baseline_time
+        total_latest += latest_time
+
+    # Assert the rules in aggregate are not more than 6% slower than baseline
+    assert total_latest < total_baseline * 1.06
 
 
 if __name__ == "__main__":