Skip to content

Commit

Permalink
Loosen bounds on individual tests (semgrep#3025)
Browse files Browse the repository at this point in the history
* Loosen bounds on individual tests

* Don't compute json-time in earlier version

* Fix json-time syntax
  • Loading branch information
emjin authored Apr 30, 2021
1 parent fff6dd7 commit a34a96e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 12 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,9 @@ jobs:
python3 -m semgrep --version
export PATH=/github/home/.local/bin:$PATH
python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing1.txt
python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing1.txt --no-time
cat semgrep/baseline_timing1.txt
python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing2.txt
python3 perf/run-benchmarks --small-only --std-only --save-to semgrep/baseline_timing2.txt --no-time
cat semgrep/baseline_timing2.txt
pip3 uninstall -y semgrep
- name: Download artifacts
Expand Down
24 changes: 16 additions & 8 deletions perf/run-benchmarks
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def upload_result(
metric_url = f"{DASHBOARD_URL}/api/metric/{metric_name}"
upload(metric_url, str(value))

if variant_name == STD:
if variant_name == STD and timings is not None:
# Compute bps from semgrep timing data
assert "rules" in timings
assert "total_time" in timings
Expand Down Expand Up @@ -288,21 +288,21 @@ def upload_result(
print(r.content)


def standardize_findings(findings: dict) -> Tuple[dict, dict]:
def standardize_findings(findings: dict, include_time: bool) -> Tuple[dict, dict]:
if "errors" not in findings:
msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'errors'"
raise Exception(msg)
if "results" not in findings:
msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'results'"
raise Exception(msg)
if "time" not in findings:
if include_time and "time" not in findings:
msg = json.dumps(findings, indent=4) + "\n\nDid not find expected key 'time'"
raise Exception(msg)
results = {
"errors": findings["errors"],
"results": {SemgrepResult(i) for i in findings["results"]},
}
timings = findings["time"]
timings = findings["time"] if include_time else None
return results, timings


Expand All @@ -325,13 +325,12 @@ def output_differences(


def run_semgrep(
docker: str, corpus: Corpus, variant: SemgrepVariant
docker: str, corpus: Corpus, variant: SemgrepVariant, include_time: bool
) -> Tuple[float, bytes]:
args = []
common_args = [
"--strict",
"--json",
"--json-time",
"--timeout",
"0",
"--verbose",
Expand Down Expand Up @@ -365,6 +364,8 @@ def run_semgrep(
args.extend(common_args)
if variant.semgrep_extra != "":
args.extend([variant.semgrep_extra])
if include_time:
args.extend(["--json-time"])

print(f"current directory: {os.getcwd()}")
print("semgrep command: {}".format(" ".join(args)))
Expand Down Expand Up @@ -402,6 +403,7 @@ def run_benchmarks(
filter_variant: str,
plot_benchmarks: bool,
upload: bool,
include_time: bool,
summary_file_path: str,
called_dir: str,
) -> None:
Expand Down Expand Up @@ -438,7 +440,9 @@ def run_benchmarks(
name = ".".join(["semgrep", "bench", corpus.name, variant.name])
metric_name = ".".join([name, "duration"])
print(f"------ {name} ------")
duration, findings_bytes = run_semgrep(docker, corpus, variant)
duration, findings_bytes = run_semgrep(
docker, corpus, variant, include_time
)

# Report results
msg = f"{metric_name} = {duration:.3f} s"
Expand All @@ -447,7 +451,9 @@ def run_benchmarks(
durations += f"{duration:.3f}\n"
results[variant.name].append(duration)

findings, timings = standardize_findings(json.loads(findings_bytes))
findings, timings = standardize_findings(
json.loads(findings_bytes), include_time
)

if upload:
upload_result(variant.name, metric_name, duration, timings)
Expand Down Expand Up @@ -548,6 +554,7 @@ def main() -> None:
parser.add_argument(
"--semgrep-core", help="run semgrep-core benchmarks", action="store_true"
)
parser.add_argument("--no-time", help="disable time-checking", action="store_true")
args = parser.parse_args()

cur_dir = os.path.dirname(os.path.abspath(__file__))
Expand All @@ -567,6 +574,7 @@ def main() -> None:
args.filter_variant,
args.plot_benchmarks,
args.upload,
not args.no_time,
args.save_to,
called_dir,
)
Expand Down
11 changes: 9 additions & 2 deletions scripts/compare_perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,19 @@ def main() -> None:
)
latest_times = [min(t1, t2) for t1, t2 in latest_times]

total_baseline = 0.0
total_latest = 0.0
for baseline_time, latest_time in zip(baseline_times, latest_times):
print(f"Baseline: {baseline_time}, Latest: {latest_time}")

# Assert latest time is not more than 6% slower than baseline
# Assert latest time is not more than 20% slower than baseline
# or is within a fixed "probably environmental" range
assert latest_time < baseline_time * 1.06 or latest_time - baseline_time < 0.5
assert latest_time < baseline_time * 1.2 or latest_time - baseline_time < 1.0
total_baseline += baseline_time
total_latest += latest_time

# Assert the rules in aggregate are not more than 6% slower than baseline
assert total_latest < total_baseline * 1.06


if __name__ == "__main__":
Expand Down

0 comments on commit a34a96e

Please sign in to comment.