# calculate_ci_perf.py (forked from semgrep/semgrep)
import contextlib
import json
import os
import shutil
import subprocess
import time
from pathlib import Path
from typing import Generator
from typing import List
from typing import Optional
from typing import Union


@contextlib.contextmanager
def chdir(dirname: Path = None) -> Generator:  # type: ignore
    curdir = os.getcwd()
    try:
        if dirname is not None:
            os.chdir(dirname)
        yield
    finally:
        os.chdir(curdir)
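

# Usage sketch for `chdir` (comment added for illustration; "some-repo" is a
# hypothetical path): run a git command inside a cloned repo, then restore the
# previous working directory on exit.
#
#     with chdir(Path("some-repo")):
#         subprocess.check_output(["git", "status"])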


def clone_github_repo(
    repo_url: str, sha: Optional[str] = None, retries: int = 3
) -> Path:
    """
    Wraps the `_github_repo` function with retries. If `_github_repo` throws an
    exception, this deletes `repo_destination` and retries up to `retries` times.
    """
    sha_str = sha or "latest"
    repo_dir = "-".join(repo_url.split("/")[-2:]) + "-" + sha_str
    repo_destination = Path(repo_dir)
    try:
        return _github_repo(repo_url, sha, repo_destination)
    except (GitError, subprocess.CalledProcessError) as ex:
        print(f"Failed to clone github repo for tests: {ex}")
        if repo_destination.exists():
            shutil.rmtree(repo_destination)
        if retries == 0:
            raise
        else:
            return clone_github_repo(repo_url, sha, retries - 1)
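

# Usage sketch for `clone_github_repo` (comment added for illustration; the URL
# and sha below are hypothetical). The clone lands in "./<owner>-<repo>-<sha>"
# relative to the current working directory:
#
#     repo = clone_github_repo(
#         repo_url="https://github.com/example/project",
#         sha="0123456789abcdef0123456789abcdef01234567",
#     )
#     # -> Path("example-project-0123456789abcdef0123456789abcdef01234567")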


class GitError(BaseException):
    pass


def _github_repo(repo_url: str, sha: Optional[str], repo_destination: Path) -> Path:
    """
    Internal helper; use `clone_github_repo` instead, which adds retries.
    Clones the github repo at `repo_url` into `repo_destination` and checks out `sha`.
    If `repo_destination` already exists, it will validate that the correct repo is present at that location.
    """
    if not repo_destination.exists():
        if sha is None:
            subprocess.check_output(
                ["git", "clone", "--depth=1", repo_url, repo_destination],
                stderr=subprocess.STDOUT,
            )
        else:
            repo_destination.mkdir()
            # Sadly, no fast way to clone a specific commit without a super
            # modern git client
            subprocess.check_output(
                ["git", "clone", repo_url, repo_destination], stderr=subprocess.STDOUT
            )
            with chdir(repo_destination):
                subprocess.check_output(
                    ["git", "checkout", sha], stderr=subprocess.STDOUT
                )

    # validate that the repo seems set up properly
    with chdir(repo_destination):
        # some tests modify it; let's put everything back to normal
        subprocess.check_output(["git", "clean", "-fd"], stderr=subprocess.STDOUT)
        subprocess.check_output(["git", "reset", "--hard"], stderr=subprocess.STDOUT)
        all_clean = (
            subprocess.check_output(
                ["git", "status", "--porcelain"], stderr=subprocess.DEVNULL
            ).strip()
            == b""
        )
        if not all_clean:
            raise GitError("Couldn't clean the repo, something is wrong. Deleting.")

        repo_sha = subprocess.check_output(
            ["git", "rev-parse", "HEAD"], stderr=subprocess.STDOUT
        )
        if sha:
            if not repo_sha.startswith(sha.encode("utf-8")):
                shutil.rmtree(repo_destination)
                raise GitError(
                    f"Github repo is broken (not set to correct sha: {repo_sha.decode('utf-8')})"
                )

    return repo_destination
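

# Note (added comment, untested sketch): the full clone above is needed because
# older git clients cannot shallow-fetch an arbitrary commit. With a modern git
# client, against a server that permits fetching by commit id, something like
# the following could replace it:
#
#     git init <dst>
#     git -C <dst> fetch --depth=1 <repo_url> <sha>
#     git -C <dst> checkout FETCH_HEAD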


def test_perf() -> None:
    """
    Simple test that semgrep, running the njsscan ruleset, finishes below a
    given threshold of time on juice-shop and dvna; this will alert us to
    significant performance regressions.
    """
    rules_path = clone_github_repo(
        repo_url="https://github.com/ajinabraham/njsscan",
        sha="d1c5df41393ba512cbd362874a7a0bdc7dbf43fc",
    )
    njsscan_rules_path = str(rules_path / "njsscan/rules/semantic_grep")
    targets = [
        (  # Dvna takes ~30 sec
            "https://github.com/appsecco/dvna",
            "c637437d6515bd4c732e91c58e62d38e88260d3c",
            ["jquery-3.2.1.min.js", "showdown.min.js"],
            40,
        ),
        (  # Juice Shop takes ~150 sec on a 2019 MBP, ~270 sec on GHA
            "https://github.com/bkimminich/juice-shop",
            "98633f5ef242bf943608324a562058b22eca6dfe",
            ["three.js"],
            300,
        ),
    ]
    for repo_url, sha, excludes, _expected_duration in targets:
        target_path = clone_github_repo(repo_url=repo_url, sha=sha)
        args = [
            "python3",
            "-m",
            "semgrep",
            "--config",
            njsscan_rules_path,
            str(target_path),
        ]
        for ex in excludes:
            args.extend(["--exclude", ex])
        start = time.time()
        subprocess.check_output(args)
        duration = time.time() - start
        print(duration)
        # assert duration < expected_duration


test_perf()
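
# For reference (added comment): for each target, the loop above shells out to a
# command equivalent to the one below, with one --exclude flag per entry in the
# target's exclude list. The paths shown are the clone directories this script
# creates in the current working directory:
#
#     python3 -m semgrep \
#         --config ajinabraham-njsscan-d1c5df41393ba512cbd362874a7a0bdc7dbf43fc/njsscan/rules/semantic_grep \
#         appsecco-dvna-c637437d6515bd4c732e91c58e62d38e88260d3c \
#         --exclude jquery-3.2.1.min.js --exclude showdown.min.js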