-
Notifications
You must be signed in to change notification settings - Fork 798
[UR][Benchmarks] Add flamegraphs to benchmark results #19678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
Conversation
Signed-off-by: Mateusz P. Nowak <[email protected]>
Signed-off-by: Mateusz P. Nowak <[email protected]>
run_unitrace=False, | ||
extra_unitrace_opt=None, | ||
run_flamegraph=False, | ||
extra_perf_opt=None, # VERIFY |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You already added a tracing type enum. I'd extend this to be some sort of generic "TraceTool", and here, in run_bench, I suggest simply accepting a generic trace tool (I imagine you wouldn't want to enable two at the same time).
@@ -24,24 +25,67 @@ def _write_output_to_file( | |||
|
|||
if options.output_html == "local": | |||
data_path = os.path.join(html_path, f"{filename}.js") | |||
|
|||
# Check if the file exists and has flamegraph data that we need to preserve | |||
existing_flamegraph_data = None |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need to do this? we store all the other results separately from the html output.
options.workdir, | ||
"flamegraph-repo", | ||
"https://github.com/brendangregg/FlameGraph.git", | ||
"master", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
don't clone master, use a fixed commit.
"master", | ||
) | ||
|
||
# FlameGraph doesn't need building, just verify scripts exist and are executable |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't check this anywhere else. I'm not sure if this would ever find an issue?
) | ||
|
||
def _prune_flamegraph_dirs(self, res_dir: str, FILECNT: int = 10): | ||
"""Keep only the last FILECNT files in the flamegraphs directory.""" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems similar to what you have for unitrace. can we share code?
"record", | ||
"-g", # Enable call-graph recording | ||
"-F", | ||
"99", # Sample frequency |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems low. we should experiment with different values and pick what gives us the best flamecharts.
def handle_output(self, bench_name: str, perf_data_file: str): | ||
""" | ||
Generate SVG flamegraph from perf data file. | ||
Returns the path to the generated SVG file. | ||
""" | ||
if not os.path.exists(perf_data_file) or os.path.getsize(perf_data_file) == 0: | ||
raise FileNotFoundError( | ||
f"Perf data file not found or empty: {perf_data_file}" | ||
) | ||
|
||
# Generate output SVG filename following same pattern as perf data | ||
svg_file = perf_data_file.replace(".perf.data", ".svg") | ||
folded_file = perf_data_file.replace(".perf.data", ".folded") | ||
|
||
try: | ||
# Step 1: Convert perf script to folded format | ||
log.debug(f"Converting perf data to folded format: {folded_file}") | ||
with open(folded_file, "w") as f_folded: | ||
# Run perf script to get the stack traces | ||
perf_script_proc = subprocess.Popen( | ||
["perf", "script", "-i", perf_data_file], | ||
stdout=subprocess.PIPE, | ||
stderr=subprocess.DEVNULL, | ||
text=True, | ||
) | ||
|
||
# Pipe through stackcollapse-perf.pl | ||
stackcollapse_perf_path = os.path.join( | ||
self.repo_dir, "stackcollapse-perf.pl" | ||
) | ||
stackcollapse_proc = subprocess.Popen( | ||
[stackcollapse_perf_path], | ||
stdin=perf_script_proc.stdout, | ||
stdout=f_folded, | ||
stderr=subprocess.DEVNULL, | ||
text=True, | ||
) | ||
|
||
perf_script_proc.stdout.close() | ||
stackcollapse_proc.wait() | ||
perf_script_proc.wait() | ||
|
||
# Step 2: Generate flamegraph SVG | ||
log.debug(f"Generating flamegraph SVG: {svg_file}") | ||
flamegraph_pl_path = os.path.join(self.repo_dir, "flamegraph.pl") | ||
with open(folded_file, "r") as f_folded, open(svg_file, "w") as f_svg: | ||
flamegraph_proc = subprocess.Popen( | ||
[ | ||
flamegraph_pl_path, | ||
"--title", | ||
f"{options.save_name} - {bench_name}", | ||
"--width", | ||
str( | ||
self.FLAMEGRAPH_WIDTH | ||
), # Fit within container without scrollbars | ||
], | ||
stdin=f_folded, | ||
stdout=f_svg, | ||
stderr=subprocess.DEVNULL, | ||
text=True, | ||
) | ||
flamegraph_proc.wait() | ||
|
||
# Clean up intermediate files | ||
if os.path.exists(folded_file): | ||
os.remove(folded_file) | ||
|
||
if not os.path.exists(svg_file) or os.path.getsize(svg_file) == 0: | ||
raise RuntimeError(f"Failed to generate flamegraph SVG: {svg_file}") | ||
|
||
log.debug(f"Generated flamegraph: {svg_file}") | ||
|
||
# Create symlink immediately after SVG generation | ||
self._create_immediate_symlink(svg_file) | ||
|
||
# Prune old flamegraph directories | ||
self._prune_flamegraph_dirs(os.path.dirname(perf_data_file)) | ||
|
||
return svg_file | ||
|
||
except Exception as e: | ||
# Clean up on failure | ||
for temp_file in [folded_file, svg_file]: | ||
if os.path.exists(temp_file): | ||
os.remove(temp_file) | ||
raise RuntimeError(f"Failed to generate flamegraph for {bench_name}: {e}") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use run helpers... I suggest expanding run
to support redirecting stdout to a file (and then not printing it to the console).
except Exception as e: | ||
log.debug(f"Failed to create immediate symlink for {svg_file}: {e}") | ||
|
||
def _update_flamegraph_manifest( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I genuinely don't understand the idea here. data.js is purely an output file. we should not parse it.
Adds presentation of perf results as flamegraphs