
Set up data for ui - WIP #87


Draft · wants to merge 10 commits into main
10 changes: 4 additions & 6 deletions src/guidellm/config.py
@@ -32,9 +32,9 @@ class Environment(str, Enum):

ENV_REPORT_MAPPING = {
    Environment.PROD: "https://guidellm.neuralmagic.com/local-report/index.html",
    Environment.STAGING: "https://staging.guidellm.neuralmagic.com/local-report/index.html",
    Environment.DEV: "https://dev.guidellm.neuralmagic.com/local-report/index.html",
    Environment.LOCAL: "tests/dummy/report.html",
    Environment.STAGING: "https://review.neuralmagic.com/guidellm-ui/staging/index.html",
    Environment.DEV: "https://review.neuralmagic.com/guidellm-ui/dev/index.html",
    Environment.LOCAL: "http://localhost:3000/index.html",
}


@@ -112,8 +112,6 @@ class ReportGenerationSettings(BaseModel):
"""

source: str = ""
report_html_match: str = "window.report_data = {};"
report_html_placeholder: str = "{}"


class Settings(BaseSettings):
@@ -138,7 +136,7 @@ class Settings(BaseSettings):
    )

    # general settings
    env: Environment = Environment.PROD
    env: Environment = Environment.DEV
    request_timeout: int = 60 * 5  # 5 minutes
    request_http2: bool = True
    max_concurrency: int = 512
16 changes: 16 additions & 0 deletions src/guidellm/core/result.py
@@ -400,6 +400,22 @@ def output_token_throughput(self) -> float:

        return output_tokens / self.duration if self.duration else 0.0

    @property
    def output_token_throughput_distribution(self) -> Distribution:
        """
        Get the distribution for output token throughput.

        :return: The distribution of output token throughput.
        :rtype: Distribution
        """
        throughputs = []
        for r in self.results:
            duration = (r.end_time or 0) - (r.start_time or 0)
            if duration > 0:
                throughputs.append(r.output_token_count / duration)

        return Distribution(data=throughputs)

Comment on lines +404 to +418 (Contributor Author):
The UI relies on the output token throughput distribution, and I didn't find any existing method or property in the tokens-per-unit-of-time shape the UI expects, so I added this one.
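
A quick usage sketch (my own illustration, assuming a populated TextGenerationBenchmark instance named benchmark; it only uses the mean and percentiles already exposed by Distribution):

    dist = benchmark.output_token_throughput_distribution
    print(dist.mean)                       # average output tokens/sec across completed requests
    print(dist.percentiles([50, 90, 99]))  # tail values for the UI percentile rows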

    @property
    def prompt_token_distribution(self) -> Distribution:
        """
6 changes: 5 additions & 1 deletion src/guidellm/main.py
@@ -2,6 +2,7 @@
from typing import Any, Literal, Mapping, Optional, Union, get_args

import click
from guidellm.utils.injector import create_report
from loguru import logger
from transformers import AutoTokenizer # type: ignore[import-untyped]

@@ -15,6 +16,7 @@
)
from guidellm.request.base import RequestGenerator
from guidellm.utils import BenchmarkReportProgress, cli_params
from guidellm.utils.generate_ui_data import generate_ui_api_data

__all__ = ["generate_benchmark_report"]

@@ -184,7 +186,6 @@ def generate_benchmark_report_cli(
        cont_refresh_table=enable_continuous_refresh,
    )


def generate_benchmark_report(
    target: str,
    data: Optional[str],
@@ -290,6 +291,9 @@ def generate_benchmark_report(
    )
    report = asyncio.run(_run_executor_for_result(executor))

    js_data = generate_ui_api_data(report)
    create_report(js_data, 'guidellm_report')

    # Save and print report
    guidance_report = GuidanceReport()
    guidance_report.benchmarks.append(report)
3 changes: 3 additions & 0 deletions src/guidellm/utils/__init__.py
@@ -1,3 +1,4 @@
from .generate_ui_data import generate_ui_api_data
from .injector import create_report, inject_data
from .progress import BenchmarkReportProgress
from .text import (
@@ -24,6 +25,7 @@
"clean_text",
"create_report",
"filter_text",
"generate_ui_api_data",
"inject_data",
"is_path",
"is_path_like",
@@ -37,4 +39,5 @@
"resolve_transformers_dataset_split",
"split_lines_by_punctuation",
"split_text",
"stretch_list",
]
188 changes: 188 additions & 0 deletions src/guidellm/utils/generate_ui_data.py
@@ -0,0 +1,188 @@
import os
import json
import random
import math
from typing import Any, Dict, List
from guidellm.core.distribution import Distribution
from guidellm.core import TextGenerationBenchmarkReport, TextGenerationBenchmark

def generate_metric_report(dist: Distribution, metric_label: str, n_buckets: int = 18):
    total = len(dist)
    mean = dist.mean
    median = dist.median
    minv = dist.min
    maxv = dist.max
    std_dev = dist.std_deviation

    pvals = dist.percentiles([50, 90, 95, 99])

    percentile_list = [
        {"percentile": "p50", "value": pvals[0]},
        {"percentile": "p90", "value": pvals[1]},
        {"percentile": "p95", "value": pvals[2]},
        {"percentile": "p99", "value": pvals[3]},
    ]

    if dist.range == 0:
        buckets = [{"value": minv, "count": total}]
        bucket_width = 0
    else:
        bucket_width = dist.range / n_buckets
        bucket_counts = [0] * n_buckets

        for val in dist.data:
            idx = int((val - minv) // bucket_width)
            if idx == n_buckets:
                idx = n_buckets - 1
            bucket_counts[idx] += 1

        buckets = []
        for i, count in enumerate(bucket_counts):
            bucket_start = minv + i * bucket_width
            buckets.append({
                "value": bucket_start,
                "count": count
            })
Comment on lines +30 to +46 (Contributor Author):

I am not sure of the proper way to generate these buckets, or whether there is code elsewhere in guidellm that could handle this and I missed it.

This code assumes a fixed number of buckets and derives the bucket width from that. It is a hard-coded approach, and some data analysis up front might suggest a better bucket count or width. Generally, though, I figured the UI would look good with a fixed number of buckets so the histograms all look the same and take up a comfortable amount of space. A possible adaptive alternative is sketched just below.
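
One adaptive alternative, offered only as a sketch and not part of this PR (it assumes numpy would be an acceptable dependency): let numpy choose the bin edges, then cap the count so the histograms keep a consistent width.

    import numpy as np

    def adaptive_buckets(values, max_buckets=18):
        # "auto" chooses between the Sturges and Freedman-Diaconis rules
        edges = np.histogram_bin_edges(values, bins="auto")
        if len(edges) - 1 > max_buckets:
            edges = np.linspace(min(values), max(values), max_buckets + 1)
        counts, edges = np.histogram(values, bins=edges)
        # same {"value", "count"} bucket shape the UI expects
        buckets = [{"value": float(edge), "count": int(c)} for edge, c in zip(edges[:-1], counts)]
        return buckets, float(edges[1] - edges[0])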


    return {
        metric_label: {
            "statistics": {
                "total": total,
                "mean": mean,
                "median": median,
                "min": minv,
                "max": maxv,
                "std": std_dev,
            },
            "percentiles": percentile_list,
            "buckets": buckets,
            "bucketWidth": bucket_width,
        }
    }

def generate_run_info(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
    timestamp = max(bm.start_time for bm in benchmarks if bm.start_time is not None)
    return {
        "model": {
            "name": report.args.get('model', 'N/A'),
            "size": 0
        },
        "task": "N/A",
        "dataset": {
            "name": "N/A"
        },
        "timestamp": timestamp
    }

def generate_request_over_time_data(benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
    filtered_benchmarks = filter(lambda bm: bm.start_time is not None, benchmarks)
    sorted_benchmarks = list(sorted(filtered_benchmarks, key=lambda bm: bm.start_time))
    min_start_time = sorted_benchmarks[0].start_time

    all_request_times = [
        result.start_time - min_start_time
        for benchmark in sorted_benchmarks
        for result in benchmark.results
        if result.start_time is not None
    ]

    request_distribution = Distribution(data=all_request_times)
    final_result = generate_metric_report(request_distribution, "requestsOverTime")
    return { "numBenchmarks": len(sorted_benchmarks), **final_result }


def generate_workload_details(report: TextGenerationBenchmarkReport, benchmarks: List[TextGenerationBenchmark]) -> Dict[str, Any]:
    all_prompt_token_data = [data for benchmark in benchmarks for data in benchmark.prompt_token_distribution.data]
    all_prompt_token_distribution = Distribution(data=all_prompt_token_data)
    all_output_token_data = [data for benchmark in benchmarks for data in benchmark.output_token_distribution.data]
    all_output_token_distribution = Distribution(data=all_output_token_data)

    prompt_token_data = generate_metric_report(all_prompt_token_distribution, "tokenDistributions")
    output_token_data = generate_metric_report(all_output_token_distribution, "tokenDistributions")

    prompt_token_samples = [result.request.prompt for benchmark in benchmarks for result in benchmark.results]
    output_token_samples = [result.output for benchmark in benchmarks for result in benchmark.results]

    num_samples = min(5, len(prompt_token_samples), len(output_token_samples))
    sample_indices = random.sample(range(len(prompt_token_samples)), num_samples)

    sample_prompts = [prompt_token_samples[i] for i in sample_indices]
    # TODO: need a holistic approach to escaping characters in the prompt that don't convert cleanly into the format we need
    sample_prompts = list(map(lambda prompt: prompt.replace("\n", " ").replace("\"", "'"), sample_prompts))

    sample_outputs = [output_token_samples[i] for i in sample_indices]
    sample_outputs = list(map(lambda output: output.replace("\n", " ").replace("\"", "'"), sample_outputs))

    request_over_time_results = generate_request_over_time_data(benchmarks)

    return {
        "prompts": {
            "samples": sample_prompts,
            **prompt_token_data
        },
        "generations": {
            "samples": sample_outputs,
            **output_token_data
        },
        "requestsOverTime": request_over_time_results,
        "rateType": report.args["mode"],
        "server": {
            "target": report.args.get('target', 'N/A')
        }
    }

def generate_benchmark_json(bm: TextGenerationBenchmark) -> Dict[str, Any]:
    ttft_dist_ms = Distribution(data=bm.ttft_distribution.data)
    ttft_data = generate_metric_report(ttft_dist_ms, 'ttft')
    itl_dist_ms = Distribution(data=bm.itl_distribution.data)
    itl_data = generate_metric_report(itl_dist_ms, 'tpot')
    throughput_dist_ms = Distribution(data=bm.output_token_throughput_distribution.data)
    throughput_data = generate_metric_report(throughput_dist_ms, 'throughput')
    latency_dist_ms = Distribution(data=[val * 1000 for val in bm.request_latency_distribution.data])
    latency_data = generate_metric_report(latency_dist_ms, 'timePerRequest')
    return {
        "requestsPerSecond": bm.completed_request_rate,
        **itl_data,
        **ttft_data,
        **throughput_data,
        **latency_data,
    }

def generate_benchmarks_json(benchmarks: List[TextGenerationBenchmark]):
    benchmark_json = []
    for benchmark in benchmarks:
        benchmarks_report = generate_benchmark_json(benchmark)
        benchmark_json.append(benchmarks_report)

    return { "benchmarks": benchmark_json }

def generate_js_variable(variable_name: str, data: dict) -> str:
    json_data = json.dumps(data, indent=2)
    return f'window.{variable_name} = {json_data};'

def generate_ui_api_data(report: TextGenerationBenchmarkReport):
    filtered_benchmarks = list(filter(lambda bm: (bm.completed_request_rate > 0) and bm.mode != 'throughput', report.benchmarks))
    run_info_data = generate_run_info(report, filtered_benchmarks)
    workload_details_data = generate_workload_details(report, filtered_benchmarks)
    benchmarks_data = generate_benchmarks_json(filtered_benchmarks)
    run_info_script = generate_js_variable("run_info", run_info_data)
    workload_details_script = generate_js_variable("workload_details", workload_details_data)
    benchmarks_script = generate_js_variable("benchmarks", benchmarks_data)

    os.makedirs("ben_test", exist_ok=True)
    # generate json files based off of api specs, https://codepen.io/dalthecow/pen/bNGVQbq, for consumption by UI
    with open("ben_test/run_info.js", "w") as f:
        f.write(run_info_script)
    with open("ben_test/workload_details.js", "w") as f:
        f.write(workload_details_script)
    with open("ben_test/benchmarks.js", "w") as f:
        f.write(benchmarks_script)

    return {
        "window.run_info = {};": run_info_script,
        "window.workload_details = {};": workload_details_script,
        "window.benchmarks = {};": benchmarks_script,
    }
41 changes: 14 additions & 27 deletions src/guidellm/utils/injector.py
@@ -1,20 +1,18 @@
from pathlib import Path
from typing import Union

from pydantic import BaseModel

from guidellm.config import settings
from guidellm.utils.text import load_text

__all__ = ["create_report", "inject_data"]


def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:
def create_report(js_data: dict, output_path: Union[str, Path]) -> Path:
"""
Creates a report from the model and saves it to the output path.
Creates a report from the dictionary and saves it to the output path.

:param model: the model to serialize and inject
:type model: BaseModel
:param js_data: dict with match str and json data to inject
:type js_data: dict
:param output_path: the path, either a file or a directory,
to save the report to. If a directory, the report will be saved
as "report.html" inside of the directory.
@@ -27,10 +25,8 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

    html_content = load_text(settings.report_generation.source)
    report_content = inject_data(
        model,
        js_data,
        html_content,
        settings.report_generation.report_html_match,
        settings.report_generation.report_html_placeholder,
    )

    if not output_path.suffix:
@@ -39,32 +35,23 @@ def create_report(model: BaseModel, output_path: Union[str, Path]) -> Path:

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(report_content)

    print(f'Report saved to {output_path}')
    return output_path


def inject_data(
    model: BaseModel,
    js_data: dict,
    html: str,
    match: str,
    placeholder: str,
) -> str:
"""
Injects the data from the model into the HTML while replacing the placeholder.
Injects the json data into the HTML while replacing the placeholder.

:param model: the model to serialize and inject
:type model: BaseModel
:param js_data: the json data to inject
:type js_data: dict
:param html: the html to inject the data into
:type html: str
:param match: the string to match in the html to find the placeholder
:type match: str
:param placeholder: the placeholder to replace with the model data
inside of the placeholder
:type placeholder: str
:return: the html with the model data injected
:return: the html with the json data injected
:rtype: str
"""
    model_str = model.json()
    inject_str = match.replace(placeholder, model_str)

    return html.replace(match, inject_str)
    for placeholder, script in js_data.items():
        html = html.replace(placeholder, script)
    return html
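
For reference, a minimal illustration of how the reworked inject_data is meant to be driven (the placeholder keys match what generate_ui_api_data returns; the HTML snippet and model name are made up for the example):

    html = '<head><script>window.run_info = {};</script></head>'
    js_data = {"window.run_info = {};": 'window.run_info = {"model": {"name": "llama"}};'}
    print(inject_data(js_data, html))
    # <head><script>window.run_info = {"model": {"name": "llama"}};</script></head>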