From 272fdd5fcd58c2b917b36fdaa1495dfcd1b237e2 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Thu, 17 Apr 2025 10:18:39 -0400 Subject: [PATCH 1/2] Increase the max rate determined by sweep calibration --- src/guidellm/benchmark/profile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py index 53232320..d4958d2e 100644 --- a/src/guidellm/benchmark/profile.py +++ b/src/guidellm/benchmark/profile.py @@ -298,6 +298,8 @@ def next_strategy(self) -> Optional[SchedulingStrategy]: min_rate = self.measured_rates[0] max_rate = self.measured_rates[1] + # Increase max rate by one step to hopefully find a better effective rate + max_rate += (max_rate - min_rate) / self.sweep_size rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:] if self.rate_type == "constant": From 0bc17a2cc8b4993b075b7f9134c38e0f79f4fb86 Mon Sep 17 00:00:00 2001 From: Samuel Monson Date: Thu, 17 Apr 2025 10:33:42 -0400 Subject: [PATCH 2/2] Round rates to next 0.25 --- src/guidellm/benchmark/profile.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/guidellm/benchmark/profile.py b/src/guidellm/benchmark/profile.py index d4958d2e..93a51c9f 100644 --- a/src/guidellm/benchmark/profile.py +++ b/src/guidellm/benchmark/profile.py @@ -302,6 +302,15 @@ def next_strategy(self) -> Optional[SchedulingStrategy]: max_rate += (max_rate - min_rate) / self.sweep_size rates = np.linspace(min_rate, max_rate, self.sweep_size - 1)[1:] + # Round to the next 0.25 + rates = (np.ceil(rates * 8) + 1 ) // 2 / 4 + # Remove duplicates caused by rounding + rates = np.unique(rates) + + # End early if we don't have enough rates + if self.completed_strategies - 1 > len(rates): + return None + if self.rate_type == "constant": return AsyncConstantStrategy( rate=rates[self.completed_strategies - 2],