
[Prefill with Prefix Cache] Improve the efficiency of prefilling with prefix cache by allowing a larger batch size #3402

Open · wants to merge 6 commits into base: main

Merge branch 'master' into chunk_batch
MeloYang05 committed Mar 26, 2024
commit c15e0396ca21e5c1b2ba2b2286602c46b78ffa67
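For context on the PR title: when a prompt's prefix already has KV entries in the prefix cache, those tokens need no prefill computation, so counting only the uncached tokens against the scheduler's token budget lets more sequences fit into one prefill batch. Below is a minimal sketch of that idea, not the PR's actual implementation; `select_prefill_batch`, `num_cached_tokens`, and `prompt_len` are hypothetical names.

```python
# Hypothetical sketch of prefix-cache-aware prefill batching; not the PR's code.
# Only tokens that miss the prefix cache consume the batched-token budget.

def select_prefill_batch(waiting, max_num_batched_tokens):
    batch = []
    budget = max_num_batched_tokens
    for seq in waiting:
        # num_cached_tokens(seq) is a hypothetical helper returning how many
        # leading prompt tokens already have cached KV entries.
        uncached = seq.prompt_len - num_cached_tokens(seq)
        if uncached > budget:
            break  # this sequence would exceed the remaining compute budget
        budget -= uncached
        batch.append(seq)
    return batch
```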
33 changes: 33 additions & 0 deletions tests/core/test_scheduler.py
@@ -174,6 +174,39 @@ def test_scheduler_max_seqs():
    assert set(out.scheduled_seq_groups) == set([all_seq_groups[1]])


def test_scheduler_delay_factor():
    block_size = 4
    scheduler_config = SchedulerConfig(100, 64, 16, delay_factor=0.5)
    cache_config = CacheConfig(block_size, 1.0, 1, "auto")
    cache_config.num_cpu_blocks = 8
    cache_config.num_gpu_blocks = 8
    scheduler = Scheduler(scheduler_config, cache_config, None)

    # schedule first prompt
    _, seq_group = create_dummy_prompt("0", prompt_length=block_size)
    scheduler.add_seq_group(seq_group)
    seq_group_meta, out = scheduler.schedule()
    assert out.prompt_run
    assert seq_group_meta[0].request_id == '0'

    # wait for a second before scheduling next prompt
    time.sleep(1)
    _, seq_group = create_dummy_prompt("1", prompt_length=block_size)
    scheduler.add_seq_group(seq_group)

    # second prompt should *not* be scheduled
    seq_group_meta, out = scheduler.schedule()
    assert not out.prompt_run
    assert seq_group_meta[0].request_id == '0'

    # wait for more than 0.5 second and try again
    time.sleep(0.6)
    seq_group_meta, out = scheduler.schedule()
    assert out.prompt_run
    assert seq_group_meta[0].request_id == '1'


def test_scheduler_with_cache():
    # Initialize the scheduler
    max_batched_tokens = 96
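On the merged-in test: with `delay_factor=0.5`, the scheduler postpones a new prompt run until more than 0.5 times the latency of the previous prompt run has elapsed since the earliest waiting request arrived. The `time.sleep(1)` makes the measured previous-prompt latency roughly one second, so the immediately following `schedule()` call is a decode-only step, and only after the extra `time.sleep(0.6)` (more than 0.5 s) does prompt "1" get scheduled. A minimal standalone sketch of that gate, modeled on (but not identical to) the scheduler's internal check:

```python
# Sketch (an assumed simplification, not vLLM's exact Scheduler internals) of
# the delay_factor gate that test_scheduler_delay_factor exercises.

class DelayGate:
    def __init__(self, delay_factor: float) -> None:
        self.delay_factor = delay_factor
        self.prev_time = 0.0           # timestamp of the previous schedule() call
        self.prev_prompt = False       # whether that call ran a prompt step
        self.last_prompt_latency = 0.0

    def passed_delay(self, now: float, earliest_arrival: float,
                     has_running: bool) -> bool:
        # The gap since the last prompt run serves as that run's latency.
        if self.prev_prompt:
            self.last_prompt_latency = now - self.prev_time
        self.prev_time, self.prev_prompt = now, False
        if self.delay_factor <= 0:
            return True
        # Allow a prompt run once the oldest waiting request has waited long
        # enough, or when nothing is running (no decodes to starve).
        return (now - earliest_arrival
                > self.delay_factor * self.last_prompt_latency) or not has_running

    def prompt_scheduled(self) -> None:
        self.prev_prompt = True
```

Tracing the test against this sketch: the first `schedule()` passes because nothing is running; the second call measures a latency of about 1 s and blocks the new prompt (0 s waited < 0.5 s required); the third call, after 0.6 s, clears the 0.5 s threshold and schedules prompt "1".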