diff --git a/vllm-benchmarks/benchmarks/serving-tests.json b/vllm-benchmarks/benchmarks/serving-tests.json
index 9456bb8..02d2ed6 100644
--- a/vllm-benchmarks/benchmarks/serving-tests.json
+++ b/vllm-benchmarks/benchmarks/serving-tests.json
@@ -117,5 +117,125 @@
             "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
             "num_prompts": 200
         }
+    },
+    {
+        "test_name": "serving_qwen3_30B-A3B_tp8_in200_out200",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 200,
+            "random_output_len": 200
+        }
+    },
+    {
+        "test_name": "serving_qwen3_30B-A3B_tp8_in1k_out2k",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 1024,
+            "random_output_len": 2048
+        }
+    },
+    {
+        "test_name": "serving_qwen3_30B-A3B_tp8_in5k_out1k",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "Qwen/Qwen3-30B-A3B",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 5120,
+            "random_output_len": 1024
+        }
+    },
+    {
+        "test_name": "serving_gemma_3_27b_it_tp8_in200_out200",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 200,
+            "random_output_len": 200
+        }
+    },
+    {
+        "test_name": "serving_gemma_3_27b_it_tp8_in1k_out2k",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 1024,
+            "random_output_len": 2048
+        }
+    },
+    {
+        "test_name": "serving_gemma_3_27b_it_tp8_in5k_out1k",
+        "qps_list": ["inf"],
+        "server_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "tensor_parallel_size": 8,
+            "swap_space": 16,
+            "disable_log_stats": "",
+            "disable_log_requests": "",
+            "load_format": "dummy",
+            "max_model_len": 8192
+        },
+        "client_parameters": {
+            "model": "google/gemma-3-27b-it",
+            "backend": "vllm",
+            "dataset_name": "random",
+            "random_input_len": 5120,
+            "random_output_len": 1024
+        }
     }
 ]