Closed
Description
Bug description
When integrating our own locally deployed model, requests made in streaming mode fail with the error below, while requests made in non-streaming mode work without problems.
org.springframework.web.reactive.function.client.WebClientResponseException$UnprocessableEntity: 422 Unprocessable Entity from POST http://152.136.158.152:9002/v1/chat/completions
at org.springframework.web.reactive.function.client.WebClientResponseException.create(WebClientResponseException.java:331) ~[spring-webflux-6.2.7.jar:6.2.7]
Suppressed: reactor.core.publisher.FluxOnAssembly$OnAssemblyException:
Error has been observed at the following site(s):
*__checkpoint ⇢ 422 UNPROCESSABLE_ENTITY from POST http://152.136.158.152:9002/v1/chat/completions [DefaultWebClient]
The model is Qwen2.5-0.5B-Instruct, deployed with Swift:
CUDA_VISIBLE_DEVICES=0 swift deploy \
--model Qwen/Qwen2.5-0.5B-Instruct \
--infer_backend vllm \
--max_new_tokens 2048 \
--served_model_name qwen
Environment
Spring Boot: 3.4.5
Spring AI: 1.0.0
code
pom.xml
<!-- Build properties: compile for Java 17 and treat source files as UTF-8. -->
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<!-- Project dependencies. None of them declares a version here. -->
<!-- NOTE(review): versions are presumably supplied by the BOM imports in dependencyManagement; confirm. -->
<dependencies>
<!-- Spring Boot web starter. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Boot test starter, limited to the test scope. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- Spring AI starter for the OpenAI model integration. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-openai</artifactId>
</dependency>
</dependencies>
<!-- Imports two BOMs (type pom, scope import) that pin dependency versions. -->
<dependencyManagement>
<dependencies>
<!-- Spring Boot dependency BOM, version 3.4.5. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>3.4.5</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- Spring AI BOM, version 1.0.0. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>1.0.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<!-- Additional artifact repositories; both have release artifacts disabled. -->
<!-- NOTE(review): the BOMs above are pinned to release versions (3.4.5 / 1.0.0), so these snapshot repositories may be unnecessary; confirm. -->
<repositories>
<!-- Spring snapshot repository; no explicit <snapshots> policy is set. -->
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</repository>
<!-- Central Portal snapshot repository with snapshots explicitly enabled. -->
<repository>
<name>Central Portal Snapshots</name>
<id>central-portal-snapshots</id>
<url>https://central.sonatype.com/repository/maven-snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<!-- Build plugins: compiler configuration plus the Spring Boot Maven plugin. -->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<compilerArgs>
<!-- Passes javac's -parameters flag, which stores method parameter names in the class files. -->
<arg>-parameters</arg>
</compilerArgs>
</configuration>
</plugin>
<!-- Spring Boot Maven plugin, declared without extra configuration. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
application.yaml
# Spring AI OpenAI client settings, pointed at the locally deployed model endpoint.
spring:
  ai:
    openai:
      api-key: 111
      # base-url: https://api.deepseek.com
      base-url: http://152.136.158.152:9002
      chat:
        options:
          model: qwen
          # model: deepseek-chat
  # NOTE(review): nested under `spring` on the assumption that this is Spring Boot's
  # `spring.http.client.factory` property; confirm against the original file.
  http:
    client:
      factory: simple
MainTest.java
/**
 * Reproduction case for the reported failure: the same single-message prompt is sent
 * once through the streaming API and once through the blocking API.
 */
@SpringBootTest
class MainTest {

    @Autowired
    ChatClient.Builder builder;

    /** Streaming request; this is the variant reported as failing (422 Unprocessable Entity). */
    @Test
    public void testA() {
        ChatClient client = builder.build();
        Prompt prompt = Prompt.builder()
                .messages(List.of(new UserMessage("你是谁")))
                .build();
        Flux<String> chunks = client.prompt(prompt).stream().content();
        // Waits for the stream to finish and prints only its last element.
        System.out.println(chunks.blockLast());
    }

    /** Blocking (non-streaming) request; this is the variant reported as working. */
    @Test
    public void testB() {
        ChatClient client = builder.build();
        Prompt prompt = Prompt.builder()
                .messages(List.of(new UserMessage("你是谁")))
                .build();
        String answer = client.prompt(prompt).call().content();
        System.out.println(answer);
    }
}