diff --git a/README.adoc b/README.adoc index 43d26ddbd..2d4d38a06 100644 --- a/README.adoc +++ b/README.adoc @@ -11,6 +11,7 @@ We have separate folders for the samples of individual modules: * `example` - Shows core Spring Data support for Apache Cassandra. * `kotlin` - Example for using Cassandra with Kotlin. * `reactive` - Example project to show reactive template and repository support. +* `vector-search` - Example of how to do vector search with a Spring Data Cassandra repository. == Spring Data Elasticsearch @@ -46,6 +47,7 @@ Contains also examples running on Virtual Threads. * `security` - Example of how to integrate Spring Data JPA Repositories with Spring Security. * `showcase` - Refactoring show case of how to improve a plain-JPA-based persistence layer by using Spring Data JPA (read: removing close to all of the implementation code).Follow the `demo.txt` file for detailed instructions. * `vavr` - Shows the support of https://www.vavr.io[Vavr] collection types as return types for query methods. +* `vector-search` - Example of how to do vector search with a Spring Data JPA repository and `hibernate-vector`. == Spring Data LDAP @@ -68,6 +70,7 @@ Contains also examples running on Virtual Threads. * `security` - Example project showing usage of Spring Security with MongoDB. * `text-search` - Example project showing usage of MongoDB text search feature. * `transactions` - Example project for imperative and reactive MongoDB 4.0 transaction support. +* `vector-search` - Example of how to do vector search with a Spring Data MongoDB repository. 
== Spring Data Neo4j diff --git a/cassandra/pom.xml b/cassandra/pom.xml index a07a22fc9..7bb7201f1 100644 --- a/cassandra/pom.xml +++ b/cassandra/pom.xml @@ -21,6 +21,7 @@ example kotlin reactive + vector-search diff --git a/cassandra/util/src/main/java/example/springdata/cassandra/util/CassandraExtension.java b/cassandra/util/src/main/java/example/springdata/cassandra/util/CassandraExtension.java index 8cb4f0ff7..aaea57e5c 100644 --- a/cassandra/util/src/main/java/example/springdata/cassandra/util/CassandraExtension.java +++ b/cassandra/util/src/main/java/example/springdata/cassandra/util/CassandraExtension.java @@ -54,6 +54,7 @@ public void beforeAll(ExtensionContext context) { CassandraContainer container = runTestcontainer(); System.setProperty("spring.cassandra.port", "" + container.getMappedPort(9042)); System.setProperty("spring.cassandra.contact-points", "" + container.getHost()); + System.setProperty("spring.cassandra.local-datacenter", container.getLocalDatacenter()); return new CassandraServer(container.getHost(), container.getMappedPort(9042), CassandraServer.RuntimeMode.EMBEDDED_IF_NOT_RUNNING); @@ -109,6 +110,6 @@ private CassandraContainer runTestcontainer() { private String getCassandraDockerImageName() { return String.format("cassandra:%s", - Optional.ofNullable(System.getenv("CASSANDRA_VERSION")).filter(StringUtils::hasText).orElse("3.11.10")); + Optional.ofNullable(System.getenv("CASSANDRA_VERSION")).filter(StringUtils::hasText).orElse("5.0.4")); } } diff --git a/cassandra/vector-search/README.md b/cassandra/vector-search/README.md new file mode 100644 index 000000000..d47924fdc --- /dev/null +++ b/cassandra/vector-search/README.md @@ -0,0 +1,37 @@ +# Spring Data for Apache Cassandra - Vector Search Example + +This project +contains [Vector Search](https://docs.spring.io/spring-data/cassandra/reference/5.0/cassandra/repositories/vector-search.html) +with Spring Data for Apache Cassandra. 
+ +## Vector Support + +The Spring Data `Vector` type can be used in repository query methods. +Domain type properties of managed domain types are required to use a numeric array representation for embeddings. + +```java + +@Table +public class Comment { + + @Id + private String id; + + private String country; + private String description; + + @SaiIndexed + @VectorType(dimensions = 5) + private Vector embedding; + + // ... +} + + +public interface CommentRepository extends Repository { + + SearchResults searchTop10ByEmbeddingNear(Vector embedding, ScoringFunction function); +} +``` + +This example contains a test class to illustrate vector search with a Repository in `CassandraVectorSearchTest`. diff --git a/cassandra/vector-search/pom.xml b/cassandra/vector-search/pom.xml new file mode 100644 index 000000000..3db191981 --- /dev/null +++ b/cassandra/vector-search/pom.xml @@ -0,0 +1,85 @@ + + + 4.0.0 + + org.springframework.data.examples + spring-data-cassandra-examples + 2.0.0.BUILD-SNAPSHOT + + + org.example + spring-data-cassandra-vector-search + + + UTF-8 + 7.0.0-M5 + 2025.1.0-M3 + + + + + org.springframework.data + spring-data-cassandra + 5.0.0-M3 + + + + org.jspecify + jspecify + 1.0.0 + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.springframework.boot + spring-boot-testcontainers + test + + + + org.testcontainers + junit-jupiter + test + + + + org.testcontainers + cassandra + + + com.datastax.cassandra + cassandra-driver-core + + + + + + org.springframework.data.examples + spring-data-cassandra-example-utils + ${project.version} + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.springframework.boot + spring-boot-maven-plugin + + + + + diff --git a/cassandra/vector-search/src/main/java/example/springdata/vector/Comment.java b/cassandra/vector-search/src/main/java/example/springdata/vector/Comment.java new file mode 100644 index 000000000..7b9310afc --- /dev/null +++ 
b/cassandra/vector-search/src/main/java/example/springdata/vector/Comment.java @@ -0,0 +1,76 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import java.util.UUID; + +import org.springframework.data.annotation.Id; +import org.springframework.data.cassandra.core.mapping.SaiIndexed; +import org.springframework.data.cassandra.core.mapping.Table; +import org.springframework.data.cassandra.core.mapping.VectorType; +import org.springframework.data.domain.Vector; + +/** + * Sample entity containing a {@link Vector vector} {@link #embedding}. 
+ */ +@Table +public class Comment { + + @Id + private String id; + + private String country; + private String description; + + @SaiIndexed + @VectorType(dimensions = 5) + private Vector embedding; + + public Comment() { + } + + public Comment(String country, String description, Vector embedding) { + this.id = UUID.randomUUID().toString(); + this.country = country; + this.description = description; + this.embedding = embedding; + } + + public static Comment of(Comment source) { + return new Comment(source.getCountry(), source.getDescription(), source.getEmbedding()); + } + + public String getId() { + return id; + } + + public String getCountry() { + return country; + } + + public String getDescription() { + return description; + } + + public Vector getEmbedding() { + return embedding; + } + + @Override + public String toString() { + return "%s (%s)".formatted(getDescription(), getCountry()); + } +} diff --git a/cassandra/vector-search/src/main/java/example/springdata/vector/CommentRepository.java b/cassandra/vector-search/src/main/java/example/springdata/vector/CommentRepository.java new file mode 100644 index 000000000..018022b99 --- /dev/null +++ b/cassandra/vector-search/src/main/java/example/springdata/vector/CommentRepository.java @@ -0,0 +1,32 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.vector; + +import org.springframework.data.cassandra.repository.Query; +import org.springframework.data.domain.Limit; +import org.springframework.data.domain.Score; +import org.springframework.data.domain.ScoringFunction; +import org.springframework.data.domain.SearchResults; +import org.springframework.data.domain.Vector; +import org.springframework.data.repository.CrudRepository; + +public interface CommentRepository extends CrudRepository { + + SearchResults searchTop10ByEmbeddingNear(Vector embedding, ScoringFunction function); + + @Query("SELECT id, description, country, similarity_cosine(embedding,:embedding) AS score FROM comment ORDER BY embedding ANN OF :embedding LIMIT :limit") + SearchResults searchAnnotated(Vector embedding, Score distance, Limit limit); +} diff --git a/cassandra/vector-search/src/main/java/example/springdata/vector/VectorApp.java b/cassandra/vector-search/src/main/java/example/springdata/vector/VectorApp.java new file mode 100644 index 000000000..cb6649ecc --- /dev/null +++ b/cassandra/vector-search/src/main/java/example/springdata/vector/VectorApp.java @@ -0,0 +1,52 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.vector; + +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.data.cassandra.core.CassandraTemplate; +import org.springframework.data.domain.Vector; +import org.springframework.stereotype.Component; + +@SpringBootApplication +public class VectorApp { + + public static void main(String[] args) { + SpringApplication.run(VectorApp.class, args); + } + + @Component + static class DbInitializer implements CommandLineRunner { + + private final CassandraTemplate template; + + DbInitializer(CassandraTemplate template) { + this.template = template; + } + + @Override + public void run(String... args) { + + template.truncate(Comment.class); + + template.insert(new Comment("de", "comment 'one'", Vector.of(0.1001f, 0.22345f, 0.33456f, 0.44567f, 0.55678f))); + template.insert(new Comment("de", "comment 'two'", Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f))); + template.insert(new Comment("en", "comment 'three'", Vector.of(0.9001f, 0.82345f, 0.73456f, 0.64567f, 0.55678f))); + template.insert(new Comment("de", "comment 'four'", Vector.of(0.9001f, 0.92345f, 0.93456f, 0.94567f, 0.95678f))); + } + } +} diff --git a/cassandra/vector-search/src/main/resources/application.properties b/cassandra/vector-search/src/main/resources/application.properties new file mode 100644 index 000000000..a3ac04871 --- /dev/null +++ b/cassandra/vector-search/src/main/resources/application.properties @@ -0,0 +1,6 @@ +logging.level.org=WARN +logging.level.com.datastax=WARN + +spring.cassandra.schema-action=recreate +spring.cassandra. 
+spring.cassandra.keyspace-name=vector_search_keyspace diff --git a/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraDBConfiguration.java b/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraDBConfiguration.java new file mode 100644 index 000000000..20f56331b --- /dev/null +++ b/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraDBConfiguration.java @@ -0,0 +1,43 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.vector; + +import java.net.InetSocketAddress; + +import com.datastax.oss.driver.api.core.CqlSession; +import org.springframework.boot.autoconfigure.cassandra.CassandraProperties; +import org.springframework.boot.autoconfigure.cassandra.CqlSessionBuilderCustomizer; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class CassandraDBConfiguration { + + @Bean + CqlSessionBuilderCustomizer sessionBuilderCustomizer(CassandraProperties properties) { + return sessionBuilder -> { + + InetSocketAddress contactPoint = new InetSocketAddress(properties.getContactPoints().iterator().next(), properties.getPort()); + + try (CqlSession session = CqlSession.builder().addContactPoint(contactPoint) // short-lived bootstrap session, closed even if the statement fails + .withLocalDatacenter(properties.getLocalDatacenter()).build()) { + + session.execute("CREATE KEYSPACE IF NOT EXISTS " + properties.getKeyspaceName() + " WITH replication = \n" + + "{'class':'SimpleStrategy','replication_factor':'1'};"); + } + }; + } +} diff --git a/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraVectorSearchTest.java b/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraVectorSearchTest.java new file mode 100644 index 000000000..7c0cdb13e --- /dev/null +++ b/cassandra/vector-search/src/test/java/example/springdata/vector/CassandraVectorSearchTest.java @@ -0,0 +1,64 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import static org.springframework.data.domain.ScoringFunction.cosine; + +import example.springdata.cassandra.util.CassandraKeyspace; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.Limit; +import org.springframework.data.domain.Score; +import org.springframework.data.domain.ScoringFunction; +import org.springframework.data.domain.SearchResult; +import org.springframework.data.domain.Vector; + +@CassandraKeyspace +@SpringBootTest +class CassandraVectorSearchTest { + + @Autowired + CommentRepository repository; + + @BeforeEach + void beforeEach() throws InterruptedException { + Thread.sleep(5000); // a little time to think; NOTE(review): runs before every test, presumably to let inserts/index settle - confirm + } + + @Test + void vectorSearchUsingQueryMethod() { + + Vector vector = Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f); + + repository.searchTop10ByEmbeddingNear(vector, ScoringFunction.cosine()) + .forEach(CassandraVectorSearchTest::printResult); + } + + @Test + void vectorSearchUsingRawAtQuery() { + + Vector vector = Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f); + + repository.searchAnnotated(vector, Score.of(0.5, cosine()), Limit.of(10)) + .forEach(CassandraVectorSearchTest::printResult); + } + + private static void printResult(SearchResult result) { + System.out.printf("score: %s - %s\n", result.getScore(), result.getContent()); + } +} diff --git a/jpa/pom.xml b/jpa/pom.xml index 9b94d5869..956979935 100644 --- a/jpa/pom.xml +++ b/jpa/pom.xml @@ -30,6 +30,7 @@ vavr multitenant graalvm-native + vector-search diff --git a/jpa/vector-search/README.md b/jpa/vector-search/README.md new file mode 100644 index 000000000..6522d2da1 --- /dev/null +++ b/jpa/vector-search/README.md @@ 
-0,0 +1,36 @@ +# Spring Data JPA - Vector Search Example + +This project contains [Vector Search](https://docs.spring.io/spring-data/jpa/reference/4.0/repositories/vector-search.html) with Spring Data JPA and the `hibernate-vector` module. + +## Vector Support + +The Spring Data `Vector` type can be used in repository query methods. +Domain type properties of managed domain types are required to use a numeric array representation for embeddings. + +```java + +@Entity +@Table(name = "jpa_comment") +public class Comment { + + @Id + @GeneratedValue private Long id; + + private String country; + private String description; + + @JdbcTypeCode(SqlTypes.VECTOR) + @Array(length = 5) + private float[] embedding; + + // ... +} + + +public interface CommentRepository extends Repository { + + SearchResults searchTop10ByCountryAndEmbeddingNear(String country, Vector vector, Score distance); +} +``` + +This example contains a test class to illustrate vector search with a Repository in `JpaVectorSearchTest`. 
diff --git a/jpa/vector-search/pom.xml b/jpa/vector-search/pom.xml new file mode 100644 index 000000000..96d314f49 --- /dev/null +++ b/jpa/vector-search/pom.xml @@ -0,0 +1,120 @@ + + + 4.0.0 + + org.springframework.data.examples + spring-data-jpa-examples + 2.0.0.BUILD-SNAPSHOT + + + org.example + spring-data-jpa-vector-search + + + UTF-8 + 7.0.0-M5 + 7.0.0.CR2 + 2025.1.0-M3 + + + + + org.jspecify + jspecify + 1.0.0 + + + + jakarta.persistence + jakarta.persistence-api + 3.2.0 + + + + org.springframework + spring-orm + ${spring.version} + + + + org.springframework + spring-beans + ${spring.version} + + + + org.springframework + spring-core + ${spring.version} + + + + org.springframework + spring-jdbc + ${spring.version} + + + + org.springframework + spring-tx + ${spring.version} + + + + org.springframework + spring-context + ${spring.version} + + + + org.hibernate.orm + hibernate-vector + ${hibernate.version} + + + + org.postgresql + postgresql + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.springframework.boot + spring-boot-testcontainers + test + + + + org.testcontainers + junit-jupiter + test + + + + org.testcontainers + postgresql + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.springframework.boot + spring-boot-maven-plugin + + + + + diff --git a/jpa/vector-search/src/main/java/example/springdata/vector/Comment.java b/jpa/vector-search/src/main/java/example/springdata/vector/Comment.java new file mode 100644 index 000000000..5fb272f11 --- /dev/null +++ b/jpa/vector-search/src/main/java/example/springdata/vector/Comment.java @@ -0,0 +1,76 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.Table; +import org.hibernate.annotations.Array; +import org.hibernate.annotations.JdbcTypeCode; +import org.hibernate.type.SqlTypes; + +/** + * Sample entity containing a {@link SqlTypes#VECTOR vector} {@link #embedding}. + */ +@Entity +@Table(name = "jpa_comment") +public class Comment { + + @Id + @GeneratedValue private Long id; + + private String country; + private String description; + + @JdbcTypeCode(SqlTypes.VECTOR) + @Array(length = 5) + private float[] embedding; + + public Comment() { + } + + public Comment(String country, String description, float[] embedding) { + this.country = country; + this.description = description; + this.embedding = embedding; + } + + public static Comment of(Comment source) { + return new Comment(source.getCountry(), source.getDescription(), source.getEmbedding()); + } + + public Long getId() { + return id; // null until an identifier has been assigned; returning the wrapper avoids an unboxing NullPointerException + } + + public String getCountry() { + return country; + } + + public String getDescription() { + return description; + } + + public float[] getEmbedding() { + return embedding; + } + + @Override + public String toString() { + return "%s (%s)".formatted(getDescription(), getCountry()); + } +} diff --git a/jpa/vector-search/src/main/java/example/springdata/vector/CommentRepository.java b/jpa/vector-search/src/main/java/example/springdata/vector/CommentRepository.java new file mode 100644 index 000000000..5d8d40483 --- /dev/null +++ 
b/jpa/vector-search/src/main/java/example/springdata/vector/CommentRepository.java @@ -0,0 +1,34 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import org.springframework.data.domain.Score; +import org.springframework.data.domain.SearchResults; +import org.springframework.data.domain.Vector; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.CrudRepository; + +public interface CommentRepository extends CrudRepository { + + SearchResults searchTop10ByCountryAndEmbeddingNear(String country, Vector vector, Score distance); + + @Query(""" + SELECT c, cosine_distance(c.embedding, :embedding) as distance FROM Comment c + WHERE c.country = :country + AND cosine_distance(c.embedding, :embedding) <= :distance + ORDER BY distance asc""") + SearchResults searchAnnotated(String country, Vector embedding, Score distance); +} diff --git a/jpa/vector-search/src/main/java/example/springdata/vector/VectorApp.java b/jpa/vector-search/src/main/java/example/springdata/vector/VectorApp.java new file mode 100644 index 000000000..2fe0a1449 --- /dev/null +++ b/jpa/vector-search/src/main/java/example/springdata/vector/VectorApp.java @@ -0,0 +1,50 @@ +/* + * Copyright 2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import org.springframework.boot.CommandLineRunner; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.stereotype.Component; + +@SpringBootApplication +public class VectorApp { + + public static void main(String[] args) { + SpringApplication.run(VectorApp.class, args); + } + + @Component + static class DbInitializer implements CommandLineRunner { + + private final CommentRepository repository; + + DbInitializer(CommentRepository repository) { + this.repository = repository; + } + + @Override + public void run(String... 
args) { + + repository.deleteAll(); + + repository.save(new Comment("de", "comment 'one'", new float[]{0.1001f, 0.22345f, 0.33456f, 0.44567f, 0.55678f})); + repository.save(new Comment("de", "comment 'two'", new float[]{0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f})); + repository.save(new Comment("en", "comment 'three'", new float[]{0.9001f, 0.82345f, 0.73456f, 0.64567f, 0.55678f})); + repository.save(new Comment("de", "comment 'four'", new float[]{0.9001f, 0.92345f, 0.93456f, 0.94567f, 0.95678f})); + } + } +} diff --git a/jpa/vector-search/src/main/resources/application.properties b/jpa/vector-search/src/main/resources/application.properties new file mode 100644 index 000000000..405d5dc2f --- /dev/null +++ b/jpa/vector-search/src/main/resources/application.properties @@ -0,0 +1,4 @@ +spring.sql.init.schema-locations=pgvector.sql +spring.sql.init.mode=always + +logging.level.org=WARN diff --git a/jpa/vector-search/src/main/resources/pgvector.sql b/jpa/vector-search/src/main/resources/pgvector.sql new file mode 100644 index 000000000..a0afa4ce4 --- /dev/null +++ b/jpa/vector-search/src/main/resources/pgvector.sql @@ -0,0 +1,11 @@ +CREATE EXTENSION IF NOT EXISTS vector; + +DROP TABLE IF EXISTS jpa_comment; + +DROP SEQUENCE IF EXISTS jpa_comment_seq; + +CREATE TABLE IF NOT EXISTS jpa_comment (id bigserial PRIMARY KEY, country varchar(10), description varchar(20), embedding vector(5)); + +CREATE SEQUENCE jpa_comment_seq INCREMENT 50; + +CREATE INDEX ON jpa_comment USING hnsw (embedding vector_l2_ops); diff --git a/jpa/vector-search/src/test/java/example/springdata/vector/JpaVectorSearchTest.java b/jpa/vector-search/src/test/java/example/springdata/vector/JpaVectorSearchTest.java new file mode 100644 index 000000000..e6742d036 --- /dev/null +++ b/jpa/vector-search/src/test/java/example/springdata/vector/JpaVectorSearchTest.java @@ -0,0 +1,54 @@ +/* + * Copyright 2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import static org.springframework.data.domain.ScoringFunction.cosine; + +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.data.domain.Score; +import org.springframework.data.domain.SearchResult; +import org.springframework.data.domain.Vector; + +@SpringBootTest +class JpaVectorSearchTest { + + @Autowired + CommentRepository repository; + + @Test + void vectorSearchUsingQueryMethod() { + + Vector vector = Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f); + + repository.searchTop10ByCountryAndEmbeddingNear("de", vector, Score.of(0.5, cosine())) + .forEach(JpaVectorSearchTest::printResult); + } + + @Test + void vectorSearchUsingRawAtQuery() { + + Vector vector = Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f); + + repository.searchAnnotated("de", vector, Score.of(0.5, cosine())) + .forEach(JpaVectorSearchTest::printResult); + } + + private static void printResult(SearchResult result) { + System.out.printf("score: %s - %s\n", result.getScore(), result.getContent()); + } +} diff --git a/jpa/vector-search/src/test/java/example/springdata/vector/PGVectorConfiguration.java b/jpa/vector-search/src/test/java/example/springdata/vector/PGVectorConfiguration.java new file mode 100644 index 000000000..92fd8d63d --- 
/dev/null +++ b/jpa/vector-search/src/test/java/example/springdata/vector/PGVectorConfiguration.java @@ -0,0 +1,35 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import org.springframework.boot.testcontainers.service.connection.ServiceConnection; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.utility.DockerImageName; + +/** + * Configuration to use PGvector with Testcontainers. 
+ */ +@Configuration +class PGVectorConfiguration { + + @Bean + @ServiceConnection + PostgreSQLContainer pgVectorContainer() { + return new PostgreSQLContainer<>(DockerImageName.parse("pgvector/pgvector:pg17")).withReuse(true); + } +} diff --git a/mongodb/pom.xml b/mongodb/pom.xml index 04d2c282c..e733a04ba 100644 --- a/mongodb/pom.xml +++ b/mongodb/pom.xml @@ -37,6 +37,7 @@ linking util fragment-spi + vector-search diff --git a/mongodb/util/pom.xml b/mongodb/util/pom.xml index 90da78ade..f8e7d0926 100644 --- a/mongodb/util/pom.xml +++ b/mongodb/util/pom.xml @@ -29,6 +29,10 @@ mongodb ${testcontainers.version} + + org.springframework.boot + spring-boot-testcontainers + diff --git a/mongodb/util/src/main/java/example/springdata/mongodb/util/AtlasContainerConnectionDetailsFactory.java b/mongodb/util/src/main/java/example/springdata/mongodb/util/AtlasContainerConnectionDetailsFactory.java new file mode 100644 index 000000000..b3921f951 --- /dev/null +++ b/mongodb/util/src/main/java/example/springdata/mongodb/util/AtlasContainerConnectionDetailsFactory.java @@ -0,0 +1,49 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.mongodb.util; + +import com.mongodb.ConnectionString; +import org.springframework.boot.autoconfigure.mongo.MongoConnectionDetails; +import org.springframework.boot.ssl.SslBundle; +import org.springframework.boot.testcontainers.service.connection.ContainerConnectionDetailsFactory; +import org.springframework.boot.testcontainers.service.connection.ContainerConnectionSource; + +public class AtlasContainerConnectionDetailsFactory extends ContainerConnectionDetailsFactory { + + AtlasContainerConnectionDetailsFactory() { + super(ANY_CONNECTION_NAME, new String[]{"com.mongodb.ConnectionString"}); + } + + protected MongoConnectionDetails getContainerConnectionDetails(ContainerConnectionSource source) { + return new MongoContainerConnectionDetails(source); + } + + private static final class MongoContainerConnectionDetails extends ContainerConnectionDetailsFactory.ContainerConnectionDetails implements MongoConnectionDetails { + + private MongoContainerConnectionDetails(ContainerConnectionSource source) { + super(source); + } + + public ConnectionString getConnectionString() { + return new ConnectionString(this.getContainer().getConnectionString()); + } + + public SslBundle getSslBundle() { + return super.getSslBundle(); + } + } +} + diff --git a/mongodb/util/src/main/resources/META-INF/spring.factories b/mongodb/util/src/main/resources/META-INF/spring.factories new file mode 100644 index 000000000..8c6952234 --- /dev/null +++ b/mongodb/util/src/main/resources/META-INF/spring.factories @@ -0,0 +1,3 @@ +# Connection Details Factories +org.springframework.boot.autoconfigure.service.connection.ConnectionDetailsFactory=\ +example.springdata.mongodb.util.AtlasContainerConnectionDetailsFactory diff --git a/mongodb/vector-search/README.md b/mongodb/vector-search/README.md new file mode 100644 index 000000000..cabe97014 --- /dev/null +++ b/mongodb/vector-search/README.md @@ -0,0 +1,36 @@ +# Spring Data MongoDB - Vector Search Example + +This project 
+contains [Vector Search](https://docs.spring.io/spring-data/mongodb/reference/5.0/mongodb/repositories/vector-search.html) +with Spring Data MongoDB. + +## Vector Support + +The Spring Data `Vector` type can be used in repository query methods. +Domain type properties of managed domain types are required to use a numeric array representation for embeddings. + +```java + +@Document +public class Comment { + + @Id + private ObjectId id; + + private String country; + private String description; + + private Vector embedding; + + // ... +} + + +public interface CommentRepository extends Repository { + + @VectorSearch(indexName = "cosine-index", searchType = VectorSearchOperation.SearchType.ANN) + SearchResults searchTop10ByCountryAndEmbeddingNear(String country, Vector vector, Score distance); +} +``` + +This example contains a test class to illustrate vector search with a Repository in `MongoDBVectorSearchTest`. diff --git a/mongodb/vector-search/pom.xml b/mongodb/vector-search/pom.xml new file mode 100644 index 000000000..31f9bcd05 --- /dev/null +++ b/mongodb/vector-search/pom.xml @@ -0,0 +1,71 @@ + + + 4.0.0 + + org.springframework.data.examples + spring-data-mongodb-examples + 2.0.0.BUILD-SNAPSHOT + + + org.example + spring-data-mongodb-vector-search + + + UTF-8 + 7.0.0-M5 + 2025.1.0-M3 + + + + + org.jspecify + jspecify + 1.0.0 + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.springframework.boot + spring-boot-testcontainers + test + + + + org.testcontainers + junit-jupiter + test + + + + org.testcontainers + mongodb + + + org.springframework.data.examples + spring-data-mongodb-example-utils + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + + org.springframework.boot + spring-boot-maven-plugin + + + + + diff --git a/mongodb/vector-search/src/main/java/example/springdata/vector/Comment.java b/mongodb/vector-search/src/main/java/example/springdata/vector/Comment.java new file mode 100644 index 
000000000..1fbc945c5 --- /dev/null +++ b/mongodb/vector-search/src/main/java/example/springdata/vector/Comment.java @@ -0,0 +1,69 @@ +/* + * Copyright 2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.vector; + +import org.bson.types.ObjectId; +import org.springframework.data.domain.Vector; +import org.springframework.data.mongodb.core.mapping.Document; + +/** + * Sample entity containing a {@link Vector vector} {@link #embedding}. 
+ */
+@Document
+public class Comment {
+
+	/** Document identifier; this class never assigns it itself. */
+	private ObjectId id;
+
+	private String country;
+	private String description;
+
+	/** Embedding associated with this comment. */
+	private Vector embedding;
+
+	/**
+	 * Creates an empty comment with all properties unset.
+	 */
+	public Comment() {
+	}
+
+	/**
+	 * Creates a comment without an identifier.
+	 *
+	 * @param country value for the {@code country} property.
+	 * @param description value for the {@code description} property.
+	 * @param embedding value for the {@code embedding} property.
+	 */
+	public Comment(String country, String description, Vector embedding) {
+		this.embedding = embedding;
+		this.description = description;
+		this.country = country;
+	}
+
+	/**
+	 * Copies country, description and embedding of {@code source} into a new instance. The identifier is not
+	 * copied.
+	 *
+	 * @param source the comment to copy from.
+	 * @return a new comment carrying the same country, description and embedding.
+	 */
+	public static Comment of(Comment source) {
+
+		String sourceCountry = source.getCountry();
+		String sourceDescription = source.getDescription();
+		Vector sourceEmbedding = source.getEmbedding();
+
+		return new Comment(sourceCountry, sourceDescription, sourceEmbedding);
+	}
+
+	public ObjectId getId() {
+		return this.id;
+	}
+
+	public String getCountry() {
+		return this.country;
+	}
+
+	public String getDescription() {
+		return this.description;
+	}
+
+	public Vector getEmbedding() {
+		return this.embedding;
+	}
+
+
+	/**
+	 * @return the description followed by the country in parentheses.
+	 */
+	@Override
+	public String toString() {
+		return String.format("%s (%s)", getDescription(), getCountry());
+	}
+}
diff --git a/mongodb/vector-search/src/main/java/example/springdata/vector/CommentRepository.java b/mongodb/vector-search/src/main/java/example/springdata/vector/CommentRepository.java
new file mode 100644
index 000000000..2070abc70
--- /dev/null
+++ b/mongodb/vector-search/src/main/java/example/springdata/vector/CommentRepository.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package example.springdata.vector;
+
+import org.bson.types.ObjectId;
+import org.springframework.data.domain.Limit;
+import org.springframework.data.domain.Score;
+import org.springframework.data.domain.SearchResults;
+import org.springframework.data.domain.Vector;
+import org.springframework.data.mongodb.core.aggregation.VectorSearchOperation;
+import org.springframework.data.mongodb.repository.VectorSearch;
+import org.springframework.data.repository.CrudRepository;
+
+/**
+ * Repository for {@link Comment} documents offering vector search finder methods on top of the CRUD
+ * operations. The identifier type matches the {@code id} property declared by {@link Comment}.
+ */
+public interface CommentRepository extends CrudRepository<Comment, ObjectId> {
+
+	/**
+	 * Vector search against the {@code cosine-index} index using an approximate nearest neighbour (ANN) search,
+	 * with the query derived from the method name.
+	 *
+	 * @param country the country to match.
+	 * @param vector the query vector.
+	 * @param distance the score passed to the search.
+	 * @return the matching comments together with their scores.
+	 */
+	@VectorSearch(indexName = "cosine-index", searchType = VectorSearchOperation.SearchType.ANN)
+	SearchResults<Comment> searchTop10ByCountryAndEmbeddingNear(String country, Vector vector, Score distance);
+
+	/**
+	 * Vector search against the {@code cosine-index} index whose filter and number of candidates are declared
+	 * on the annotation: the filter binds {@code country} to the first argument and the candidate count is
+	 * computed from the given {@link Limit}.
+	 *
+	 * @param country the country bound to the filter placeholder {@code ?0}.
+	 * @param vector the query vector.
+	 * @param distance the score passed to the search.
+	 * @param limit the limit referenced by the {@code numCandidates} expression.
+	 * @return the matching comments together with their scores.
+	 */
+	@VectorSearch(indexName = "cosine-index", filter = "{country: ?0}", numCandidates = "#{#limit.max*10}",
+			searchType = VectorSearchOperation.SearchType.ANN)
+	SearchResults<Comment> searchAnnotated(String country, Vector vector, Score distance, Limit limit);
+}
diff --git a/mongodb/vector-search/src/main/java/example/springdata/vector/VectorApp.java b/mongodb/vector-search/src/main/java/example/springdata/vector/VectorApp.java
new file mode 100644
index 000000000..f6c255918
--- /dev/null
+++ b/mongodb/vector-search/src/main/java/example/springdata/vector/VectorApp.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package example.springdata.vector;
+
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.data.domain.Vector;
+import org.springframework.stereotype.Component;
+
+/**
+ * Boot application for the vector search example. On startup the nested {@link DbInitializer} replaces the
+ * stored comments with a fixed set of samples.
+ */
+@SpringBootApplication
+public class VectorApp {
+
+	public static void main(String[] args) {
+		SpringApplication.run(VectorApp.class, args);
+	}
+
+	/**
+	 * {@link CommandLineRunner} that clears the repository and stores four sample comments.
+	 */
+	@Component
+	static class DbInitializer implements CommandLineRunner {
+
+		private final CommentRepository repository;
+
+		DbInitializer(CommentRepository repository) {
+			this.repository = repository;
+		}
+
+		/**
+		 * Deletes all comments and then saves the samples one by one, in declaration order.
+		 *
+		 * @param args ignored.
+		 */
+		@Override
+		public void run(String... args) {
+
+			repository.deleteAll();
+
+			Comment[] samples = {
+					new Comment("de", "comment 'one'", Vector.of(0.1001f, 0.22345f, 0.33456f, 0.44567f, 0.55678f)),
+					new Comment("de", "comment 'two'", Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f)),
+					new Comment("en", "comment 'three'", Vector.of(0.9001f, 0.82345f, 0.73456f, 0.64567f, 0.55678f)),
+					new Comment("de", "comment 'four'", Vector.of(0.9001f, 0.92345f, 0.93456f, 0.94567f, 0.95678f)) };
+
+			for (Comment sample : samples) {
+				repository.save(sample);
+			}
+		}
+	}
+}
diff --git a/mongodb/vector-search/src/main/resources/application.properties b/mongodb/vector-search/src/main/resources/application.properties
new file mode 100644
index 000000000..d0f1c59b3
--- /dev/null
+++ b/mongodb/vector-search/src/main/resources/application.properties
@@ -0,0 +1 @@
+logging.level.org=WARN
diff --git a/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBConfiguration.java b/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBConfiguration.java
new file mode 100644
index 000000000..fee5246da
--- /dev/null
+++ b/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBConfiguration.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package example.springdata.vector;
+
+import example.springdata.mongodb.util.AtlasContainer;
+import example.springdata.mongodb.util.MongoContainers;
+import org.springframework.boot.testcontainers.service.connection.ServiceConnection;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+/**
+ * Test configuration contributing the {@link AtlasContainer} as a service connection named {@code mongo}.
+ */
+@Configuration
+public class MongoDBConfiguration {
+
+	/**
+	 * @return the container obtained from {@link MongoContainers#getAtlasContainer()}.
+	 */
+	@Bean
+	@ServiceConnection(name = "mongo")
+	AtlasContainer atlasContainer() {
+
+		AtlasContainer container = MongoContainers.getAtlasContainer();
+		return container;
+	}
+}
diff --git a/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBVectorSearchTest.java b/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBVectorSearchTest.java
new file mode 100644
index 000000000..6dc750425
--- /dev/null
+++ b/mongodb/vector-search/src/test/java/example/springdata/vector/MongoDBVectorSearchTest.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2025 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package example.springdata.vector;
+
+import static org.springframework.data.domain.ScoringFunction.cosine;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.data.domain.Limit;
+import org.springframework.data.domain.Score;
+import org.springframework.data.domain.SearchResult;
+import org.springframework.data.domain.Vector;
+
+/**
+ * Runs the vector search finder methods of {@link CommentRepository} and prints each result with its score.
+ */
+@SpringBootTest
+class MongoDBVectorSearchTest {
+
+	/** Query vector shared by all tests. */
+	private static final Vector QUERY_VECTOR = Vector.of(0.2001f, 0.32345f, 0.43456f, 0.54567f, 0.65678f);
+
+	@Autowired
+	CommentRepository repository;
+
+	/**
+	 * Pauses for five seconds before every test method.
+	 * NOTE(review): presumably this gives the search index time to become queryable - confirm.
+	 *
+	 * @throws InterruptedException if the thread is interrupted while sleeping.
+	 */
+	@BeforeEach
+	void waitBeforeEachTest() throws InterruptedException {
+		Thread.sleep(5000); // a little time to think
+	}
+
+	/**
+	 * Searches through the finder whose query is derived from the method name.
+	 */
+	@Test
+	void vectorSearchUsingQueryMethod() {
+
+		repository.searchTop10ByCountryAndEmbeddingNear("de", QUERY_VECTOR, Score.of(0.5, cosine()))
+				.forEach(MongoDBVectorSearchTest::printResult);
+	}
+
+	/**
+	 * Searches through the finder whose filter and candidate count are declared on the annotation.
+	 */
+	@Test
+	void vectorSearchUsingRawAtQuery() {
+
+		repository.searchAnnotated("de", QUERY_VECTOR, Score.of(0.5, cosine()), Limit.of(10))
+				.forEach(MongoDBVectorSearchTest::printResult);
+	}
+
+	/**
+	 * Prints the score and content of a single search result to standard out.
+	 *
+	 * @param result the result to print.
+	 */
+	private static void printResult(SearchResult<Comment> result) {
+		// %n emits the platform line separator instead of a hard-coded line feed.
+		System.out.printf("score: %s - %s%n", result.getScore(), result.getContent());
+	}
+}