<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

use Symfony\AI\Agent\Agent;
use Symfony\AI\Agent\Toolbox\AgentProcessor;
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
use Symfony\AI\Agent\Toolbox\Toolbox;
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
use Symfony\AI\Platform\Bridge\OpenAI\GPT;
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\AI\Store\Document\Metadata;
use Symfony\AI\Store\Document\TextDocument;
use Symfony\AI\Store\Document\Vectorizer;
use Symfony\AI\Store\Indexer;
use Symfony\AI\Store\InMemoryStore;
use Symfony\Component\Dotenv\Dotenv;
use Symfony\Component\Uid\Uuid;

// Example: retrieval-augmented answering backed by the in-memory vector store.
// The script indexes a few movie descriptions, then lets an agent answer a
// question through the SimilaritySearch tool.

require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

if (!isset($_SERVER['OPENAI_API_KEY'])) {
    echo 'Please set OPENAI_API_KEY environment variable.'.\PHP_EOL;
    exit(1);
}

// initialize the store
$store = new InMemoryStore();

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'director' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'director' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'director' => 'Francis Ford Coppola'],
];

// turn every movie into a text document; the raw movie array is kept as metadata.
// array_map always yields an array, so $documents is defined even for an empty list.
$documents = array_map(
    static fn (array $movie): TextDocument => new TextDocument(
        id: Uuid::v4(),
        content: 'Title: '.$movie['title'].\PHP_EOL.'Director: '.$movie['director'].\PHP_EOL.'Description: '.$movie['description'],
        metadata: new Metadata($movie),
    ),
    $movies,
);

// create embeddings for documents and put them into the store
$platform = PlatformFactory::create($_SERVER['OPENAI_API_KEY']);
$embeddings = new Embeddings();
$vectorizer = new Vectorizer($platform, $embeddings);
$indexer = new Indexer($vectorizer, $store);
$indexer->index($documents);

$model = new GPT(GPT::GPT_4O_MINI);

// the same embeddings model and store are handed to the search tool used by the agent
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
$toolbox = Toolbox::create($similaritySearch);
$processor = new AgentProcessor($toolbox);
$agent = new Agent($platform, $model, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $agent->call($messages);

echo $response->getContent().\PHP_EOL;
_`Similarity Search with Pinecone (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/pinecone-similarity-search.php .. _`Similarity Search with Meilisearch (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/meilisearch-similarity-search.php +.. _`Similarity Search with memory storage (RAG)`: https://github.com/symfony/ai/blob/main/examples/store/memory-similarity-search.php .. _`Azure AI Search`: https://azure.microsoft.com/products/ai-services/ai-search .. _`Chroma`: https://www.trychroma.com/ .. _`MariaDB`: https://mariadb.org/projects/mariadb-vector/ @@ -96,4 +99,5 @@ This leads to a store implementing two methods:: .. _`Pinecone`: https://www.pinecone.io/ .. _`Postgres`: https://www.postgresql.org/about/news/pgvector-070-released-2852/ .. _`Meilisearch`: https://www.meilisearch.com/ +.. _`InMemory`: https://www.php.net/manual/en/language.types.array.php .. _`GitHub`: https://github.com/symfony/ai/issues/16 diff --git a/src/store/src/InMemoryStore.php b/src/store/src/InMemoryStore.php new file mode 100644 index 00000000..4592da4d --- /dev/null +++ b/src/store/src/InMemoryStore.php @@ -0,0 +1,145 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
namespace Symfony\AI\Store;

use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Document\VectorDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;

/**
 * Vector store that keeps every added document in a PHP array.
 *
 * A query scores each stored document against the query vector with the
 * strategy chosen at construction time and returns the documents ordered
 * from closest to farthest.
 *
 * @author Guillaume Loulier
 */
final class InMemoryStore implements VectorStoreInterface
{
    /** Rank by cosine similarity: the most similar document comes first. */
    public const COSINE_SIMILARITY = 'cosine';
    /** Rank by the angle between vectors, normalised by pi. */
    public const ANGULAR_DISTANCE = 'angular';
    /** Rank by straight-line (L2) distance. */
    public const EUCLIDEAN_DISTANCE = 'euclidean';
    /** Rank by the sum of absolute component differences (L1). */
    public const MANHATTAN_DISTANCE = 'manhattan';
    /** Rank by the largest absolute component difference. */
    public const CHEBYSHEV_DISTANCE = 'chebyshev';

    /**
     * @var VectorDocument[]
     */
    private array $documents = [];

    /**
     * @param string $similarity One of the class constants; the value is only
     *                           validated when query() is called
     */
    public function __construct(
        private readonly string $similarity = self::COSINE_SIMILARITY,
    ) {
    }

    /**
     * Appends the given documents to the store; existing documents are kept.
     */
    public function add(VectorDocument ...$documents): void
    {
        array_push($this->documents, ...$documents);
    }

    /**
     * Returns the stored documents ordered from closest to farthest from $vector.
     *
     * Note: $minScore is not read by this store, so no score threshold is applied.
     *
     * @param array{
     *     maxItems?: positive-int
     * } $options If maxItems is provided, only the top N results will be returned
     *
     * @return VectorDocument[]
     *
     * @throws InvalidArgumentException When the store was built with an unknown strategy
     */
    public function query(Vector $vector, array $options = [], ?float $minScore = null): array
    {
        // Every strategy must yield a distance (smaller = closer) because the
        // results are sorted ascending below. Cosine similarity grows with
        // closeness, so it is ranked through its distance form instead.
        $distance = match ($this->similarity) {
            self::COSINE_SIMILARITY => $this->cosineDistance(...),
            self::ANGULAR_DISTANCE => $this->angularDistance(...),
            self::EUCLIDEAN_DISTANCE => $this->euclideanDistance(...),
            self::MANHATTAN_DISTANCE => $this->manhattanDistance(...),
            self::CHEBYSHEV_DISTANCE => $this->chebyshevDistance(...),
            default => throw new InvalidArgumentException(\sprintf('Unsupported similarity strategy "%s"', $this->similarity)),
        };

        $scored = array_map(
            static fn (VectorDocument $document): array => [
                'distance' => $distance($document, $vector),
                'document' => $document,
            ],
            $this->documents,
        );

        usort(
            $scored,
            static fn (array $left, array $right): int => $left['distance'] <=> $right['distance'],
        );

        if (\array_key_exists('maxItems', $options) && $options['maxItems'] < \count($scored)) {
            $scored = \array_slice($scored, 0, $options['maxItems']);
        }

        return array_map(
            static fn (array $entry): VectorDocument => $entry['document'],
            $scored,
        );
    }

    /**
     * Cosine of the angle between the document vector and $against, in [-1, 1].
     *
     * Returns 0.0 when either vector has zero length, where the cosine is undefined.
     */
    private function cosineSimilarity(VectorDocument $embedding, Vector $against): float
    {
        $documentData = $embedding->vector->getData();
        $queryData = $against->getData();

        $dotProduct = array_sum(array_map(
            static fn (float $a, float $b): float => $a * $b,
            $documentData,
            $queryData,
        ));

        $magnitudes = $this->magnitude($documentData) * $this->magnitude($queryData);

        // Dividing by zero would produce NAN, which breaks the ordering in usort().
        if (0.0 === $magnitudes) {
            return 0.0;
        }

        // Rounding can push the ratio slightly outside [-1, 1]; acos() returns NAN there.
        return max(-1.0, min(1.0, fdiv($dotProduct, $magnitudes)));
    }

    /**
     * Cosine distance (1 - cosine similarity): 0.0 for identical direction, 2.0 for opposite.
     */
    private function cosineDistance(VectorDocument $embedding, Vector $against): float
    {
        return 1.0 - $this->cosineSimilarity($embedding, $against);
    }

    /**
     * Angle between the two vectors divided by pi, in [0, 1].
     */
    private function angularDistance(VectorDocument $embedding, Vector $against): float
    {
        return fdiv(acos($this->cosineSimilarity($embedding, $against)), \M_PI);
    }

    /**
     * Square root of the summed squared component differences.
     */
    private function euclideanDistance(VectorDocument $embedding, Vector $against): float
    {
        return sqrt(array_sum(array_map(
            static fn (float $a, float $b): float => ($a - $b) ** 2,
            $embedding->vector->getData(),
            $against->getData(),
        )));
    }

    /**
     * Sum of the absolute component differences.
     */
    private function manhattanDistance(VectorDocument $embedding, Vector $against): float
    {
        return array_sum(array_map(
            static fn (float $a, float $b): float => abs($a - $b),
            $embedding->vector->getData(),
            $against->getData(),
        ));
    }

    /**
     * Largest absolute component difference; 0.0 for empty vectors.
     */
    private function chebyshevDistance(VectorDocument $embedding, Vector $against): float
    {
        $differences = array_map(
            static fn (float $a, float $b): float => abs($a - $b),
            $embedding->vector->getData(),
            $against->getData(),
        );

        return array_reduce(
            $differences,
            static fn (float $largest, float $difference): float => max($largest, $difference),
            0.0,
        );
    }

    /**
     * Euclidean length of a vector given as a list of components.
     *
     * @param float[] $components
     */
    private function magnitude(array $components): float
    {
        return sqrt(array_sum(array_map(
            static fn (float $value): float => $value ** 2,
            $components,
        )));
    }
}

// File: src/store/tests/InMemoryStoreTest.php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\AI\Store\Tests;

use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Vector\Vector;
use Symfony\AI\Store\Document\VectorDocument;
use Symfony\AI\Store\Exception\InvalidArgumentException;
use Symfony\AI\Store\InMemoryStore;
use Symfony\Component\Uid\Uuid;

#[CoversClass(InMemoryStore::class)]
final class InMemoryStoreTest extends TestCase
{
    public function testStoreCanSearchUsingCosineSimilarity(): void
    {
        $store = new InMemoryStore();
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5])),
            new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0])),
            new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1])),
        );

        $result = $store->query(new Vector([0.0, 0.1, 0.6]));

        self::assertCount(3, $result);
        // The most similar document must come first, the least similar last.
        self::assertSame([0.1, 0.1, 0.5], $result[0]->vector->getData());
        self::assertSame([0.3, 0.7, 0.1], $result[1]->vector->getData());
        self::assertSame([0.7, -0.3, 0.0], $result[2]->vector->getData());

        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5])),
            new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0])),
            new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1])),
        );

        self::assertCount(6, $store->query(new Vector([0.0, 0.1, 0.6])));
    }

    public function testStoreCanSearchUsingCosineSimilarityWithMaxItems(): void
    {
        $store = new InMemoryStore();
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([0.1, 0.1, 0.5])),
            new VectorDocument(Uuid::v4(), new Vector([0.7, -0.3, 0.0])),
            new VectorDocument(Uuid::v4(), new Vector([0.3, 0.7, 0.1])),
        );

        $result = $store->query(new Vector([0.0, 0.1, 0.6]), [
            'maxItems' => 1,
        ]);

        self::assertCount(1, $result);
        // Truncation must keep the best match, not the worst one.
        self::assertSame([0.1, 0.1, 0.5], $result[0]->vector->getData());
    }

    public function testStoreCanSearchUsingAngularDistance(): void
    {
        $store = new InMemoryStore(InMemoryStore::ANGULAR_DISTANCE);
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 5.0, 7.0])),
        );

        $result = $store->query(new Vector([1.2, 2.3, 3.4]));

        self::assertCount(2, $result);
        self::assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
    }

    public function testStoreCanSearchUsingEuclideanDistance(): void
    {
        $store = new InMemoryStore(InMemoryStore::EUCLIDEAN_DISTANCE);
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([1.0, 5.0, 7.0])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0])),
        );

        $result = $store->query(new Vector([1.2, 2.3, 3.4]));

        self::assertCount(2, $result);
        self::assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
    }

    public function testStoreCanSearchUsingManhattanDistance(): void
    {
        $store = new InMemoryStore(InMemoryStore::MANHATTAN_DISTANCE);
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 5.0, 7.0])),
        );

        $result = $store->query(new Vector([1.2, 2.3, 3.4]));

        self::assertCount(2, $result);
        self::assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
    }

    public function testStoreCanSearchUsingChebyshevDistance(): void
    {
        $store = new InMemoryStore(InMemoryStore::CHEBYSHEV_DISTANCE);
        $store->add(
            new VectorDocument(Uuid::v4(), new Vector([1.0, 2.0, 3.0])),
            new VectorDocument(Uuid::v4(), new Vector([1.0, 5.0, 7.0])),
        );

        $result = $store->query(new Vector([1.2, 2.3, 3.4]));

        self::assertCount(2, $result);
        self::assertSame([1.0, 2.0, 3.0], $result[0]->vector->getData());
    }

    public function testQueryThrowsOnUnsupportedStrategy(): void
    {
        $store = new InMemoryStore('unknown');

        $this->expectException(InvalidArgumentException::class);
        $this->expectExceptionMessage('Unsupported similarity strategy "unknown"');

        $store->query(new Vector([0.0, 0.1, 0.6]));
    }
}