Skip to content

Commit

Permalink
Embedding Foundations & OpenAI Embeddings (echolabsdev#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
sixlive authored Nov 26, 2024
1 parent 8241fc2 commit d2304ea
Show file tree
Hide file tree
Showing 22 changed files with 3,789 additions and 8 deletions.
4 changes: 4 additions & 0 deletions docs/.vitepress/config.mts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ export default defineConfig({
text: "Tool & Function Calling",
link: "/core-concepts/tools-function-calling",
},
{
text: "Embeddings",
link: "/core-concepts/embeddings",
},
{
text: "Prism Server",
link: "/core-concepts/prism-server",
Expand Down
107 changes: 107 additions & 0 deletions docs/core-concepts/embeddings.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Embeddings

Transform your text into powerful vector representations! Embeddings let you add semantic search, recommendation systems, and other advanced natural language features to your applications.

## Quick Start

Here's how to generate embeddings with just a few lines of code:

```php
use EchoLabs\Prism\Facades\Prism;
use EchoLabs\Prism\Enums\Provider;

$response = Prism::embeddings()
->using(Provider::OpenAI, 'text-embedding-3-large')
->fromInput('Your text goes here')
->generate();

// Get your embeddings vector
$embeddings = $response->embeddings;

// Check token usage
echo $response->usage->tokens;
```

## Input Methods

You've got two convenient ways to feed text into the embeddings generator:

### Direct Text Input

```php
$response = Prism::embeddings()
->using(Provider::OpenAI, 'text-embedding-3-large')
->fromInput('Analyze this text')
->generate();
```

### From File

Need to analyze a larger document? No problem:

```php
$response = Prism::embeddings()
->using(Provider::OpenAI, 'text-embedding-3-large')
->fromFile('/path/to/your/document.txt')
->generate();
```

> [!NOTE]
> Make sure your file exists and is readable. The generator will throw a helpful `PrismException` if there's any issue accessing the file.

## Common Settings

Just like with text generation, you can fine-tune your embeddings requests:

```php
$response = Prism::embeddings()
->using(Provider::OpenAI, 'text-embedding-3-large')
->fromInput('Your text here')
->withClientOptions(['timeout' => 30]) // Adjust request timeout
->withClientRetry(3, 100) // Add automatic retries
->generate();
```

## Response Handling

The embeddings response gives you everything you need:

```php
// Get the embeddings vector
$vector = $response->embeddings;

// Check token usage
$tokenCount = $response->usage->tokens;
```

## Error Handling

Always handle potential errors gracefully:

```php
use EchoLabs\Prism\Exceptions\PrismException;

try {
$response = Prism::embeddings()
->using(Provider::OpenAI, 'text-embedding-3-large')
->fromInput('Your text here')
->generate();
} catch (PrismException $e) {
Log::error('Embeddings generation failed:', [
'error' => $e->getMessage()
]);
}
```

## Pro Tips 🌟

**Vector Storage**: Consider using a vector database like Milvus, Qdrant, or pgvector to store and query your embeddings efficiently.

**Text Preprocessing**: For best results, clean and normalize your text before generating embeddings. This might include:
- Removing unnecessary whitespace
- Converting to lowercase
- Removing special characters
- Handling Unicode normalization

> [!IMPORTANT]
> Different providers and models produce vectors of different dimensions. Always check your provider's documentation for specific details about the embedding model you're using.
4 changes: 4 additions & 0 deletions src/Contracts/Provider.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

namespace EchoLabs\Prism\Contracts;

use EchoLabs\Prism\Embeddings\Request as EmbeddingsRequest;
use EchoLabs\Prism\Embeddings\Response as EmbeddingsResponse;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Request as StructuredRequest;
use EchoLabs\Prism\Text\Request as TextRequest;
Expand All @@ -13,4 +15,6 @@ interface Provider
public function text(TextRequest $request): ProviderResponse;

public function structured(StructuredRequest $request): ProviderResponse;

/**
 * Generate embeddings for the given embeddings request.
 *
 * Providers without embeddings support (Anthropic, Groq, Mistral and Ollama
 * in this change) throw an \Exception instead of returning a response.
 */
public function embeddings(EmbeddingsRequest $request): EmbeddingsResponse;
}
74 changes: 74 additions & 0 deletions src/Embeddings/Generator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

declare(strict_types=1);

namespace EchoLabs\Prism\Embeddings;

use Closure;
use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Enums\Provider as ProviderEnum;
use EchoLabs\Prism\Exceptions\PrismException;
use EchoLabs\Prism\PrismManager;

/**
 * Fluent builder for embeddings requests.
 *
 * Collects the provider, model, input text and HTTP client settings, then
 * hands them to the resolved provider as an embeddings Request.
 */
class Generator
{
    protected string $input = '';

    /** @var array<string, mixed> */
    protected array $clientOptions = [];

    /** @var array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool} */
    protected array $clientRetry = [0];

    protected Provider $provider;

    protected string $model;

    /**
     * Select the provider (resolved through the PrismManager) and the model.
     */
    public function using(string|ProviderEnum $provider, string $model): self
    {
        $this->provider = resolve(PrismManager::class)
            ->resolve($provider);

        $this->model = $model;

        return $this;
    }

    /**
     * Use the given string as the text to embed.
     */
    public function fromInput(string $input): self
    {
        $this->input = $input;

        return $this;
    }

    /**
     * Use the contents of a file as the text to embed.
     *
     * @throws PrismException when the path is not a file or cannot be read
     */
    public function fromFile(string $path): self
    {
        if (! is_file($path)) {
            throw new PrismException(sprintf('%s is not a valid file', $path));
        }

        $contents = file_get_contents($path);

        if ($contents === false) {
            throw new PrismException(sprintf('%s contents could not be read', $path));
        }

        $this->input = $contents;

        return $this;
    }

    /**
     * Set the client options passed along with the request.
     *
     * NOTE(review): presumably forwarded to the provider's HTTP client
     * (e.g. ['timeout' => 30]) — confirm against the provider handlers.
     *
     * @param  array<string, mixed>  $options
     */
    public function withClientOptions(array $options): self
    {
        $this->clientOptions = $options;

        return $this;
    }

    /**
     * Set the retry configuration passed along with the request.
     *
     * The arguments are stored positionally, matching the array shape
     * declared on $clientRetry.
     *
     * @param  array<int, int>|int  $times
     */
    public function withClientRetry(
        array|int $times,
        Closure|int $sleepMilliseconds = 0,
        ?callable $when = null,
        bool $throw = true,
    ): self {
        $this->clientRetry = [$times, $sleepMilliseconds, $when, $throw];

        return $this;
    }

    /**
     * Build the request and send it to the selected provider.
     *
     * @throws PrismException when no provider/model was selected or the input is empty
     */
    public function generate(): Response
    {
        // Typed properties without defaults raise an \Error when read before
        // assignment; surface a PrismException so callers can catch one type.
        if (! isset($this->provider, $this->model)) {
            throw new PrismException('Embeddings provider and model are required, call using() first');
        }

        // Only the empty string counts as missing input: '0' is real text.
        if ($this->input === '') {
            throw new PrismException('Embeddings input is required');
        }

        return $this->provider->embeddings(new Request(
            model: $this->model,
            input: $this->input,
            clientOptions: $this->clientOptions,
            clientRetry: $this->clientRetry,
        ));
    }
}
21 changes: 21 additions & 0 deletions src/Embeddings/Request.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

declare(strict_types=1);

namespace EchoLabs\Prism\Embeddings;

use Closure;

/**
 * Read-only value object describing a single embeddings request.
 *
 * Carries the model name, the input text, and the client options and retry
 * settings that are handed to a provider's embeddings() method.
 */
class Request
{
/**
* @param array<string, mixed> $clientOptions
* @param array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool} $clientRetry
*/
public function __construct(
public readonly string $model,
public readonly string $input,
public readonly array $clientOptions,
public readonly array $clientRetry,
) {}
}
18 changes: 18 additions & 0 deletions src/Embeddings/Response.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

declare(strict_types=1);

namespace EchoLabs\Prism\Embeddings;

use EchoLabs\Prism\ValueObjects\EmbeddingsUsage;

/**
 * Read-only value object returned from an embeddings call: the embeddings
 * array plus the usage information for the request.
 */
class Response
{
/**
 * NOTE(review): embedding vectors are usually lists of floats; confirm that
 * the int|string element type below is what providers actually return.
 *
 * @param array<int, int|string> $embeddings
 */
public function __construct(
public readonly array $embeddings,
public readonly EmbeddingsUsage $usage,
) {}
}
6 changes: 6 additions & 0 deletions src/Prism.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
namespace EchoLabs\Prism;

use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Embeddings\Generator as EmbeddingsGenerator;
use EchoLabs\Prism\Enums\Provider as ProviderEnum;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Generator as StructuredGenerator;
Expand Down Expand Up @@ -44,4 +45,9 @@ public static function structured(): StructuredGenerator
{
return new StructuredGenerator;
}

/**
 * Start building an embeddings request.
 *
 * Returns a fresh generator on every call. The return type uses the
 * imported EmbeddingsGenerator alias, in line with structured().
 */
public static function embeddings(): EmbeddingsGenerator
{
    return new EmbeddingsGenerator;
}
}
8 changes: 8 additions & 0 deletions src/Providers/Anthropic/Anthropic.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace EchoLabs\Prism\Providers\Anthropic;

use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Embeddings\Request as EmbeddingRequest;
use EchoLabs\Prism\Embeddings\Response as EmbeddingResponse;
use EchoLabs\Prism\Providers\Anthropic\Handlers\Text;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Request as StructuredRequest;
Expand Down Expand Up @@ -33,6 +35,12 @@ public function structured(StructuredRequest $request): ProviderResponse
throw new \Exception(sprintf('%s does not support structured mode', class_basename($this)));
}

/**
 * Embeddings are not available for this provider.
 *
 * @throws \Exception always, naming the provider class
 */
#[\Override]
public function embeddings(EmbeddingRequest $request): EmbeddingResponse
{
    $providerName = class_basename($this);
    $message = sprintf('%s does not support embeddings', $providerName);

    throw new \Exception($message);
}

/**
* @param array<string, mixed> $options
* @param array<mixed> $retry
Expand Down
8 changes: 8 additions & 0 deletions src/Providers/Groq/Groq.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace EchoLabs\Prism\Providers\Groq;

use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Embeddings\Request as EmbeddingRequest;
use EchoLabs\Prism\Embeddings\Response as EmbeddingResponse;
use EchoLabs\Prism\Providers\Groq\Handlers\Text;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Request as StructuredRequest;
Expand Down Expand Up @@ -33,6 +35,12 @@ public function structured(StructuredRequest $request): ProviderResponse
throw new \Exception(sprintf('%s does not support structured mode', class_basename($this)));
}

/**
 * This provider has no embeddings support; every call fails.
 *
 * @throws \Exception always, with the provider's short class name in the message
 */
#[\Override]
public function embeddings(EmbeddingRequest $request): EmbeddingResponse
{
    $unsupported = new \Exception(
        sprintf('%s does not support embeddings', class_basename($this))
    );

    throw $unsupported;
}

/**
* @param array<string, mixed> $options
* @param array<mixed> $retry
Expand Down
8 changes: 8 additions & 0 deletions src/Providers/Mistral/Mistral.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace EchoLabs\Prism\Providers\Mistral;

use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Embeddings\Request as EmbeddingRequest;
use EchoLabs\Prism\Embeddings\Response as EmbeddingResponse;
use EchoLabs\Prism\Providers\Mistral\Handlers\Text;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Request as StructuredRequest;
Expand Down Expand Up @@ -33,6 +35,12 @@ public function structured(StructuredRequest $request): ProviderResponse
throw new \Exception(sprintf('%s does not support structured mode', class_basename($this)));
}

/**
 * Reject embeddings requests: this provider does not implement them.
 *
 * @throws \Exception always
 */
#[\Override]
public function embeddings(EmbeddingRequest $request): EmbeddingResponse
{
    $shortName = class_basename($this);

    throw new \Exception(
        sprintf('%s does not support embeddings', $shortName)
    );
}

/**
* @param array<string, mixed> $options
* @param array<mixed> $retry
Expand Down
8 changes: 8 additions & 0 deletions src/Providers/Ollama/Ollama.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
namespace EchoLabs\Prism\Providers\Ollama;

use EchoLabs\Prism\Contracts\Provider;
use EchoLabs\Prism\Embeddings\Request as EmbeddingRequest;
use EchoLabs\Prism\Embeddings\Response as EmbeddingResponse;
use EchoLabs\Prism\Providers\Ollama\Handlers\Text;
use EchoLabs\Prism\Providers\ProviderResponse;
use EchoLabs\Prism\Structured\Request as StructuredRequest;
Expand Down Expand Up @@ -33,6 +35,12 @@ public function structured(StructuredRequest $request): ProviderResponse
throw new \Exception(sprintf('%s does not support structured mode', class_basename($this)));
}

/**
 * Always fails: embeddings are not implemented for this provider.
 *
 * @throws \Exception on every call, identifying the provider by class basename
 */
#[\Override]
public function embeddings(EmbeddingRequest $request): EmbeddingResponse
{
    $reason = sprintf(
        '%s does not support embeddings',
        class_basename($this),
    );

    throw new \Exception($reason);
}

/**
* @param array<string, mixed> $options
* @param array<mixed> $retry
Expand Down
Loading

0 comments on commit d2304ea

Please sign in to comment.