forked from echolabsdev/prism
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Embedding Foundations & OpenAI Embeddings (echolabsdev#70)
- Loading branch information
Showing
22 changed files
with
3,789 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
# Embeddings | ||
|
||
Transform your text into powerful vector representations! Embeddings let you add semantic search, recommendation systems, and other advanced natural language features to your applications. | ||
|
||
## Quick Start | ||
|
||
Here's how to generate embeddings with just a few lines of code: | ||
|
||
```php | ||
use EchoLabs\Prism\Facades\Prism; | ||
use EchoLabs\Prism\Enums\Provider; | ||
|
||
$response = Prism::embeddings() | ||
->using(Provider::OpenAI, 'text-embedding-3-large') | ||
->fromInput('Your text goes here') | ||
->generate(); | ||
|
||
// Get your embeddings vector | ||
$embeddings = $response->embeddings; | ||
|
||
// Check token usage | ||
echo $response->usage->tokens; | ||
``` | ||
|
||
## Input Methods | ||
|
||
You've got two convenient ways to feed text into the embeddings generator: | ||
|
||
### Direct Text Input | ||
|
||
```php | ||
$response = Prism::embeddings() | ||
->using(Provider::OpenAI, 'text-embedding-3-large') | ||
->fromInput('Analyze this text') | ||
->generate(); | ||
``` | ||
|
||
### From File | ||
|
||
Need to analyze a larger document? No problem: | ||
|
||
```php | ||
$response = Prism::embeddings() | ||
->using(Provider::OpenAI, 'text-embedding-3-large') | ||
->fromFile('/path/to/your/document.txt') | ||
->generate(); | ||
``` | ||
|
||
> [!NOTE] | ||
> Make sure your file exists and is readable. The generator will throw a helpful `PrismException` if there's any issue accessing the file. | ||
|
||
## Common Settings | ||
|
||
Just like with text generation, you can fine-tune your embeddings requests: | ||
|
||
```php | ||
$response = Prism::embeddings() | ||
->using(Provider::OpenAI, 'text-embedding-3-large') | ||
->fromInput('Your text here') | ||
->withClientOptions(['timeout' => 30]) // Adjust request timeout | ||
->withClientRetry(3, 100) // Add automatic retries | ||
->generate(); | ||
``` | ||
|
||
## Response Handling | ||
|
||
The embeddings response gives you everything you need: | ||
|
||
```php | ||
// Get the embeddings vector | ||
$vector = $response->embeddings; | ||
|
||
// Check token usage | ||
$tokenCount = $response->usage->tokens; | ||
``` | ||
|
||
## Error Handling | ||
|
||
Always handle potential errors gracefully: | ||
|
||
```php | ||
use EchoLabs\Prism\Exceptions\PrismException; | ||
use Illuminate\Support\Facades\Log; | ||
|
||
try { | ||
$response = Prism::embeddings() | ||
->using(Provider::OpenAI, 'text-embedding-3-large') | ||
->fromInput('Your text here') | ||
->generate(); | ||
} catch (PrismException $e) { | ||
Log::error('Embeddings generation failed:', [ | ||
'error' => $e->getMessage() | ||
]); | ||
} | ||
``` | ||
|
||
## Pro Tips 🌟 | ||
|
||
**Vector Storage**: Consider using a vector database like Milvus, Qdrant, or pgvector to store and query your embeddings efficiently. | ||
|
||
**Text Preprocessing**: For best results, clean and normalize your text before generating embeddings. This might include: | ||
- Removing unnecessary whitespace | ||
- Converting to lowercase | ||
- Removing special characters | ||
- Handling Unicode normalization | ||
|
||
> [!IMPORTANT] | ||
> Different providers and models produce vectors of different dimensions. Always check your provider's documentation for specific details about the embedding model you're using. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace EchoLabs\Prism\Embeddings; | ||
|
||
use Closure; | ||
use EchoLabs\Prism\Contracts\Provider; | ||
use EchoLabs\Prism\Enums\Provider as ProviderEnum; | ||
use EchoLabs\Prism\Exceptions\PrismException; | ||
use EchoLabs\Prism\PrismManager; | ||
|
||
/**
 * Fluent builder for embeddings requests.
 *
 * Collects the provider, model, input text and client settings, then hands a
 * Request to the resolved provider when generate() is called.
 */
class Generator
{
    /** Text to embed; an empty string means no input has been supplied yet. */
    protected string $input = '';

    /** @var array<string, mixed> */
    protected array $clientOptions = [];

    /** @var array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool} */
    protected array $clientRetry = [0];

    protected Provider $provider;

    protected string $model;

    /**
     * Select the provider and model that will produce the embeddings.
     *
     * The provider is resolved through the PrismManager from the container.
     */
    public function using(string|ProviderEnum $provider, string $model): self
    {
        $this->provider = resolve(PrismManager::class)
            ->resolve($provider);

        $this->model = $model;

        return $this;
    }

    /**
     * Use the given string as the text to embed.
     */
    public function fromInput(string $input): self
    {
        $this->input = $input;

        return $this;
    }

    /**
     * Use the contents of a file as the text to embed.
     *
     * @throws PrismException when the path is not a regular file or cannot be read
     */
    public function fromFile(string $path): self
    {
        if (! is_file($path)) {
            throw new PrismException(sprintf('%s is not a valid file', $path));
        }

        // Check readability up front: file_get_contents() would otherwise emit
        // a PHP warning for an unreadable file before we can raise our own error.
        if (! is_readable($path)) {
            throw new PrismException(sprintf('%s contents could not be read', $path));
        }

        $contents = file_get_contents($path);

        if ($contents === false) {
            throw new PrismException(sprintf('%s contents could not be read', $path));
        }

        $this->input = $contents;

        return $this;
    }

    /**
     * Set the client options forwarded with the request.
     *
     * The options are passed to the provider untouched; their meaning is
     * defined by whatever client the provider uses (e.g. `['timeout' => 30]`).
     *
     * @param  array<string, mixed>  $options
     */
    public function withClientOptions(array $options): self
    {
        $this->clientOptions = $options;

        return $this;
    }

    /**
     * Set the retry configuration forwarded with the request.
     *
     * The arguments are stored positionally as [times, sleepMilliseconds, when, throw].
     * NOTE(review): presumably the provider spreads these into its HTTP
     * client's retry call; confirm against the provider implementation.
     *
     * @param  array<int, int>|int  $times
     */
    public function withClientRetry(
        array|int $times,
        Closure|int $sleepMilliseconds = 0,
        ?callable $when = null,
        bool $throw = true,
    ): self {
        $this->clientRetry = [$times, $sleepMilliseconds, $when, $throw];

        return $this;
    }

    /**
     * Build the Request and ask the provider for embeddings.
     *
     * @throws PrismException when using() has not been called or no input was provided
     */
    public function generate(): Response
    {
        // Typed properties stay uninitialized until using() runs; fail with a
        // domain exception rather than PHP's uninitialized-property Error.
        if (! isset($this->provider, $this->model)) {
            throw new PrismException('A provider and model must be selected with using() before generating embeddings');
        }

        // Only a truly empty string counts as missing input; '0' is valid text.
        if ($this->input === '') {
            throw new PrismException('Embeddings input is required');
        }

        return $this->provider->embeddings(new Request(
            model: $this->model,
            input: $this->input,
            clientOptions: $this->clientOptions,
            clientRetry: $this->clientRetry,
        ));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace EchoLabs\Prism\Embeddings; | ||
|
||
use Closure; | ||
|
||
/**
 * Immutable payload describing a single embeddings call.
 *
 * Every property is assigned once in the constructor and is read-only afterwards.
 */
class Request
{
    /** Name of the model the embeddings are requested from. */
    public readonly string $model;

    /** Text to be embedded. */
    public readonly string $input;

    /** @var array<string, mixed> Client options carried along with the request. */
    public readonly array $clientOptions;

    /** @var array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool} Positional retry settings. */
    public readonly array $clientRetry;

    /**
     * @param  array<string, mixed>  $clientOptions
     * @param  array{0: array<int, int>|int, 1?: Closure|int, 2?: ?callable, 3?: bool}  $clientRetry
     */
    public function __construct(
        string $model,
        string $input,
        array $clientOptions,
        array $clientRetry,
    ) {
        $this->model = $model;
        $this->input = $input;
        $this->clientOptions = $clientOptions;
        $this->clientRetry = $clientRetry;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace EchoLabs\Prism\Embeddings; | ||
|
||
use EchoLabs\Prism\ValueObjects\EmbeddingsUsage; | ||
|
||
/**
 * Immutable result of an embeddings call: the vector and the usage reported for it.
 */
class Response
{
    /**
     * The element type includes float because embedding vectors are typically
     * floating-point components; int and string stay allowed so existing
     * callers and providers keep type-checking.
     *
     * @param  array<int, int|float|string>  $embeddings  The embedding vector returned by the provider.
     * @param  EmbeddingsUsage  $usage  Usage information attached to the response.
     */
    public function __construct(
        public readonly array $embeddings,
        public readonly EmbeddingsUsage $usage,
    ) {}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.