Skip to content

feat: add audio & document input support for Gemini #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions examples/google/audio-input.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\Google\Gemini;
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// Bootstrap: the autoloader and the .env file are both resolved from the
// parent directory of this script's folder.
$examplesDir = dirname(__DIR__);
require_once $examplesDir.'/vendor/autoload.php';
(new Dotenv())->loadEnv($examplesDir.'/.env');

// Bail out early with a non-zero exit code when no usable API key is configured.
$apiKey = $_ENV['GOOGLE_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the GOOGLE_API_KEY environment variable.', \PHP_EOL;
    exit(1);
}

$agent = new Agent(
    PlatformFactory::create($apiKey),
    new Gemini(Gemini::GEMINI_1_5_FLASH),
);

// A single user message: the question text followed by the audio attachment.
$recording = Audio::fromFile(dirname(__DIR__, 2).'/fixtures/audio.mp3');
$question = Message::ofUser('What is this recording about?', $recording);

$response = $agent->call(new MessageBag($question));

echo $response->getContent(), \PHP_EOL;
40 changes: 40 additions & 0 deletions examples/google/pdf-input-binary.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\AI\Agent\Agent;
use Symfony\AI\Platform\Bridge\Google\Gemini;
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
use Symfony\AI\Platform\Message\Content\Document;
use Symfony\AI\Platform\Message\Message;
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// Bootstrap: the autoloader and the .env file are both resolved from the
// parent directory of this script's folder.
$examplesDir = dirname(__DIR__);
require_once $examplesDir.'/vendor/autoload.php';
(new Dotenv())->loadEnv($examplesDir.'/.env');

// Bail out early with a non-zero exit code when no usable API key is configured.
$apiKey = $_ENV['GOOGLE_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the GOOGLE_API_KEY environment variable.', \PHP_EOL;
    exit(1);
}

$agent = new Agent(
    PlatformFactory::create($apiKey),
    new Gemini(Gemini::GEMINI_1_5_FLASH),
);

// A single user message: the PDF attachment first, then the question text.
$pdf = Document::fromFile(dirname(__DIR__, 2).'/fixtures/document.pdf');
$question = Message::ofUser($pdf, 'What is this document about?');

$response = $agent->call(new MessageBag($question));

echo $response->getContent(), \PHP_EOL;
4 changes: 2 additions & 2 deletions examples/google/toolcall.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
use Symfony\AI\Platform\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

require_once dirname(__DIR__, 2).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');
require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

if (empty($_ENV['GOOGLE_API_KEY'])) {
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

use Symfony\AI\Platform\Bridge\Google\Gemini;
use Symfony\AI\Platform\Contract\Normalizer\ModelContractNormalizer;
use Symfony\AI\Platform\Message\Content\Image;
use Symfony\AI\Platform\Message\Content\File;
use Symfony\AI\Platform\Message\Content\Text;
use Symfony\AI\Platform\Message\UserMessage;
use Symfony\AI\Platform\Model;
Expand Down Expand Up @@ -45,7 +45,7 @@ public function normalize(mixed $data, ?string $format = null, array $context =
if ($content instanceof Text) {
$parts[] = ['text' => $content->text];
}
if ($content instanceof Image) {
if ($content instanceof File) {
$parts[] = ['inline_data' => [
'mime_type' => $content->getFormat(),
'data' => $content->asBase64(),
Expand Down
2 changes: 2 additions & 0 deletions src/platform/src/Bridge/Google/Gemini.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public function __construct(string $name = self::GEMINI_2_PRO, array $options =
$capabilities = [
Capability::INPUT_MESSAGES,
Capability::INPUT_IMAGE,
Capability::INPUT_AUDIO,
Capability::INPUT_PDF,
Capability::OUTPUT_STREAMING,
Capability::TOOL_CALLING,
];
Expand Down
4 changes: 2 additions & 2 deletions src/platform/src/Bridge/Google/ModelHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,13 @@ private function convertChoice(array $choice): Choice

/**
 * Converts a raw tool-call array into a ToolCall object.
 *
 * The "id" key is optional: when it is absent the ToolCall is created with
 * an empty-string id instead of reading an undefined array key.
 *
 * @param array{
 *     id?: string,
 *     name: string,
 *     args: mixed[]
 * } $toolCall
 */
private function convertToolCall(array $toolCall): ToolCall
{
    // Single return path; the fallback keeps a missing "id" from raising an
    // undefined-key warning and passing null where a string is expected.
    return new ToolCall($toolCall['id'] ?? '', $toolCall['name'], $toolCall['args']);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
namespace Symfony\AI\Platform\Tests\Bridge\Google\Contract;

use PHPUnit\Framework\Attributes\CoversClass;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Small;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\Attributes\UsesClass;
use PHPUnit\Framework\TestCase;
use Symfony\AI\Platform\Bridge\Google\Contract\UserMessageNormalizer;
use Symfony\AI\Platform\Bridge\Google\Gemini;
use Symfony\AI\Platform\Contract;
use Symfony\AI\Platform\Message\Content\Audio;
use Symfony\AI\Platform\Message\Content\Document;
use Symfony\AI\Platform\Message\Content\File;
use Symfony\AI\Platform\Message\Content\Image;
use Symfony\AI\Platform\Message\Content\Text;
Expand All @@ -30,6 +33,9 @@
#[UsesClass(UserMessage::class)]
#[UsesClass(Text::class)]
#[UsesClass(File::class)]
#[UsesClass(Image::class)]
#[UsesClass(Document::class)]
#[UsesClass(Audio::class)]
final class UserMessageNormalizerTest extends TestCase
{
#[Test]
Expand Down Expand Up @@ -62,22 +68,32 @@ public function normalizeTextContent(): void
self::assertSame([['text' => 'Write a story about a magic backpack.']], $normalized);
}

/**
 * Checks that a binary attachment placed after a text part is normalized
 * into an "inline_data" part with the expected MIME type and base64 data.
 *
 * Each data set supplies the content object, the MIME type it must report,
 * and the characters its base64 payload must start with.
 */
#[DataProvider('binaryContentProvider')]
#[Test]
public function normalizeBinaryContent(File $content, string $expectedMimeType, string $expectedPrefix): void
{
    $normalizer = new UserMessageNormalizer();
    $message = new UserMessage(new Text('Tell me about this instrument'), $content);

    $normalized = $normalizer->normalize($message);

    // One part for the text, one for the attachment, in that order.
    self::assertCount(2, $normalized);
    self::assertSame(['text' => 'Tell me about this instrument'], $normalized[0]);
    self::assertArrayHasKey('inline_data', $normalized[1]);
    self::assertSame($expectedMimeType, $normalized[1]['inline_data']['mime_type']);
    self::assertNotEmpty($normalized[1]['inline_data']['data']);

    // Verify that the base64 data string starts with the prefix expected for this file type.
    self::assertStringStartsWith($expectedPrefix, $normalized[1]['inline_data']['data']);
}

/**
 * Supplies one fixture per binary content type, paired with the MIME type
 * it should report and the leading characters expected in its base64 form.
 *
 * @return iterable<string, array{0: File, 1: string, 2: string}>
 */
public static function binaryContentProvider(): iterable
{
    // All fixtures share one directory, six levels above this test's folder.
    $fixtures = \dirname(__DIR__, 6).'/fixtures';

    yield 'image' => [
        Image::fromFile($fixtures.'/image.jpg'),
        'image/jpeg',
        '/9j/',
    ];
    yield 'document' => [
        Document::fromFile($fixtures.'/document.pdf'),
        'application/pdf',
        'JVBE',
    ];
    yield 'audio' => [
        Audio::fromFile($fixtures.'/audio.mp3'),
        'audio/mpeg',
        'SUQz',
    ];
}
}