Skip to content
This repository was archived by the owner on Jul 16, 2025. It is now read-only.

Commit 9182c34

Browse files
authored
feat: add audio & document input support for Gemini (#339)
1 parent 488d762 commit 9182c34

File tree

5 files changed

+88
-8
lines changed

5 files changed

+88
-8
lines changed

examples/google/audio-input.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
// Example script: sends a text question together with an MP3 fixture to a
// Gemini model through the Chain API and prints the model's answer.
3+
use PhpLlm\LlmChain\Chain\Chain;
4+
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
5+
use PhpLlm\LlmChain\Platform\Bridge\Google\PlatformFactory;
6+
use PhpLlm\LlmChain\Platform\Message\Content\Audio;
7+
use PhpLlm\LlmChain\Platform\Message\Message;
8+
use PhpLlm\LlmChain\Platform\Message\MessageBag;
9+
use Symfony\Component\Dotenv\Dotenv;
10+
11+
// dirname(__DIR__, 2) is the directory two levels above this script's own
// directory; the autoloader and the .env file are both resolved from there.
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
12+
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');
13+
14+
// Stop with a non-zero exit status when the API key is unset or empty, so the
// script fails with a readable message before any platform call is attempted.
if (empty($_ENV['GOOGLE_API_KEY'])) {
15+
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
16+
exit(1);
17+
}
18+
19+
// Create the platform from the API key and pick the model via the
// Gemini::GEMINI_1_5_FLASH constant.
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
20+
$model = new Gemini(Gemini::GEMINI_1_5_FLASH);
21+
22+
$chain = new Chain($platform, $model);
// A single user message with two content parts: the text prompt first, then
// the audio loaded from the test fixture file.
23+
$messages = new MessageBag(
24+
Message::ofUser(
25+
'What is this recording about?',
26+
Audio::fromFile(dirname(__DIR__, 2).'/tests/Fixture/audio.mp3'),
27+
),
28+
);
29+
$response = $chain->call($messages);
30+
31+
// Print the response content followed by a newline.
echo $response->getContent().\PHP_EOL;

examples/google/pdf-input-binary.php

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
// Example script: sends a PDF fixture together with a text question to a
// Gemini model through the Chain API and prints the model's answer.
3+
use PhpLlm\LlmChain\Chain\Chain;
4+
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
5+
use PhpLlm\LlmChain\Platform\Bridge\Google\PlatformFactory;
6+
use PhpLlm\LlmChain\Platform\Message\Content\Document;
7+
use PhpLlm\LlmChain\Platform\Message\Message;
8+
use PhpLlm\LlmChain\Platform\Message\MessageBag;
9+
use Symfony\Component\Dotenv\Dotenv;
10+
11+
// dirname(__DIR__, 2) is the directory two levels above this script's own
// directory; the autoloader and the .env file are both resolved from there.
require_once dirname(__DIR__, 2).'/vendor/autoload.php';
12+
(new Dotenv())->loadEnv(dirname(__DIR__, 2).'/.env');
13+
14+
// Stop with a non-zero exit status when the API key is unset or empty, so the
// script fails with a readable message before any platform call is attempted.
if (empty($_ENV['GOOGLE_API_KEY'])) {
15+
echo 'Please set the GOOGLE_API_KEY environment variable.'.\PHP_EOL;
16+
exit(1);
17+
}
18+
19+
// Create the platform from the API key and pick the model via the
// Gemini::GEMINI_1_5_FLASH constant.
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
20+
$model = new Gemini(Gemini::GEMINI_1_5_FLASH);
21+
22+
$chain = new Chain($platform, $model);
// A single user message with two content parts; here the document loaded from
// the test fixture file is passed first and the text prompt second.
23+
$messages = new MessageBag(
24+
Message::ofUser(
25+
Document::fromFile(dirname(__DIR__, 2).'/tests/Fixture/document.pdf'),
26+
'What is this document about?',
27+
),
28+
);
29+
$response = $chain->call($messages);
30+
31+
// Print the response content followed by a newline.
echo $response->getContent().\PHP_EOL;

src/Platform/Bridge/Google/Contract/UserMessageNormalizer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
88
use PhpLlm\LlmChain\Platform\Contract\Normalizer\ModelContractNormalizer;
9-
use PhpLlm\LlmChain\Platform\Message\Content\Image;
9+
use PhpLlm\LlmChain\Platform\Message\Content\File;
1010
use PhpLlm\LlmChain\Platform\Message\Content\Text;
1111
use PhpLlm\LlmChain\Platform\Message\UserMessage;
1212
use PhpLlm\LlmChain\Platform\Model;
@@ -38,7 +38,7 @@ public function normalize(mixed $data, ?string $format = null, array $context =
3838
if ($content instanceof Text) {
3939
$parts[] = ['text' => $content->text];
4040
}
41-
if ($content instanceof Image) {
41+
if ($content instanceof File) {
4242
$parts[] = ['inline_data' => [
4343
'mime_type' => $content->getFormat(),
4444
'data' => $content->asBase64(),

src/Platform/Bridge/Google/Gemini.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ public function __construct(string $name = self::GEMINI_2_PRO, array $options =
2626
$capabilities = [
2727
Capability::INPUT_MESSAGES,
2828
Capability::INPUT_IMAGE,
29+
Capability::INPUT_AUDIO,
30+
Capability::INPUT_PDF,
2931
Capability::OUTPUT_STREAMING,
3032
Capability::TOOL_CALLING,
3133
];

tests/Platform/Bridge/Google/Contract/UserMessageNormalizerTest.php

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
use PhpLlm\LlmChain\Platform\Bridge\Google\Contract\UserMessageNormalizer;
88
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
99
use PhpLlm\LlmChain\Platform\Contract;
10+
use PhpLlm\LlmChain\Platform\Message\Content\Audio;
11+
use PhpLlm\LlmChain\Platform\Message\Content\Document;
1012
use PhpLlm\LlmChain\Platform\Message\Content\File;
1113
use PhpLlm\LlmChain\Platform\Message\Content\Image;
1214
use PhpLlm\LlmChain\Platform\Message\Content\Text;
1315
use PhpLlm\LlmChain\Platform\Message\UserMessage;
1416
use PHPUnit\Framework\Attributes\CoversClass;
17+
use PHPUnit\Framework\Attributes\DataProvider;
1518
use PHPUnit\Framework\Attributes\Small;
1619
use PHPUnit\Framework\Attributes\Test;
1720
use PHPUnit\Framework\Attributes\UsesClass;
@@ -23,6 +26,9 @@
2326
#[UsesClass(UserMessage::class)]
2427
#[UsesClass(Text::class)]
2528
#[UsesClass(File::class)]
29+
#[UsesClass(Image::class)]
30+
#[UsesClass(Document::class)]
31+
#[UsesClass(Audio::class)]
2632
final class UserMessageNormalizerTest extends TestCase
2733
{
2834
#[Test]
@@ -55,22 +61,32 @@ public function normalizeTextContent(): void
5561
self::assertSame([['text' => 'Write a story about a magic backpack.']], $normalized);
5662
}
5763

64+
/**
 * Checks that a file-based content part placed after a text part is normalized
 * into an `inline_data` entry carrying the expected MIME type and base64 data.
 *
 * @param File   $content          binary content under test, supplied by binaryContentProvider()
 * @param string $expectedMimeType value expected under `inline_data.mime_type`
 * @param string $expectedPrefix   expected leading characters of the base64 `inline_data.data`
 */
#[DataProvider('binaryContentProvider')]
5865
#[Test]
59-
public function normalizeImageContent(): void
66+
public function normalizeBinaryContent(File $content, string $expectedMimeType, string $expectedPrefix): void
6067
{
6168
$normalizer = new UserMessageNormalizer();
62-
$imageContent = Image::fromFile(\dirname(__DIR__, 4).'/Fixture/image.jpg');
63-
$message = new UserMessage(new Text('Tell me about this instrument'), $imageContent);
69+
$message = new UserMessage(new Text('Tell me about this instrument'), $content);
6470

6571
$normalized = $normalizer->normalize($message);
6672

6773
// Two content items go in, so two parts are expected out: the text part
// at index 0 and the binary part at index 1.
self::assertCount(2, $normalized);
6874
self::assertSame(['text' => 'Tell me about this instrument'], $normalized[0]);
6975
self::assertArrayHasKey('inline_data', $normalized[1]);
70-
self::assertSame('image/jpeg', $normalized[1]['inline_data']['mime_type']);
76+
self::assertSame($expectedMimeType, $normalized[1]['inline_data']['mime_type']);
7177
self::assertNotEmpty($normalized[1]['inline_data']['data']);
7278

73-
// Verify that the base64 data string starts correctly for a JPEG
74-
self::assertStringStartsWith('/9j/', $normalized[1]['inline_data']['data']);
79+
// Verify that the base64 data string starts correctly
80+
self::assertStringStartsWith($expectedPrefix, $normalized[1]['inline_data']['data']);
81+
}
82+
83+
/**
* Yields one named dataset per binary content type exercised by
* normalizeBinaryContent(): the content object built from a fixture file,
* the MIME type expected for it, and the expected start of its base64 data.
*
84+
* @return iterable<string, array{0: File, 1: string, 2: string}>
85+
*/
86+
public static function binaryContentProvider(): iterable
87+
{
88+
// '/9j/' is the base64 encoding of the bytes FF D8 FF (JPEG start marker).
yield 'image' => [Image::fromFile(\dirname(__DIR__, 4).'/Fixture/image.jpg'), 'image/jpeg', '/9j/'];
89+
// 'JVBE' is the base64 encoding of the ASCII bytes "%PD" (start of "%PDF").
yield 'document' => [Document::fromFile(\dirname(__DIR__, 4).'/Fixture/document.pdf'), 'application/pdf', 'JVBE'];
90+
// 'SUQz' is the base64 encoding of the ASCII bytes "ID3", i.e. the fixture
// is expected to begin with an ID3 tag.
yield 'audio' => [Audio::fromFile(\dirname(__DIR__, 4).'/Fixture/audio.mp3'), 'audio/mpeg', 'SUQz'];
7591
}
7692
}

0 commit comments

Comments
 (0)