Skip to content

Commit 321f20a

Browse files
committed
feat: implement document loader & transformer for store indexing (#343)
1 parent b1dc740 commit 321f20a

19 files changed

+780
-126
lines changed

examples/store/document-splitting.php

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Store\Document\Loader\TextFileLoader;
13+
use Symfony\AI\Store\Document\Transformer\TextSplitTransformer;
14+
15+
require_once dirname(__DIR__).'/vendor/autoload.php';

// The fixture lives two directory levels above this script.
$source = dirname(__DIR__, 2).'/fixtures/lorem.txt';

$loader = new TextFileLoader();
$splitter = new TextSplitTransformer();

// Feed the loader's documents into the transformer, then materialize the
// resulting iterable so the whole result can be dumped at once.
$transformed = $splitter($loader($source));
$documents = iterator_to_array($transformed);

dump($documents);
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\AI\Platform\Bridge\OpenAI\Embeddings;
13+
use Symfony\AI\Platform\Bridge\OpenAI\PlatformFactory;
14+
use Symfony\AI\Store\Document\TextDocument;
15+
use Symfony\AI\Store\Document\VectorDocument;
16+
use Symfony\AI\Store\Document\Vectorizer;
17+
use Symfony\Component\Dotenv\Dotenv;
18+
use Symfony\Component\Uid\Uuid;
19+
20+
require_once dirname(__DIR__).'/vendor/autoload.php';

$dotenv = new Dotenv();
$dotenv->loadEnv(dirname(__DIR__).'/.env');

// Without an API key the platform cannot be created, so bail out early.
if (empty($_ENV['OPENAI_API_KEY'])) {
    echo 'Please set the OPENAI_API_KEY environment variable.'.\PHP_EOL;
    exit(1);
}

$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
$embeddings = new Embeddings(Embeddings::TEXT_3_LARGE);
$vectorizer = new Vectorizer($platform, $embeddings);

// Three small sample documents, each with a fresh random UUID.
$textDocuments = [
    new TextDocument(Uuid::v4(), 'Hello World'),
    new TextDocument(Uuid::v4(), 'Lorem ipsum dolor sit amet'),
    new TextDocument(Uuid::v4(), 'PHP Hypertext Preprocessor'),
];

$vectorDocuments = $vectorizer->vectorizeDocuments($textDocuments);

// Show only the dimensions reported by each resulting vector.
$dimensions = array_map(
    static fn (VectorDocument $document) => $document->vector->getDimensions(),
    $vectorDocuments,
);

dump($dimensions);

examples/store/mariadb-similarity-search-gemini.php

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,21 @@
1111

1212
use Doctrine\DBAL\DriverManager;
1313
use Doctrine\DBAL\Tools\DsnParser;
14-
use PhpLlm\LlmChain\Chain\Chain;
15-
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
16-
use PhpLlm\LlmChain\Chain\Toolbox\Tool\SimilaritySearch;
17-
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
18-
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings;
19-
use PhpLlm\LlmChain\Platform\Bridge\Google\Embeddings\TaskType;
20-
use PhpLlm\LlmChain\Platform\Bridge\Google\Gemini;
21-
use PhpLlm\LlmChain\Platform\Bridge\Google\PlatformFactory;
22-
use PhpLlm\LlmChain\Platform\Message\Message;
23-
use PhpLlm\LlmChain\Platform\Message\MessageBag;
24-
use PhpLlm\LlmChain\Store\Bridge\MariaDB\Store;
25-
use PhpLlm\LlmChain\Store\Document\Metadata;
26-
use PhpLlm\LlmChain\Store\Document\TextDocument;
27-
use PhpLlm\LlmChain\Store\Indexer;
14+
use Symfony\AI\Agent\Agent;
15+
use Symfony\AI\Agent\Toolbox\AgentProcessor;
16+
use Symfony\AI\Agent\Toolbox\Tool\SimilaritySearch;
17+
use Symfony\AI\Agent\Toolbox\Toolbox;
18+
use Symfony\AI\Platform\Bridge\Google\Embeddings;
19+
use Symfony\AI\Platform\Bridge\Google\Embeddings\TaskType;
20+
use Symfony\AI\Platform\Bridge\Google\Gemini;
21+
use Symfony\AI\Platform\Bridge\Google\PlatformFactory;
22+
use Symfony\AI\Platform\Message\Message;
23+
use Symfony\AI\Platform\Message\MessageBag;
24+
use Symfony\AI\Store\Bridge\MariaDB\Store;
25+
use Symfony\AI\Store\Document\Metadata;
26+
use Symfony\AI\Store\Document\TextDocument;
27+
use Symfony\AI\Store\Document\Vectorizer;
28+
use Symfony\AI\Store\Indexer;
2829
use Symfony\Component\Dotenv\Dotenv;
2930
use Symfony\Component\Uid\Uuid;
3031

@@ -66,20 +67,21 @@
6667
// create embeddings for documents
6768
$platform = PlatformFactory::create($_ENV['GOOGLE_API_KEY']);
6869
$embeddings = new Embeddings(options: ['dimensions' => 768, 'task_type' => TaskType::SemanticSimilarity]);
69-
$indexer = new Indexer($platform, $embeddings, $store);
70+
$vectorizer = new Vectorizer($platform, $embeddings);
71+
$indexer = new Indexer($vectorizer, $store);
7072
$indexer->index($documents);
7173

7274
$model = new Gemini(Gemini::GEMINI_2_FLASH_LITE);
7375

7476
$similaritySearch = new SimilaritySearch($platform, $embeddings, $store);
7577
$toolbox = Toolbox::create($similaritySearch);
76-
$processor = new ChainProcessor($toolbox);
77-
$chain = new Chain($platform, $model, [$processor], [$processor]);
78+
$processor = new AgentProcessor($toolbox);
79+
$agent = new Agent($platform, $model, [$processor], [$processor]);
7880

7981
$messages = new MessageBag(
8082
Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
8183
Message::ofUser('Which movie fits the theme of the mafia?')
8284
);
83-
$response = $chain->call($messages);
85+
$response = $agent->call($messages);
8486

8587
echo $response->getContent().\PHP_EOL;

examples/store/mariadb-similarity-search.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
use Symfony\AI\Store\Bridge\MariaDB\Store;
2222
use Symfony\AI\Store\Document\Metadata;
2323
use Symfony\AI\Store\Document\TextDocument;
24+
use Symfony\AI\Store\Document\Vectorizer;
2425
use Symfony\AI\Store\Indexer;
2526
use Symfony\Component\Dotenv\Dotenv;
2627
use Symfony\Component\Uid\Uuid;
@@ -62,7 +63,8 @@
6263

6364
// create embeddings for documents
6465
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
65-
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
66+
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
67+
$indexer = new Indexer($vectorizer, $store);
6668
$indexer->index($documents);
6769

6870
$model = new GPT(GPT::GPT_4O_MINI);

examples/store/mongodb-similarity-search.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
use Symfony\AI\Store\Bridge\MongoDB\Store;
2323
use Symfony\AI\Store\Document\Metadata;
2424
use Symfony\AI\Store\Document\TextDocument;
25+
use Symfony\AI\Store\Document\Vectorizer;
2526
use Symfony\AI\Store\Indexer;
2627
use Symfony\Component\Dotenv\Dotenv;
2728
use Symfony\Component\Uid\Uuid;
@@ -61,7 +62,8 @@
6162

6263
// create embeddings for documents
6364
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
64-
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
65+
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
66+
$indexer = new Indexer($vectorizer, $store);
6567
$indexer->index($documents);
6668

6769
// initialize the index

examples/store/pinecone-similarity-search.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
use Symfony\AI\Store\Bridge\Pinecone\Store;
2323
use Symfony\AI\Store\Document\Metadata;
2424
use Symfony\AI\Store\Document\TextDocument;
25+
use Symfony\AI\Store\Document\Vectorizer;
2526
use Symfony\AI\Store\Indexer;
2627
use Symfony\Component\Dotenv\Dotenv;
2728
use Symfony\Component\Uid\Uuid;
@@ -55,7 +56,8 @@
5556

5657
// create embeddings for documents
5758
$platform = PlatformFactory::create($_ENV['OPENAI_API_KEY']);
58-
$indexer = new Indexer($platform, $embeddings = new Embeddings(), $store);
59+
$vectorizer = new Vectorizer($platform, $embeddings = new Embeddings());
60+
$indexer = new Indexer($vectorizer, $store);
5961
$indexer->index($documents);
6062

6163
$model = new GPT(GPT::GPT_4O_MINI);

fixtures/lorem.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa.
2+
Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis,
3+
ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo,
4+
fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae,
5+
justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper
6+
nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim.
7+
Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius
8+
laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies
9+
nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero,
10+
sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem.
11+
Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis
12+
ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec
13+
sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc, quis gravida
14+
magna mi a libero. Fusce vulputate eleifend sapien. Vestibulum purus quam, scelerisque ut, mollis sed,
15+
nonummy id, met
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Document\Loader;
13+
14+
use Symfony\AI\Store\Document\LoaderInterface;
15+
use Symfony\AI\Store\Document\Metadata;
16+
use Symfony\AI\Store\Document\TextDocument;
17+
use Symfony\AI\Store\Exception\RuntimeException;
18+
use Symfony\Component\Uid\Uuid;
19+
20+
/**
 * Loader that turns the content of a single text file into one TextDocument.
 *
 * @author Christopher Hertel <[email protected]>
 */
final readonly class TextFileLoader implements LoaderInterface
{
    /**
     * Reads the file at $source and yields it as a single TextDocument.
     *
     * The document gets a random UUID (v4) as its id, the trimmed file content
     * as its text, and metadata holding the given path under the "source" key.
     *
     * This method is a generator: the checks and the read only happen once the
     * returned iterable is iterated, so exceptions surface at that point.
     *
     * @param string               $source  path of the file to read
     * @param array<string, mixed> $options accepted for interface compatibility; not used by this loader
     *
     * @return iterable<TextDocument> exactly one document for the given file
     *
     * @throws RuntimeException when $source is not a regular file or cannot be read
     */
    public function __invoke(string $source, array $options = []): iterable
    {
        if (!is_file($source)) {
            throw new RuntimeException(\sprintf('File "%s" does not exist.', $source));
        }

        // file_get_contents() raises an E_WARNING on an unreadable file before
        // returning false; checking up front keeps the failure a clean
        // RuntimeException even when warnings are converted to exceptions.
        if (!is_readable($source)) {
            throw new RuntimeException(\sprintf('Unable to read file "%s"', $source));
        }

        $content = file_get_contents($source);

        // Still needed: the read itself can fail after the checks above passed.
        if (false === $content) {
            throw new RuntimeException(\sprintf('Unable to read file "%s"', $source));
        }

        yield new TextDocument(Uuid::v4(), trim($content), new Metadata([
            'source' => $source,
        ]));
    }
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Document;
13+
14+
/**
 * Contract for invokable loaders that produce TextDocuments from a source.
 *
 * @author Christopher Hertel <[email protected]>
 */
interface LoaderInterface
{
    /**
     * Loads the documents identified by the given source.
     *
     * @param string               $source  identifier the loader reads the documents from, e.g. a file path, a folder, or a URL
     * @param array<string, mixed> $options loader-specific options that control the loading process
     *
     * @return iterable<TextDocument> the TextDocuments loaded from the source
     */
    public function __invoke(string $source, array $options = []): iterable;
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\AI\Store\Document\Transformer;
13+
14+
use Symfony\AI\Store\Document\TransformerInterface;
15+
16+
/**
 * Transformer that applies a sequence of transformers one after another.
 */
final readonly class ChainTransformer implements TransformerInterface
{
    /**
     * Transformers in the order they are applied.
     *
     * @var list<TransformerInterface>
     */
    private array $transformers;

    /**
     * @param iterable<TransformerInterface> $transformers transformers to chain, in application order
     */
    public function __construct(iterable $transformers)
    {
        // Keys carry no meaning here. Dropping them matters for Traversables:
        // a generator may yield the same key more than once (e.g. through
        // "yield from"), and preserving keys would silently overwrite earlier
        // transformers with later ones.
        $this->transformers = $transformers instanceof \Traversable
            ? iterator_to_array($transformers, false)
            : array_values($transformers);
    }

    /**
     * Pipes the documents through every transformer of the chain.
     *
     * Each transformer receives the output of the previous one; the same
     * $options array is handed to every transformer. With an empty chain the
     * given documents are returned unchanged.
     *
     * @param iterable             $documents documents passed to the first transformer
     * @param array<string, mixed> $options   options forwarded unchanged to each transformer
     *
     * @return iterable result of the last transformer in the chain
     */
    public function __invoke(iterable $documents, array $options = []): iterable
    {
        foreach ($this->transformers as $transformer) {
            $documents = $transformer($documents, $options);
        }

        return $documents;
    }
}

0 commit comments

Comments
 (0)