Skip to content

Commit

Permalink
Use an entire data directory now to also persist the stateset for opc…
Browse files Browse the repository at this point in the history
…ode cache loading
  • Loading branch information
Toflar committed Oct 5, 2023
1 parent 98df8ff commit a70d596
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 65 deletions.
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"doctrine/lexer": "^2.0 || ^3.0",
"voku/portable-utf8": "^6.0",
"mjaschen/phpgeo": "^4.2",
"toflar/state-set-index": "^2.0",
"toflar/state-set-index": "dev-main",
"psr/log": "^3.0",
"nitotm/efficient-language-detector": "^2.0"
},
Expand Down
14 changes: 7 additions & 7 deletions src/Exception/InvalidConfigurationException.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,23 @@ public static function becauseAttributeNotSortable(string $attributeName): self
return new self(sprintf('Cannot sort on this type of attribute value for attribute "%s".', $attributeName));
}

public static function becauseInvalidAttributeName(string $attributeName): self
public static function becauseCouldNotCreateDataDir(string $folder): self
{
return new self(
sprintf(
'A valid attribute name starts with a letter, followed by any number of letters, numbers, or underscores. It must not exceed %d characters. "%s" given.',
Configuration::MAX_ATTRIBUTE_NAME_LENGTH,
$attributeName
'Could not create data directory at "%s".',
$folder
)
);
}

public static function becauseInvalidDbPath(string $dbPath): self
public static function becauseInvalidAttributeName(string $attributeName): self
{
return new self(
sprintf(
'"%s" does not exist, create an empty database file first.',
$dbPath
'A valid attribute name starts with a letter, followed by any number of letters, numbers, or underscores. It must not exceed %d characters. "%s" given.',
Configuration::MAX_ATTRIBUTE_NAME_LENGTH,
$attributeName
)
);
}
Expand Down
12 changes: 11 additions & 1 deletion src/Internal/Engine.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ public function __construct(
private Configuration $configuration,
private Tokenizer $tokenizer,
private Highlighter $highlighter,
private Parser $filterParser
private Parser $filterParser,
private ?string $dataDir = null
) {
$this->indexInfo = new IndexInfo($this);
$this->stateSetIndex = new StateSetIndex(
Expand Down Expand Up @@ -93,6 +94,11 @@ public function getConnection(): Connection
return $this->connection;
}

/**
 * Returns the data directory this engine was constructed with.
 *
 * @return string|null The directory path, or null when no data directory was passed to the constructor
 */
public function getDataDir(): ?string
{
return $this->dataDir;
}

/**
* @return array<string, mixed>|null
*/
Expand Down Expand Up @@ -232,6 +238,10 @@ public function upsert(
return $insertIdColumn !== '' ? (int) $existing[$insertIdColumn] : null;
}

/**
* @param array<string|int, mixed> $data
* @return array<string|int, int>
*/
private function extractDbalTypes(array $data): array
{
$types = [];
Expand Down
4 changes: 2 additions & 2 deletions src/Internal/Index/Indexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private function indexPrefix(string $prefix, int $termId): void
$state = $this->engine->getStateSetIndex()->index([$prefix])[$prefix];
}

$this->prefixCache[$prefix] = $this->engine->upsert(
$this->prefixCache[$prefix] = (int) $this->engine->upsert(
IndexInfo::TABLE_NAME_PREFIXES,
[
'prefix' => $prefix,
Expand Down Expand Up @@ -302,7 +302,7 @@ private function indexTerm(string $term, int $documentId, string $attributeName,
$state = $this->engine->getStateSetIndex()->index([$term])[$term];
}

$termId = $this->engine->upsert(
$termId = (int) $this->engine->upsert(
IndexInfo::TABLE_NAME_TERMS,
[
'term' => $term,
Expand Down
3 changes: 3 additions & 0 deletions src/Internal/Search/Searcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ private function createAnalyzedQuery(TokenCollection $tokens): string
return $query;
}

/**
* @param array<int> $states
*/
private function createStatesMatchWhere(array $states, string $table, string $term, int $levenshteinDistance, string $termColumnName): string
{
$where = [];
Expand Down
63 changes: 59 additions & 4 deletions src/Internal/StateSetIndex/StateSet.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,37 @@ public function persist(): void
'state' => $state,
], ['state']);
}

$all = $this->inMemoryStateSet->all();
$all = array_combine($this->inMemoryStateSet->all(), array_fill(0, \count($all), true));
$this->dumpStateSetCache($all);
}

/**
 * Dumps the given state set into the PHP cache file of the data directory.
 *
 * The file is a PHP script that returns the exported array, so it can be loaded again
 * with a require statement. Nothing is written when getStateSetCacheFile() returns null.
 *
 * The script is written to a temporary file in the same directory and then renamed over
 * the target, so that a process loading the cache at the same time never reads a
 * half-written script. Write failures are not escalated; the temporary file is cleaned up
 * and the method returns silently.
 *
 * @param array<int, bool> $stateSet
 */
private function dumpStateSetCache(array $stateSet): void
{
    $cacheFile = $this->getStateSetCacheFile();

    if ($cacheFile === null) {
        return;
    }

    $cache = '<?php return ' . var_export($stateSet, true) . ';';

    // Unique sibling path: rename() within one directory replaces the target in one step.
    $tmpFile = $cacheFile . '.' . uniqid('', true) . '.tmp';

    if (file_put_contents($tmpFile, $cache) !== false && rename($tmpFile, $cacheFile)) {
        return;
    }

    // Writing or renaming failed: do not leave the temporary file behind.
    if (is_file($tmpFile)) {
        unlink($tmpFile);
    }
}

/**
 * Resolves the path of the state set cache file.
 *
 * @return string|null "<data dir>/state_set.php", or null when the engine has no data directory
 */
private function getStateSetCacheFile(): ?string
{
    $dataDir = $this->engine->getDataDir();

    return $dataDir === null ? null : $dataDir . '/state_set.php';
}

private function initialize(): void
Expand All @@ -55,17 +86,41 @@ private function initialize(): void
return;
}

$this->inMemoryStateSet = new InMemoryStateSet();
$cacheFile = $this->getStateSetCacheFile();

if ($cacheFile === null) {
$data = $this->loadFromStorage();
} else {
if (!file_exists($cacheFile)) {
$data = $this->loadFromStorage();
$this->dumpStateSetCache($data);
} else {
$data = require_once $cacheFile;
}
}

$this->inMemoryStateSet = new InMemoryStateSet($data);

foreach ($this->engine->getConnection()->createQueryBuilder()
$this->initialized = true;
}

/**
* @return array<int, bool>
*/
private function loadFromStorage(): array
{
$storage = [];

foreach ($this->engine->getConnection()
->createQueryBuilder()
->select('state')
->from(IndexInfo::TABLE_NAME_STATE_SET)
->executeQuery()
->iterateAssociative() as $row
) {
$this->inMemoryStateSet->add((int) $row['state']);
$storage[(int) $row['state']] = true;
}

$this->initialized = true;
return $storage;
}
}
19 changes: 11 additions & 8 deletions src/LoupeFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ final class LoupeFactory
{
private const MIN_SQLITE_VERSION = '3.16.0'; // Introduction of Pragma functions

public function create(string $dbPath, Configuration $configuration): Loupe
public function create(string $dataDir, Configuration $configuration): Loupe
{
if (!file_exists($dbPath)) {
throw InvalidConfigurationException::becauseInvalidDbPath($dbPath);
if (!is_dir($dataDir)) {
if (!mkdir($dataDir, 0777, true)) {
throw InvalidConfigurationException::becauseCouldNotCreateDataDir($dataDir);
}
}

return $this->createFromConnection($this->createConnection($configuration, $dbPath), $configuration);
return $this->createFromConnection($this->createConnection($configuration, $dataDir), $configuration, $dataDir);
}

public function createInMemory(Configuration $configuration): Loupe
Expand All @@ -50,10 +52,10 @@ public function isSupported(): bool
return true;
}

private function createConnection(Configuration $configuration, ?string $dbPath = null): Connection
private function createConnection(Configuration $configuration, ?string $folder = null): Connection
{
$connection = null;
$dsnPart = $dbPath === null ? ':memory:' : ('notused:inthis@case/' . realpath($dbPath));
$dsnPart = $folder === null ? ':memory:' : ('notused:inthis@case/' . realpath($folder) . '/loupe.db');

// Try sqlite3 first, it seems way faster than the pdo-sqlite driver
try {
Expand Down Expand Up @@ -94,7 +96,7 @@ private function createConnection(Configuration $configuration, ?string $dbPath
return $connection;
}

private function createFromConnection(Connection $connection, Configuration $configuration): Loupe
private function createFromConnection(Connection $connection, Configuration $configuration, ?string $dataDir = null): Loupe
{
$tokenizer = $this->createTokenizer($configuration);

Expand All @@ -104,7 +106,8 @@ private function createFromConnection(Connection $connection, Configuration $con
$configuration,
$tokenizer,
new Highlighter($configuration, $tokenizer),
new Parser()
new Parser(),
$dataDir
)
);
}
Expand Down
6 changes: 3 additions & 3 deletions tests/Functional/FunctionalTestTrait.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@

trait FunctionalTestTrait
{
protected function createLoupe(Configuration $configuration, string $dbPath = ''): Loupe
protected function createLoupe(Configuration $configuration, string $dataDir = ''): Loupe
{
$factory = new LoupeFactory();

if ($dbPath === '') {
if ($dataDir === '') {
$loupe = $factory->createInMemory($configuration);
} else {
$loupe = $factory->create($dbPath, $configuration);
$loupe = $factory->create($dataDir, $configuration);
}

return $loupe;
Expand Down
9 changes: 5 additions & 4 deletions tests/Functional/IndexTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -345,14 +345,15 @@ public function testNullValueIsIrrelevantForDocumentSchema(): void
public function testReindex(): void
{
$fs = new Filesystem();
$tmpDb = $fs->tempnam(sys_get_temp_dir(), 'lt');
$tmpDataDir = sys_get_temp_dir() . '/' . uniqid('lt');
$fs->mkdir($tmpDataDir);

$configuration = Configuration::create()
->withFilterableAttributes(['departments', 'gender'])
->withSortableAttributes(['firstname'])
;

$loupe = $this->createLoupe($configuration, $tmpDb);
$loupe = $this->createLoupe($configuration, $tmpDataDir);
$loupe->addDocument(self::getSandraDocument());

$this->assertFalse($loupe->needsReindex());
Expand All @@ -361,14 +362,14 @@ public function testReindex(): void
->withSearchableAttributes(['firstname'])
;

$loupe = $this->createLoupe($configuration, $tmpDb);
$loupe = $this->createLoupe($configuration, $tmpDataDir);

// Just making sure that it was actually persistent
$this->assertSame(1, $loupe->countDocuments());

$this->assertTrue($loupe->needsReindex());

$fs->remove($tmpDb);
$fs->remove($tmpDataDir);
}

public function testReplacingTheSameDocumentWorks(): void
Expand Down
67 changes: 34 additions & 33 deletions tests/Functional/SearchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -224,31 +224,6 @@ public static function highlightingProvider(): \Generator
],
];

yield 'Highlight with prefix typo' => [
'assat',
['title', 'overview'],
false,
[
'hits' => [
[
'id' => 24,
'title' => 'Kill Bill: Vol. 1',
'overview' => 'An assassin is shot by her ruthless employer, Bill, and other members of their assassination circle – but she lives to plot her vengeance.',
'_formatted' => [
'id' => 24,
'title' => 'Kill Bill: Vol. 1',
'overview' => 'An <em>assassin</em> is shot by her ruthless employer, Bill, and other members of their <em>assassination</em> circle – but she lives to plot her vengeance.',
],
],
],
'query' => 'assat',
'hitsPerPage' => 20,
'page' => 1,
'totalPages' => 1,
'totalHits' => 1,
],
];

yield 'Highlight without typo' => [
'assassin',
['title', 'overview'],
Expand Down Expand Up @@ -547,15 +522,9 @@ public static function prefixSearchProvider(): \Generator
],
];

yield 'Searching for "hucka" should return Huckleberry (with typo)' => [
yield 'Searching for "hucka" should not return Huckleberry (with typo) because prefix typo search is disabled' => [
'hucka',
[
[
'id' => 6,
'firstname' => 'Huckleberry',
'lastname' => 'Finn',
],
],
[],
];

yield 'Searching for "my friend huckl" should return Huckleberry because "huckl" is the last token' => [
Expand Down Expand Up @@ -1128,6 +1097,38 @@ public function testPrefixSearch(string $query, array $expectedResults, int $min
]);
}

/**
 * With typo tolerance explicitly enabled for prefix search, the mistyped prefix "assat"
 * must match and highlight "assassin" and "assassination" in the movies fixture.
 */
public function testPrefixSearchAndHighlightingWithTypoSearchEnabled(): void
{
    $prefixTypoTolerance = TypoTolerance::create()->withEnabledForPrefixSearch(true);
    $config = Configuration::create()->withTypoTolerance($prefixTypoTolerance);
    $loupe = $this->setupLoupeWithMoviesFixture($config);

    $parameters = SearchParameters::create()
        ->withQuery('assat')
        ->withAttributesToRetrieve(['id', 'title', 'overview'])
        ->withSort(['title:asc'])
        ->withAttributesToHighlight(['overview']);

    // The only expected hit, including its highlighted representation.
    $killBill = [
        'id' => 24,
        'title' => 'Kill Bill: Vol. 1',
        'overview' => 'An assassin is shot by her ruthless employer, Bill, and other members of their assassination circle – but she lives to plot her vengeance.',
        '_formatted' => [
            'id' => 24,
            'title' => 'Kill Bill: Vol. 1',
            'overview' => 'An <em>assassin</em> is shot by her ruthless employer, Bill, and other members of their <em>assassination</em> circle – but she lives to plot her vengeance.',
        ],
    ];

    $expected = [
        'hits' => [$killBill],
        'query' => 'assat',
        'hitsPerPage' => 20,
        'page' => 1,
        'totalPages' => 1,
        'totalHits' => 1,
    ];

    $this->searchAndAssertResults($loupe, $parameters, $expected);
}

public function testPrefixSearchIsNotAppliedToPhraseSearch(): void
{
$loupe = $this->setupLoupeWithMoviesFixture();
Expand Down
Loading

0 comments on commit a70d596

Please sign in to comment.