Skip to content

Commit 7571034

Browse files
authored
Initialize the unified index updater for HASH and JSON data type (apache#2111)
1 parent 8a19bd5 commit 7571034

10 files changed

+272
-4
lines changed

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ include(cmake/fmt.cmake)
138138
include(cmake/jsoncons.cmake)
139139
include(cmake/xxhash.cmake)
140140
include(cmake/span.cmake)
141+
include(cmake/trie.cmake)
141142

142143
if (ENABLE_LUAJIT)
143144
include(cmake/luajit.cmake)
@@ -169,6 +170,7 @@ list(APPEND EXTERNAL_LIBS Threads::Threads)
169170
list(APPEND EXTERNAL_LIBS ${Backtrace_LIBRARY})
170171
list(APPEND EXTERNAL_LIBS xxhash)
171172
list(APPEND EXTERNAL_LIBS span-lite)
173+
list(APPEND EXTERNAL_LIBS tsl_hat_trie)
172174

173175
# Add git sha to version.h
174176
find_package(Git REQUIRED)

NOTICE

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ The text of each license is also included in licenses/LICENSE-[project].txt
6565
* fmt(https://github.com/fmtlib/fmt)
6666
* LuaJIT(https://github.com/KvrocksLabs/LuaJIT)
6767
* lua(https://github.com/KvrocksLabs/lua, alternative to LuaJIT)
68+
* hat-trie(https://github.com/Tessil/hat-trie)
6869

6970
================================================================
7071
Boost Software License Version 1.0

cmake/trie.cmake

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Make sure this module is only processed once per configure run.
include_guard()

# Brings in the project's FetchContent_* helper functions used below.
include(cmake/utils.cmake)

# Declare the hat-trie dependency (Tessil/hat-trie) pinned to an exact commit.
# NOTE(review): the MD5 value is presumably the checksum of the downloaded
# archive and must be refreshed whenever the pinned commit changes -- confirm
# against FetchContent_DeclareGitHubWithMirror in cmake/utils.cmake.
FetchContent_DeclareGitHubWithMirror(
  trie
  Tessil/hat-trie
  906e6abd1e7063f1dacd3a6b270aa654b525eb0a
  MD5=a930364e9f6b60371319664bddf78000
)

FetchContent_MakeAvailableWithArgs(trie)

licenses/LICENSE-hat-trie.txt

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

src/common/status.h

+3
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ class [[nodiscard]] Status {
6666

6767
// Blocking
6868
BlockingCmd,
69+
70+
// Search
71+
NoPrefixMatched,
6972
};
7073

7174
Status() : impl_{nullptr} {}

src/search/indexer.cc

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*
19+
*/
20+
21+
#include "indexer.h"
22+
23+
#include <variant>
24+
25+
#include "storage/redis_metadata.h"
26+
#include "types/redis_hash.h"
27+
28+
namespace redis {
29+
30+
// Builds a retriever bound to the value stored under `key` in namespace `ns`.
//
// - HASH: loads the hash metadata; individual fields are fetched lazily by Retrieve().
// - JSON: reads the whole JSON document up front.
//
// Returns Status::NotOK carrying the storage error text when the metadata or
// document cannot be read.
//
// Lifetime: for HASH the returned retriever keeps `key` as a std::string_view
// (see FieldValueRetriever::HashData), so the caller must keep the underlying
// key buffer alive for as long as the retriever is used.
StatusOr<FieldValueRetriever> FieldValueRetriever::Create(SearchOnDataType type, std::string_view key,
                                                          engine::Storage *storage, const std::string &ns) {
  if (type == SearchOnDataType::HASH) {
    Hash db(storage, ns);
    std::string ns_key = db.AppendNamespacePrefix(key);
    HashMetadata metadata(false);
    auto s = db.GetMetadata(ns_key, &metadata);
    if (!s.ok()) return {Status::NotOK, s.ToString()};
    // The constructor takes its arguments by value (sink parameters): move the
    // locals instead of copying them a second time.
    return FieldValueRetriever(std::move(db), std::move(metadata), key);
  }

  if (type == SearchOnDataType::JSON) {
    Json db(storage, ns);
    std::string ns_key = db.AppendNamespacePrefix(key);
    JsonMetadata metadata(false);
    JsonValue value;
    auto s = db.read(ns_key, &metadata, &value);
    if (!s.ok()) return {Status::NotOK, s.ToString()};
    // Move the parsed document into the retriever rather than deep-copying it.
    return FieldValueRetriever(std::move(value));
  }

  assert(false && "unreachable code: unexpected SearchOnDataType");
  __builtin_unreachable();
}
52+
53+
// Fetches the value of `field` from the bound key and stores it in `*output`.
//
// - HASH: `field` is a hash field name; the sub-key is read from storage under
//   a fresh snapshot and the storage status is returned as-is.
// - JSON: `field` is treated as a JSON path; exactly one match is required,
//   otherwise NotFound is returned. A failing path lookup is reported as
//   Corruption carrying the lookup's error message.
rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) {
  if (auto *hash_data = std::get_if<HashData>(&db); hash_data != nullptr) {
    auto &[hash, metadata, key] = *hash_data;
    const std::string ns_key = hash.AppendNamespacePrefix(key);

    LatestSnapShot ss(hash.storage_);
    rocksdb::ReadOptions read_options;
    read_options.snapshot = ss.GetSnapShot();

    const std::string sub_key =
        InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode();
    return hash.storage_->Get(read_options, sub_key, output);
  }

  if (auto *json = std::get_if<JsonData>(&db); json != nullptr) {
    auto matched = json->Get(field);
    if (!matched.IsOK()) return rocksdb::Status::Corruption(matched.Msg());

    if (matched->value.size() != 1) {
      return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique");
    }

    *output = matched->value[0].as_string();
    return rocksdb::Status::OK();
  }

  // The variant only has the two alternatives handled above.
  __builtin_unreachable();
}
74+
75+
// Collects the current values of all indexed fields of `key` in namespace `ns`.
//
// Requires `indexer` to be non-null: its `storage` is used for every lookup.
//
// Returns:
//   - Status::NotOK if the key's type cannot be determined, if the key is not
//     of the data type this updater indexes, or if any storage read fails;
//   - otherwise a map from field name to value. Fields that are not present
//     on the key are skipped rather than reported as errors.
StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
  Database db(indexer->storage, ns);

  RedisType type = kRedisNone;
  auto s = db.Type(key, &type);
  if (!s.ok()) return {Status::NotOK, s.ToString()};

  if (type != static_cast<RedisType>(on_data_type)) {
    // not the expected type, stop record
    return {Status::NotOK, "this data type cannot be indexed"};
  }

  auto retriever = GET_OR_RET(FieldValueRetriever::Create(on_data_type, key, indexer->storage, ns));

  FieldValues values;
  for (const auto &[field, info] : fields) {
    std::string value;
    // Distinct name: do not shadow the type-lookup status `s` above.
    auto retrieve_status = retriever.Retrieve(field, &value);
    if (retrieve_status.IsNotFound()) continue;
    if (!retrieve_status.ok()) return {Status::NotOK, retrieve_status.ToString()};

    // `value` is not used again in this iteration, so hand its buffer over.
    values.emplace(field, std::move(value));
  }

  return values;
}
101+
102+
// Takes ownership of `updater` and registers it under each of its key prefixes.
//
// The updater is appended to `updaters` (a std::deque, so appending at the end
// does not invalidate references to elements already stored) and `prefix_map`
// stores raw pointers to the stored element.
void GlobalIndexer::Add(IndexUpdater updater) {
  auto &up = updaters.emplace_back(std::move(updater));

  // IndexUpdater::Record() dereferences `indexer->storage`; wire the
  // back-pointer here so a registered updater can never be left with a null
  // indexer.
  up.indexer = this;

  for (const auto &prefix : up.prefixes) {
    prefix_map.emplace(prefix, &up);
  }
}
108+
109+
// Dispatches `key` to the updater registered under the longest prefix that
// matches it and returns that updater's recorded field values.
// Yields Status::NoPrefixMatched when no registered prefix matches `key`.
StatusOr<IndexUpdater::FieldValues> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
  auto matched = prefix_map.longest_prefix(key);
  if (matched == prefix_map.end()) {
    return {Status::NoPrefixMatched};
  }

  IndexUpdater *updater = matched.value();
  return updater->Record(key, ns);
}
117+
118+
} // namespace redis

src/search/indexer.h

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*
19+
*/
20+
21+
#pragma once
22+
23+
#include <tsl/htrie_map.h>
24+
25+
#include <deque>
26+
#include <map>
27+
#include <utility>
28+
#include <variant>
29+
30+
#include "commands/commander.h"
31+
#include "config/config.h"
32+
#include "indexer.h"
33+
#include "search/search_encoding.h"
34+
#include "server/server.h"
35+
#include "storage/redis_metadata.h"
36+
#include "storage/storage.h"
37+
#include "types/redis_hash.h"
38+
#include "types/redis_json.h"
39+
40+
namespace redis {
41+
42+
struct GlobalIndexer;
43+
44+
struct FieldValueRetriever {
45+
struct HashData {
46+
Hash hash;
47+
HashMetadata metadata;
48+
std::string_view key;
49+
50+
HashData(Hash hash, HashMetadata metadata, std::string_view key)
51+
: hash(std::move(hash)), metadata(std::move(metadata)), key(key) {}
52+
};
53+
using JsonData = JsonValue;
54+
55+
using Variant = std::variant<HashData, JsonData>;
56+
Variant db;
57+
58+
static StatusOr<FieldValueRetriever> Create(SearchOnDataType type, std::string_view key, engine::Storage *storage,
59+
const std::string &ns);
60+
61+
explicit FieldValueRetriever(Hash hash, HashMetadata metadata, std::string_view key)
62+
: db(std::in_place_type<HashData>, std::move(hash), std::move(metadata), key) {}
63+
64+
explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type<JsonData>, std::move(json)) {}
65+
66+
rocksdb::Status Retrieve(std::string_view field, std::string *output);
67+
};
68+
69+
struct IndexUpdater {
70+
using FieldValues = std::map<std::string, std::string>;
71+
72+
SearchOnDataType on_data_type;
73+
std::vector<std::string> prefixes;
74+
std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
75+
GlobalIndexer *indexer = nullptr;
76+
77+
StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
78+
};
79+
80+
struct GlobalIndexer {
81+
std::deque<IndexUpdater> updaters;
82+
tsl::htrie_map<char, IndexUpdater *> prefix_map;
83+
84+
engine::Storage *storage = nullptr;
85+
86+
explicit GlobalIndexer(engine::Storage *storage) : storage(storage) {}
87+
88+
void Add(IndexUpdater updater);
89+
StatusOr<IndexUpdater::FieldValues> Record(std::string_view key, const std::string &ns);
90+
};
91+
92+
} // namespace redis

src/search/search_encoding.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,9 @@ struct SearchFieldMetadata {
7373

7474
void DecodeFlag(uint8_t flag) { noindex = flag & 1; }
7575

76-
void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
76+
virtual void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
7777

78-
rocksdb::Status Decode(Slice *input) {
78+
virtual rocksdb::Status Decode(Slice *input) {
7979
uint8_t flag = 0;
8080
if (!GetFixed8(input, &flag)) {
8181
return rocksdb::Status::Corruption(kErrorInsufficientLength);
@@ -96,13 +96,13 @@ struct SearchTagFieldMetadata : SearchFieldMetadata {
9696
char separator = ',';
9797
bool case_sensitive = false;
9898

99-
void Encode(std::string *dst) const {
99+
void Encode(std::string *dst) const override {
100100
SearchFieldMetadata::Encode(dst);
101101
PutFixed8(dst, separator);
102102
PutFixed8(dst, case_sensitive);
103103
}
104104

105-
rocksdb::Status Decode(Slice *input) {
105+
rocksdb::Status Decode(Slice *input) override {
106106
if (auto s = SearchFieldMetadata::Decode(input); !s.ok()) {
107107
return s;
108108
}

src/types/redis_hash.h

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ class Hash : public SubKeyScanner {
6666

6767
private:
6868
rocksdb::Status GetMetadata(const Slice &ns_key, HashMetadata *metadata);
69+
70+
friend struct FieldValueRetriever;
6971
};
7072

7173
} // namespace redis

src/types/redis_json.h

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ class Json : public Database {
7676
rocksdb::Status numop(JsonValue::NumOpEnum op, const std::string &user_key, const std::string &path,
7777
const std::string &value, JsonValue *result);
7878
std::vector<rocksdb::Status> readMulti(const std::vector<Slice> &ns_keys, std::vector<JsonValue> &values);
79+
80+
friend struct FieldValueRetriever;
7981
};
8082

8183
} // namespace redis

0 commit comments

Comments
 (0)