Skip to content

Commit

Permalink
feat: Improve lowercase & partial word search (pinterest#430)
Browse files (browse the repository at this point in the history)
* Improve lowercase & partial word search

* remove debug
  • Loading branch information
czgu authored Mar 3, 2021
1 parent 23d8e4e commit dcf4610
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 8 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "querybook",
- "version": "2.6.1",
+ "version": "2.7.0",
"description": "A Big Data Webapp",
"private": true,
"scripts": {
Expand Down
26 changes: 25 additions & 1 deletion querybook/config/elasticsearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,32 @@ tables:
tokenizer: standard
char_filter:
- html_strip
table_name_lowercase:
type: custom
tokenizer: alphanum_tokenizer
filter:
- lowercase
edge_ngram_lowercase:
type: custom
tokenizer: edge_ngram_tokenizer
filter:
- lowercase
normalizer:
case_insensitive:
type: custom
filter:
- lowercase
tokenizer:
edge_ngram_tokenizer:
type: edge_ngram
min_gram: 3
max_gram: 15
token_chars:
- letter
- digit
alphanum_tokenizer:
type: simple_pattern
pattern: '[A-Za-z0-9]+'
mappings:
tables:
properties:
Expand All @@ -65,7 +86,10 @@ tables:
normalizer: case_insensitive
full_name:
type: text
- analyzer: whitespace
+ analyzer: table_name_lowercase
full_name_ngram:
type: text
analyzer: edge_ngram_lowercase
completion_name:
type: completion
analyzer: keyword
Expand Down
4 changes: 3 additions & 1 deletion querybook/server/datasources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ def _match_table_fields(fields):
for field in fields:
# 'table_name', 'description', and 'column' are fields used by Table search
if field == "table_name":
- search_fields.append("full_name^20")
+ search_fields.append("full_name^10")
+ search_fields.append("full_name_ngram^15")
elif field == "description":
search_fields.append("description")
elif field == "column":
Expand Down Expand Up @@ -232,6 +233,7 @@ def _construct_tables_query(
}
)
)

return json.dumps(query)


Expand Down
8 changes: 3 additions & 5 deletions querybook/server/logic/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def get_table_weight(table_id: int, session=None) -> int:
def table_to_es(table, session=None):
schema = table.data_schema

- column_names = list(map(lambda c: c.name, table.columns))
+ column_names = [c.name for c in table.columns]
schema_name = schema.name
table_name = table.name
description = (
Expand All @@ -235,16 +235,14 @@ def table_to_es(table, session=None):

full_name = "{}.{}".format(schema_name, table_name)
weight = get_table_weight(table.id, session=session)
- table_name_words = list(filter(lambda s: len(s), table_name.split("_")))
- schema_words = list(filter(lambda s: len(s), schema_name.split("_")))
- full_name_spaces = " ".join(schema_words + table_name_words)

expand_table = {
"id": table.id,
"metastore_id": schema.metastore_id,
"schema": schema_name,
"name": table_name,
- "full_name": full_name_spaces,
+ "full_name": full_name,
+ "full_name_ngram": full_name,
"completion_name": {
"input": [full_name, table_name,],
"weight": weight,
Expand Down

0 comments on commit dcf4610

Please sign in to comment.