Skip to content

Commit

Permalink
restore sentinel strings and unintern string args to col
Browse files Browse the repository at this point in the history
  • Loading branch information
timbess committed Oct 10, 2023
1 parent dbecf43 commit dbe5431
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 94 deletions.
2 changes: 0 additions & 2 deletions cpp/perspective/src/cpp/computed_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,6 @@ t_computed_function_store::t_computed_function_store(t_expression_vocab& vocab,
computed_function::replace(vocab, regex_mapping, is_type_validator))
, m_replace_all_fn(computed_function::replace_all(
vocab, regex_mapping, is_type_validator))
, m_add_one_fn(computed_function::add_one())
, m_index_fn(computed_function::index(pkey_map, source_table, row_idx))
, m_col_fn(computed_function::col(
vocab, is_type_validator, source_table, row_idx))
Expand Down Expand Up @@ -516,7 +515,6 @@ t_computed_function_store::register_computed_functions(
sym_table.add_function("month_of_year", m_month_of_year_fn);
sym_table.add_function("today", computed_function::today);
sym_table.add_function("now", computed_function::now);
sym_table.add_function("add_one", m_add_one_fn);

// String functions
sym_table.add_function("intern", m_intern_fn);
Expand Down
98 changes: 16 additions & 82 deletions cpp/perspective/src/cpp/computed_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ namespace computed_function {
std::string temp_str
= std::string(temp_string.begin(), temp_string.end());

if (m_is_type_validator) {
// Return the sentinel value which indicates a valid output from
// type checking, as the output value is not STATUS_CLEAR
return m_sentinel;
}

// Intern the string into the vocabulary.
rval.set(m_expression_vocab.intern(temp_str));
return rval;
Expand Down Expand Up @@ -2182,55 +2188,6 @@ namespace computed_function {
return rval;
}

add_one::add_one()
: exprtk::igeneric_function<t_tscalar>("T") {}
add_one::~add_one() {}

t_tscalar
add_one::operator()(t_parameter_list params) {
t_tscalar rval;
rval.clear();

t_generic_type& gt = params[0];
t_scalar_view temp(gt);
t_tscalar temp_scalar;

temp_scalar.set(temp());
t_dtype dtype = temp_scalar.get_dtype();

switch (dtype) {
case DTYPE_INT8:
case DTYPE_INT16:
case DTYPE_INT32:
rval.set(temp_scalar.to_int32() + 1);
break;
case DTYPE_INT64:
rval.set(temp_scalar.to_int64() + 1);
break;
case DTYPE_UINT8:
case DTYPE_UINT16:
case DTYPE_UINT32:
case DTYPE_UINT64:
rval.set(temp_scalar.to_uint64() + 1);
break;
case DTYPE_FLOAT32:
case DTYPE_FLOAT64:
rval.set(temp_scalar.to_double() + 1);
break;
case DTYPE_NONE:
rval.set(0.0);
default:
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!temp_scalar.is_valid()) {
return rval;
}

return rval;
}

index::index(const t_gstate::t_mapping& pkey_map,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("Z")
Expand All @@ -2254,7 +2211,7 @@ namespace computed_function {

col::col(t_expression_vocab& expression_vocab, bool is_type_validator,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("T")
: exprtk::igeneric_function<t_tscalar>("S")
, m_expression_vocab(expression_vocab)
, m_is_type_validator(is_type_validator)
, m_source_table(source_table)
Expand All @@ -2266,26 +2223,8 @@ namespace computed_function {
t_tscalar rval;
rval.clear();

t_generic_type& gt = parameters[0];
t_scalar_view temp(gt);
t_tscalar temp_scalar;

temp_scalar.set(temp());
t_dtype dtype = temp_scalar.get_dtype();

if (dtype != DTYPE_STR) {
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!temp_scalar.is_valid()) {
return rval;
}

std::string temp_str = temp_scalar.to_string();

// rval.set(m_expression_vocab.intern(temp_str));
// return rval;
t_string_view _colname(parameters[0]);
std::string temp_str = std::string(_colname.begin(), _colname.end());

if (!m_source_table->get_schema().has_column(temp_str)) {
rval.m_status = STATUS_CLEAR;
Expand All @@ -2302,7 +2241,7 @@ namespace computed_function {
vlookup::vlookup(t_expression_vocab& expression_vocab,
bool is_type_validator, std::shared_ptr<t_data_table> source_table,
t_uindex& row_idx)
: exprtk::igeneric_function<t_tscalar>("TT")
: exprtk::igeneric_function<t_tscalar>("ST")
, m_expression_vocab(expression_vocab)
, m_is_type_validator(is_type_validator)
, m_source_table(source_table)
Expand All @@ -2314,34 +2253,29 @@ namespace computed_function {
t_tscalar rval;
rval.clear();

t_generic_type& column_gt = parameters[0];
t_scalar_view column_gt_view(column_gt);
t_tscalar column_name;

column_name.set(column_gt_view());
t_dtype column_name_dtype = column_name.get_dtype();
t_string_view _colname(parameters[0]);
auto column_name = std::string(_colname.begin(), _colname.end());

t_generic_type& index_gt = parameters[1];
t_scalar_view index_gt_view(index_gt);
t_tscalar index;

index.set(index_gt_view());

if (column_name_dtype != DTYPE_STR || !index.is_numeric()) {
if (!index.is_numeric()) {
rval.m_status = STATUS_CLEAR;
return rval;
}

if (!column_name.is_valid() || !index.is_valid()) {
if (!index.is_valid()) {
return rval;
}

std::string col_name_str = column_name.to_string();
if (!m_source_table->get_schema().has_column(col_name_str)) {
if (!m_source_table->get_schema().has_column(column_name)) {
rval.m_status = STATUS_CLEAR;
return rval;
}
auto col = m_source_table->get_const_column(col_name_str);
auto col = m_source_table->get_const_column(column_name);

if (m_is_type_validator) {
rval.m_status = STATUS_VALID;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,6 @@ struct PERSPECTIVE_EXPORT t_computed_function_store {
computed_function::substring m_substring_fn;
computed_function::replace m_replace_fn;
computed_function::replace_all m_replace_all_fn;
computed_function::add_one m_add_one_fn;
computed_function::index m_index_fn;
computed_function::col m_col_fn;
computed_function::vlookup m_vlookup_fn;
Expand Down
3 changes: 0 additions & 3 deletions cpp/perspective/src/include/perspective/computed_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,6 @@ namespace computed_function {
// Length of the string
FUNCTION_HEADER(length)

// Add one to a column
FUNCTION_HEADER(add_one)

struct index : public exprtk::igeneric_function<t_tscalar> {
index(const t_pkey_mapping& pkey_map,
std::shared_ptr<t_data_table> source_table, t_uindex& row_idx);
Expand Down
11 changes: 11 additions & 0 deletions packages/perspective/src/js/perspective.js
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,17 @@ export default function (Module) {
)}'${value}'${match.substring(intern_idx + intern_fn.length)}`;
};

parsed_expression_string = parsed_expression_string.replace(
/(col|vlookup)\((.*)\)/g,
(match, _, args, __) => {
const start = match.indexOf(args);
return `${match.substring(0, start)}${args.replace(
/intern\('([^']*)'\)/g,
"'$1'"
)})`;
}
);

// Replace intern() for bucket and regex functions that take
// a string literal parameter and does not work if that param is
// interned. TODO: this is clumsy and we should have a better
Expand Down
3 changes: 2 additions & 1 deletion python/perspective/perspective/src/view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,8 @@ namespace binding {
std::shared_ptr<t_computed_expression> expression
= t_computed_expression_parser::precompute(expression_alias,
expression_string, parsed_expression_string, column_ids,
schema, expression_vocab, regex_mapping);
gnode.get_table_sptr(), gnode.get_pkey_map(), schema,
expression_vocab, regex_mapping);

expressions.push_back(expression);
schema->add_column(expression_alias, expression->get_dtype());
Expand Down
5 changes: 5 additions & 0 deletions python/perspective/perspective/table/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
EXPRESSION_COLUMN_NAME_REGEX = re.compile(r"\"(.*?[^\\])\"")
STRING_LITERAL_REGEX = re.compile(r"'(.*?[^\\])'")
FUNCTION_LITERAL_REGEX = re.compile(r"(bucket|match|match_all|search|indexof)\(.*?,\s*(intern\(\'(.+)\'\)).*\)")
MATCH_INTERNED_ARGS_REGEX = re.compile(r"intern\('([^']*)'\)")
UNINTERNED_FNS_REGEX = re.compile(r"(col|vlookup)\((.*)\)")
REPLACE_FN_REGEX = re.compile(r"(replace_all|replace)\(.*?,\s*(intern\(\'(.*)\'\)),.*\)")
BOOLEAN_LITERAL_REGEX = re.compile(r"([a-zA-Z_]+[a-zA-Z0-9_]*)")

Expand Down Expand Up @@ -197,6 +199,9 @@ def _parse_expression_strings(expressions):
parsed,
)

# remove _all_ `intern()` calls in these functions.
parsed = re.sub(UNINTERNED_FNS_REGEX, lambda match: re.sub(MATCH_INTERNED_ARGS_REGEX, r"'\1'", match.group(0)), parsed)

# remove the `intern()` in bucket and regex functions that take
# string literal parameters. TODO this logic should be centralized
# in C++ instead of being duplicated.
Expand Down
5 changes: 0 additions & 5 deletions rust/perspective-viewer/src/rust/exprtk/language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,11 +395,6 @@ thread_local! {
insert_text: "replace(${1:string}, ${2:pattern}, ${3:replacer})",
documentation: "Replaces all non-overlapping matches of pattern in string with replacer, or return the original string if no replaces were made.",
},
CompletionItemSuggestion {
label: "add_one",
insert_text: "add_one(${1:uint64})",
documentation: "Adds one to a number",
},
CompletionItemSuggestion {
label: "index",
insert_text: "index()",
Expand Down

0 comments on commit dbe5431

Please sign in to comment.