kql str value manager #214

Open · wants to merge 1 commit into base: main
6 changes: 3 additions & 3 deletions uncoder-core/app/translator/core/custom_types/values.py
@@ -10,7 +10,7 @@ class ValueType(CustomEnum):
     no_quotes_value = "no_q_value"
     bool_value = "bool_value"
     regex_value = "re_value"
-    greater_than_or_equal = "gte_value"
-    less_than_or_equal = "lte_value"
+    gte_value = "gte_value"
+    lte_value = "lte_value"
     multi_value = "multi_value"
-    ip = "ip"
+    ip_value = "ip_value"
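Note on the rename: each member's attribute name now matches its string value (`gte_value`, `lte_value`, `ip_value`), so the same constant can be used both to build a named regex group and to read it back from a match. A quick standalone illustration of that invariant (not project code):

```python
# Standalone sketch of the invariant this rename restores: attribute name == value.
class ValueType:  # simplified stand-in for the CustomEnum-based ValueType
    gte_value = "gte_value"
    lte_value = "lte_value"
    ip_value = "ip_value"


members = {k: v for k, v in vars(ValueType).items() if not k.startswith("_")}
assert all(name == value for name, value in members.items())
```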
14 changes: 9 additions & 5 deletions uncoder-core/app/translator/core/render.py
@@ -76,7 +76,7 @@ def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_typ
         return value_type or ValueType.value

     @staticmethod
-    def _wrap_str_value(value: str) -> str:
+    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:  # noqa: ARG004
         return value

     @staticmethod
@@ -98,10 +98,10 @@ def _pre_process_value(
         value_type = self._get_value_type(field, value, value_type)
         if isinstance(value, StrValue):
             value = self.str_value_manager.from_container_to_str(value, value_type)
-            return self._wrap_str_value(value) if wrap_str else value
+            return self._wrap_str_value(value, value_type) if wrap_str else value
         if isinstance(value, str):
             value = self.str_value_manager.escape_manager.escape(value, value_type)
-            return self._wrap_str_value(value) if wrap_str else value
+            return self._wrap_str_value(value, value_type) if wrap_str else value
         if isinstance(value, bool):
             return self._map_bool_value(value)
         if isinstance(value, int):
@@ -428,14 +428,18 @@ def _generate_from_tokenized_query_container_by_source_mapping(
         self, query_container: TokenizedQueryContainer, source_mapping: SourceMapping
     ) -> str:
         unmapped_fields = self.mappings.check_fields_mapping_existence(
-            query_container.meta_info.query_fields, source_mapping
+            query_container.meta_info.query_fields,
+            query_container.meta_info.function_fields_map,
+            self.platform_functions.manager.supported_render_names,
+            source_mapping,
         )
         rendered_functions = self.generate_functions(query_container.functions.functions, source_mapping)
         prefix = self.generate_prefix(source_mapping.log_source_signature, rendered_functions.rendered_prefix)

         if source_mapping.raw_log_fields:
             defined_raw_log_fields = self.generate_raw_log_fields(
-                fields=query_container.meta_info.query_fields, source_mapping=source_mapping
+                fields=query_container.meta_info.query_fields + query_container.meta_info.function_fields,
+                source_mapping=source_mapping,
             )
             prefix += f"\n{defined_raw_log_fields}"
         query = self.generate_query(tokens=query_container.tokens, source_mapping=source_mapping)
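The new `value_type` parameter lets a platform render vary quoting per value type while the base behaviour stays unchanged. A minimal sketch of the kind of override this enables (illustrative only, not the actual KQL render; the quoting rules below are assumptions):

```python
class ValueType:  # simplified stand-in for app.translator.core.custom_types.values.ValueType
    value = "value"
    regex_value = "re_value"


class ExampleFieldValueRender:
    """Illustrative render whose string wrapping depends on value_type."""

    @staticmethod
    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:
        if value_type == ValueType.regex_value:
            return f"@'{value}'"  # hypothetical verbatim-string wrapping for regex values
        return f"'{value}'"


render = ExampleFieldValueRender()
print(render._wrap_str_value("admin"))                              # 'admin'
print(render._wrap_str_value(r"\d+\.exe", ValueType.regex_value))   # @'\d+\.exe'
```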
10 changes: 9 additions & 1 deletion uncoder-core/app/translator/core/str_value_manager.py
@@ -130,6 +130,14 @@ def has_spec_symbols(self) -> bool:
         return any(isinstance(el, BaseSpecSymbol) for el in self.split_value)


+RE_STR_ALPHA_NUM_SYMBOLS_MAP = {
+    "b": ReWordBoundarySymbol,
+    "w": ReWordSymbol,
+    "d": ReDigitalSymbol,
+    "s": ReWhiteSpaceSymbol,
+}
+
+
 RE_STR_SPEC_SYMBOLS_MAP = {
     "?": ReZeroOrOneQuantifier,
     "*": ReZeroOrMoreQuantifier,
@@ -189,7 +197,7 @@ def from_str_to_container(
     ) -> StrValue:
         return StrValue(value=value, split_value=[value])

-    def from_re_str_to_container(self, value: str) -> StrValue:
+    def from_re_str_to_container(self, value: str, value_type: str = ValueType.regex_value) -> StrValue:  # noqa: ARG002
         split = []
         prev_char = None
         inside_curly_brackets = False
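Moving the `\b`/`\w`/`\d`/`\s` mapping into the shared `RE_STR_ALPHA_NUM_SYMBOLS_MAP` means platform managers can reuse it instead of redefining it, as the AQL manager below now does. A sketch of what a new platform manager could look like (illustrative; the wildcard map is a placeholder and the platform escape manager is omitted):

```python
from typing import ClassVar

from app.translator.core.str_value_manager import (
    RE_STR_ALPHA_NUM_SYMBOLS_MAP,
    RE_STR_SPEC_SYMBOLS_MAP,
    BaseSpecSymbol,
    SingleSymbolWildCard,
    StrValueManager,
)


class ExamplePlatformStrValueManager(StrValueManager):
    # escape_manager would be set to the platform's escape manager here.
    re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP  # shared \b, \w, \d, \s handling
    re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP
    str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"_": SingleSymbolWildCard}
```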
@@ -22,10 +22,10 @@
 from app.translator.managers import render_manager
 from app.translator.platforms.anomali.const import anomali_query_details
 from app.translator.platforms.anomali.mapping import AnomaliMappings, anomali_query_mappings
-from app.translator.platforms.base.sql.renders.sql import SqlFieldValueRender
+from app.translator.platforms.base.sql.renders.sql import SQLFieldValueRender


-class AnomaliFieldValueRender(SqlFieldValueRender):
+class AnomaliFieldValueRender(SQLFieldValueRender):
     details: PlatformDetails = anomali_query_details

     def contains_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
@@ -20,11 +20,11 @@
 from app.translator.managers import parser_manager
 from app.translator.platforms.athena.const import athena_query_details
 from app.translator.platforms.athena.mapping import AthenaMappings, athena_query_mappings
-from app.translator.platforms.base.sql.parsers.sql import SqlQueryParser
+from app.translator.platforms.base.sql.parsers.sql import SQLQueryParser


 @parser_manager.register_supported_by_roota
-class AthenaQueryParser(SqlQueryParser):
+class AthenaQueryParser(SQLQueryParser):
     details: PlatformDetails = athena_query_details
     mappings: AthenaMappings = athena_query_mappings
     query_delimiter_pattern = r"\sFROM\s\S*\sWHERE\s"
@@ -21,15 +21,15 @@
 from app.translator.managers import render_manager
 from app.translator.platforms.athena.const import athena_query_details
 from app.translator.platforms.athena.mapping import AthenaMappings, athena_query_mappings
-from app.translator.platforms.base.sql.renders.sql import SqlFieldValueRender, SqlQueryRender
+from app.translator.platforms.base.sql.renders.sql import SQLFieldValueRender, SQLQueryRender


-class AthenaFieldValueRender(SqlFieldValueRender):
+class AthenaFieldValueRender(SQLFieldValueRender):
     details: PlatformDetails = athena_query_details


 @render_manager.register
-class AthenaQueryRender(SqlQueryRender):
+class AthenaQueryRender(SQLQueryRender):
     details: PlatformDetails = athena_query_details
     mappings: AthenaMappings = athena_query_mappings

@@ -31,7 +31,7 @@ class AQLFieldValueRender(BaseFieldValueRender):
     str_value_manager = aql_str_value_manager

     @staticmethod
-    def _wrap_str_value(value: str) -> str:
+    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:  # noqa: ARG004
         return f"'{value}'"

     def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
@@ -23,12 +23,9 @@
 from app.translator.core.custom_types.values import ValueType
 from app.translator.core.str_value_manager import (
     CONTAINER_SPEC_SYMBOLS_MAP,
+    RE_STR_ALPHA_NUM_SYMBOLS_MAP,
     RE_STR_SPEC_SYMBOLS_MAP,
     BaseSpecSymbol,
-    ReDigitalSymbol,
-    ReWhiteSpaceSymbol,
-    ReWordBoundarySymbol,
-    ReWordSymbol,
     SingleSymbolWildCard,
     StrValue,
     StrValueManager,
@@ -43,12 +40,7 @@
 class AQLStrValueManager(StrValueManager):
     escape_manager = aql_escape_manager
     container_spec_symbols_map: ClassVar[dict[type[BaseSpecSymbol], str]] = AQL_CONTAINER_SPEC_SYMBOLS_MAP
-    re_str_alpha_num_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {
-        "b": ReWordBoundarySymbol,
-        "w": ReWordSymbol,
-        "d": ReDigitalSymbol,
-        "s": ReWhiteSpaceSymbol,
-    }
+    re_str_alpha_num_symbols_map = RE_STR_ALPHA_NUM_SYMBOLS_MAP
     re_str_spec_symbols_map = RE_STR_SPEC_SYMBOLS_MAP
     str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {
         "_": SingleSymbolWildCard,
@@ -78,7 +70,7 @@ def from_str_to_container(

         return StrValue(value, self._concat(split))

-    def from_re_str_to_container(self, value: str) -> StrValue:
+    def from_re_str_to_container(self, value: str, value_type: str = ValueType.regex_value) -> StrValue:  # noqa: ARG002
         value = value.replace("''", "'")
         return super().from_re_str_to_container(value)

10 changes: 5 additions & 5 deletions uncoder-core/app/translator/platforms/base/aql/tokenizer.py
@@ -17,7 +17,7 @@
 """

 import re
-from typing import ClassVar, Optional, Union
+from typing import Any, ClassVar, Optional, Union

 from app.translator.core.custom_types.tokens import OperatorType
 from app.translator.core.custom_types.values import ValueType
@@ -27,7 +27,6 @@
 from app.translator.core.models.query_tokens.function_value import FunctionValue
 from app.translator.core.models.query_tokens.identifier import Identifier
 from app.translator.core.models.query_tokens.keyword import Keyword
-from app.translator.core.str_value_manager import StrValue
 from app.translator.core.tokenizer import QueryTokenizer
 from app.translator.platforms.base.aql.const import (
     DOUBLE_QUOTES_FIELD_NAME_PATTERN,
@@ -75,12 +74,13 @@ def should_process_value_wildcards(operator: Optional[str]) -> bool:

     def get_operator_and_value(
         self, match: re.Match, mapped_operator: str = OperatorType.EQ, operator: Optional[str] = None
-    ) -> tuple[str, StrValue]:
+    ) -> tuple[str, Any]:
         if (num_value := get_match_group(match, group_name=ValueType.number_value)) is not None:
-            return mapped_operator, StrValue(num_value, split_value=[num_value])
+            return mapped_operator, num_value

         if (bool_value := get_match_group(match, group_name=ValueType.bool_value)) is not None:
-            return mapped_operator, StrValue(bool_value, split_value=[bool_value])
+            mapped_bool_value = bool_value == "true"
+            return mapped_operator, mapped_bool_value

         if (s_q_value := get_match_group(match, group_name=ValueType.single_quotes_value)) is not None:
             if mapped_operator == OperatorType.REGEX:
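With this change numeric and boolean captures are no longer wrapped in `StrValue`: numbers pass through as matched and booleans become real `bool`s. Roughly, as a simplified standalone sketch of the new branch logic (not the actual tokenizer):

```python
from typing import Any, Optional


def map_primitive_value(num_value: Optional[str], bool_value: Optional[str]) -> Any:
    """Mirrors the new handling: numbers stay as matched, booleans become bool."""
    if num_value is not None:
        return num_value  # previously wrapped as StrValue(num_value, split_value=[num_value])
    if bool_value is not None:
        return bool_value == "true"  # previously StrValue(...), now a native bool
    return None


assert map_primitive_value("8080", None) == "8080"
assert map_primitive_value(None, "true") is True
assert map_primitive_value(None, "false") is False
```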
@@ -10,7 +10,7 @@ class LuceneEscapeManager(EscapeManager):
         ValueType.value: [
             EscapeDetails(pattern=r'([_!@#$%^&*=+()\[\]{}|;:\'",.<>?/`~\-\s\\])', escape_symbols=r"\\\1")
         ],
-        ValueType.ip: [EscapeDetails(pattern=r"([/])", escape_symbols=r"\\\1")],
+        ValueType.ip_value: [EscapeDetails(pattern=r"([/])", escape_symbols=r"\\\1")],
     }

@@ -34,7 +34,7 @@ class LuceneFieldValueRender(BaseFieldValueRender):
     def _get_value_type(field_name: str, value: Union[int, str, StrValue], value_type: Optional[str] = None) -> str:  # noqa: ARG004
         is_ip_field = field_name and (field_name.endswith(".ip") or field_name.endswith(".address"))
         if is_ip_field and value_type != ValueType.regex_value:
-            return ValueType.ip
+            return ValueType.ip_value

         return ValueType.value

@@ -52,8 +52,8 @@ class LuceneTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
         rf"(?P<{ValueType.no_quotes_value}>(?:[a-zA-Z\*\?0-9=%#№!;_/,\'\.$@|]|\\[*?\"-_=%#№!;,\.$@/\s\\])+)\s*"
     )
     re_value_pattern = rf"/(?P<{ValueType.regex_value}>(?:[:a-zA-Z\*\?0-9=+%#№;\\\-_\,\"\'\.$&^@!\(\)\{{\}}\[\]\s?<>|]|\\\/)+)/(?=\s+|\)|$)"  # noqa: E501
-    gte_value_pattern = rf"\[\s*(?P<{ValueType.greater_than_or_equal}>{_num_value_pattern})\s+TO\s+\*\s*\]"
-    lte_value_pattern = rf"\[\s*\*\s+TO\s+(?P<{ValueType.less_than_or_equal}>{_num_value_pattern})\s*\]"
+    gte_value_pattern = rf"\[\s*(?P<{ValueType.gte_value}>{_num_value_pattern})\s+TO\s+\*\s*\]"
+    lte_value_pattern = rf"\[\s*\*\s+TO\s+(?P<{ValueType.lte_value}>{_num_value_pattern})\s*\]"
     range_value_pattern = rf"{gte_value_pattern}|{lte_value_pattern}"
     _value_pattern = rf"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}|{range_value_pattern}"  # noqa: E501
     keyword_pattern = (
@@ -97,10 +97,10 @@ def get_operator_and_value(  # noqa: PLR0911
         if (d_q_value := get_match_group(match, group_name=ValueType.double_quotes_value)) is not None:
             return mapped_operator, lucene_str_value_manager.from_str_to_container(d_q_value)

-        if (gte_value := get_match_group(match, group_name=ValueType.greater_than_or_equal)) is not None:
+        if (gte_value := get_match_group(match, group_name=ValueType.gte_value)) is not None:
             return OperatorType.GTE, StrValue(gte_value, split_value=[gte_value])

-        if (lte_value := get_match_group(match, group_name=ValueType.less_than_or_equal)) is not None:
+        if (lte_value := get_match_group(match, group_name=ValueType.lte_value)) is not None:
             return OperatorType.LTE, StrValue(lte_value, split_value=[lte_value])

         return super().get_operator_and_value(match, mapped_operator, operator)
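For context, the renamed groups capture Lucene range shorthand such as `field:[500 TO *]`. A standalone approximation of that behaviour (the real `_num_value_pattern` is broader than the `\d+` used here):

```python
import re

GTE_VALUE = "gte_value"  # the string ValueType.gte_value now carries
LTE_VALUE = "lte_value"

# Simplified stand-ins for gte_value_pattern / lte_value_pattern.
gte_pattern = rf"\[\s*(?P<{GTE_VALUE}>\d+)\s+TO\s+\*\s*\]"
lte_pattern = rf"\[\s*\*\s+TO\s+(?P<{LTE_VALUE}>\d+)\s*\]"

match = re.search(gte_pattern, "response_size:[500 TO *]")
assert match is not None and match.group(GTE_VALUE) == "500"   # maps to OperatorType.GTE

match = re.search(lte_pattern, "response_size:[* TO 1024]")
assert match is not None and match.group(LTE_VALUE) == "1024"  # maps to OperatorType.LTE
```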
@@ -5,8 +5,8 @@
 from app.translator.core.models.escape_details import EscapeDetails


-class SplEscapeManager(EscapeManager):
+class SPLEscapeManager(EscapeManager):
     escape_map: ClassVar[dict[str, list[EscapeDetails]]] = {ValueType.value: [EscapeDetails(pattern=r"([<>=\"'\|\\])")]}


-spl_escape_manager = SplEscapeManager()
+spl_escape_manager = SPLEscapeManager()
24 changes: 14 additions & 10 deletions uncoder-core/app/translator/platforms/base/spl/parsers/spl.py
@@ -21,19 +21,19 @@
 from app.translator.core.models.functions.base import ParsedFunctions
 from app.translator.core.models.query_container import RawQueryContainer, TokenizedQueryContainer
 from app.translator.core.parser import PlatformQueryParser
-from app.translator.platforms.base.spl.functions import SplFunctions
-from app.translator.platforms.base.spl.tokenizer import SplTokenizer
+from app.translator.platforms.base.spl.functions import SPLFunctions
+from app.translator.platforms.base.spl.tokenizer import SPLTokenizer

 TSTATS_FUNC = "tstats"


-class SplQueryParser(PlatformQueryParser):
+class SPLQueryParser(PlatformQueryParser):
     log_source_pattern = r"^___source_type___\s*=\s*(?:\"(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"  # noqa: E501
-    rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"
+    rule_name_pattern = r"`(?P<name>(?:[:a-zA-Z*0-9=+%#\-_/,;`?~‘\'.<>$&^@!\]\[()\s])*)`"  # noqa: RUF001
     log_source_key_types = ("index", "source", "sourcetype", "sourcecategory")

-    platform_functions: SplFunctions = None
-    tokenizer = SplTokenizer()
+    platform_functions: SPLFunctions = None
+    tokenizer = SPLTokenizer()

     wrapped_with_comment_pattern = r"^\s*```(?:|\n|.)*```"

@@ -56,7 +56,7 @@ def _parse_log_sources(self, query: str) -> tuple[dict[str, list[str]], str]:
     def _parse_query(self, query: str) -> tuple[str, dict[str, list[str]], ParsedFunctions]:
         if re.match(self.rule_name_pattern, query):
             search = re.search(self.rule_name_pattern, query, flags=re.IGNORECASE)
-            query = query[:search.start()] + query[search.end():]
+            query = query[: search.start()] + query[search.end() :]
         query = query.strip()
         log_sources, query = self._parse_log_sources(query)
         query, functions = self.platform_functions.parse(query)
@@ -72,9 +72,13 @@ def parse(self, raw_query_container: RawQueryContainer) -> TokenizedQueryContain

         query, log_sources, functions = self._parse_query(raw_query_container.query)
         query_tokens = self.get_query_tokens(query)
-        field_tokens = self.get_field_tokens(query_tokens, functions.functions)
-        source_mappings = self.get_source_mappings(field_tokens, log_sources)
+        query_field_tokens, function_field_tokens, function_field_tokens_map = self.get_field_tokens(
+            query_tokens, functions.functions
+        )
+        source_mappings = self.get_source_mappings(query_field_tokens + function_field_tokens, log_sources)
         meta_info = raw_query_container.meta_info
-        meta_info.query_fields = field_tokens
+        meta_info.query_fields = query_field_tokens
+        meta_info.function_fields = function_field_tokens
+        meta_info.function_fields_map = function_field_tokens_map
         meta_info.source_mapping_ids = [source_mapping.source_id for source_mapping in source_mappings]
         return TokenizedQueryContainer(tokens=query_tokens, meta_info=meta_info, functions=functions)
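The split matters because some fields appear only inside SPL functions and still need to be checked against the source mapping. Roughly (illustrative values; the real tokens are Field objects rather than strings, and the map shape is an assumption):

```python
# For an SPL query like:
#     index=main EventCode=4625 | stats count by src_ip
# "EventCode" is a query field, while "src_ip" only occurs inside the stats
# function, so it is returned separately and merged for mapping checks.
query_field_tokens = ["EventCode"]
function_field_tokens = ["src_ip"]
function_field_tokens_map = {"stats": ["src_ip"]}  # assumed shape of the map

fields_to_map = query_field_tokens + function_field_tokens
assert fields_to_map == ["EventCode", "src_ip"]
```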
8 changes: 4 additions & 4 deletions uncoder-core/app/translator/platforms/base/spl/renders/spl.py
@@ -26,11 +26,11 @@
 from app.translator.platforms.base.spl.str_value_manager import spl_str_value_manager


-class SplFieldValueRender(BaseFieldValueRender):
+class SPLFieldValueRender(BaseFieldValueRender):
     str_value_manager = spl_str_value_manager

     @staticmethod
-    def _wrap_str_value(value: str) -> str:
+    def _wrap_str_value(value: str, value_type: str = ValueType.value) -> str:  # noqa: ARG004
         return f'"{value}"'

     def _pre_process_value(
@@ -42,7 +42,7 @@ def _pre_process_value(
         wrap_int: bool = False,  # noqa: ARG002
     ) -> Union[int, str]:
         value = super()._pre_process_value(field, value, value_type=value_type, wrap_str=wrap_str)
-        return self._wrap_str_value(str(value)) if not isinstance(value, str) else value
+        return self._wrap_str_value(str(value), value_type) if not isinstance(value, str) else value

     def equal_modifier(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
         if isinstance(value, list):
@@ -87,7 +87,7 @@ def keywords(self, field: str, value: DEFAULT_VALUE_TYPE) -> str:
         return f"{self._pre_process_value(field, value, wrap_str=True)}"


-class SplQueryRender(PlatformQueryRender):
+class SPLQueryRender(PlatformQueryRender):
     or_token = "OR"
     and_token = "AND"
     not_token = "NOT"
@@ -23,7 +23,7 @@
 from app.translator.platforms.base.spl.escape_manager import spl_escape_manager


-class SplStrValueManager(StrValueManager):
+class SPLStrValueManager(StrValueManager):
     escape_manager = spl_escape_manager
     str_spec_symbols_map: ClassVar[dict[str, type[BaseSpecSymbol]]] = {"*": UnboundLenWildCard}

@@ -58,4 +58,4 @@ def from_str_to_container(
         return StrValue(self.escape_manager.remove_escape(value), self._concat(split))


-spl_str_value_manager = SplStrValueManager()
+spl_str_value_manager = SPLStrValueManager()
@@ -33,7 +33,7 @@
 from app.translator.tools.utils import get_match_group


-class SplTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
+class SPLTokenizer(QueryTokenizer, ANDLogicOperatorMixin):
     single_value_operators_map: ClassVar[dict[str, str]] = {
         "=": OperatorType.EQ,
         "<=": OperatorType.LTE,