Skip to content

Commit c6cbbe7

Browse files
authored
Merge pull request #6 from UncoderIO/logsource-and-value-parsing-fixes
Elasticsearch and OpenSearch regex fixes; Sigma source mapping fix
2 parents 06c84ff + ecf3fdf commit c6cbbe7

File tree

5 files changed

+21
-12
lines changed

5 files changed

+21
-12
lines changed

siem-converter/app/converter/backends/elasticsearch/parsers/elasticsearch.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,16 @@ class ElasticSearchParser(Parser):
3232
mappings: ElasticSearchMappings = elasticsearch_mappings
3333
tokenizer = ElasticSearchTokenizer()
3434

35-
log_source_pattern = r"___source_type___\s*(:|=)\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"
35+
log_source_pattern = r"___source_type___\s*(?:[:=])\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"
3636
log_source_key_types = ("index", "event\.category")
3737

3838
def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]:
3939
log_sources = {}
4040
for source_type in self.log_source_key_types:
4141
pattern = self.log_source_pattern.replace('___source_type___', source_type)
4242
while search := re.search(pattern, query, flags=re.IGNORECASE):
43-
value = search.group(1)
43+
group_dict = search.groupdict()
44+
value = group_dict.get("d_q_value") or group_dict.get("value")
4445
log_sources.setdefault(source_type, []).append(value)
4546
pos_start = search.start()
4647
pos_end = search.end()

siem-converter/app/converter/backends/elasticsearch/tokenizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ class ElasticSearchTokenizer(QueryTokenizer):
3232
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"
3333

3434
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
35-
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)+)"\s*'
35+
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
3636
no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
3737
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*"
3838
_value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}"
3939
keyword_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
4040

41-
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]*)\)"""
41+
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
4242
multi_value_check_pattern = r"___field___\s*___operator___\s*\("
4343

4444
wildcard_symbol = "*"

siem-converter/app/converter/backends/opensearch/parsers/opensearch.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,16 @@ class OpenSearchParser(Parser):
3232
mappings: OpenSearchMappings = opensearch_mappings
3333
tokenizer = OpenSearchTokenizer()
3434

35-
log_source_pattern = r"___source_type___\s*(:|=)\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"
35+
log_source_pattern = r"___source_type___\s*(?:[:=])\s*(?:\"?(?P<d_q_value>[%a-zA-Z_*:0-9\-/]+)\"|(?P<value>[%a-zA-Z_*:0-9\-/]+))(?:\s+(?:and|or)\s+|\s+)?"
3636
log_source_key_types = ("index", "event\.category")
3737

3838
def _parse_log_sources(self, query: str) -> Tuple[str, Dict[str, List[str]]]:
3939
log_sources = {}
4040
for source_type in self.log_source_key_types:
4141
pattern = self.log_source_pattern.replace('___source_type___', source_type)
4242
while search := re.search(pattern, query, flags=re.IGNORECASE):
43-
value = search.group(1)
43+
group_dict = search.groupdict()
44+
value = group_dict.get("d_q_value") or group_dict.get("value")
4445
log_sources.setdefault(source_type, []).append(value)
4546
pos_start = search.start()
4647
pos_end = search.end()

siem-converter/app/converter/backends/opensearch/tokenizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ class OpenSearchTokenizer(QueryTokenizer):
3232
match_operator_pattern = r"(?:___field___\s*(?P<match_operator>:))\s*"
3333

3434
num_value_pattern = r"(?P<num_value>\d+(?:\.\d+)*)\s*"
35-
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)+)"\s*'
35+
double_quotes_value_pattern = r'"(?P<d_q_value>(?:[:a-zA-Z\*0-9=+%#\-_/,\'\.$&^@!\(\)\{\}\s]|\\\"|\\)*)"\s*'
3636
no_quotes_value_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\\)+)\s*"
3737
re_value_pattern = r"/(?P<re_value>[:a-zA-Z\*0-9=+%#\\\-_\,\"\'\.$&^@!\(\)\{\}\[\]\s?]+)/\s*"
3838
_value_pattern = fr"{num_value_pattern}|{re_value_pattern}|{no_quotes_value_pattern}|{double_quotes_value_pattern}"
3939
keyword_pattern = r"(?P<n_q_value>(?:[a-zA-Z\*0-9=%#_/,\'\.$@]|\\\"|\\\(|\\\)|\\\[|\\\]|\\\{|\\\}|\\\:|\\)+)(?:\s+|\)|$)"
4040

41-
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]*)\)"""
41+
multi_value_pattern = r"""\((?P<value>[:a-zA-Z\"\*0-9=+%#\-_\/\\'\,.&^@!\(\[\]\s]+)\)"""
4242
multi_value_check_pattern = r"___field___\s*___operator___\s*\("
4343

4444
wildcard_symbol = "*"

siem-converter/app/converter/backends/sigma/renders/sigma.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@
1717
"""
1818

1919
import copy
20-
from typing import Any
20+
from typing import Any, List
2121

2222
import yaml
2323

2424
from app.converter.backends.sigma.const import SIGMA_RULE_DETAILS
2525
from app.converter.backends.sigma.mapping import SigmaMappings, sigma_mappings, SigmaLogSourceSignature
2626
from app.converter.core.compiler import DataStructureCompiler
2727
from app.converter.core.exceptions.core import StrictPlatformFieldException
28-
from app.converter.core.mapping import SourceMapping
28+
from app.converter.core.mapping import SourceMapping, DEFAULT_MAPPING_NAME
2929
from app.converter.core.models.field import Field, Keyword
3030
from app.converter.core.models.functions.types import ParsedFunctions
3131
from app.converter.core.models.group import Group
@@ -229,11 +229,18 @@ def generate_detection(self, data: Any, source_mapping: SourceMapping) -> dict:
229229
self.reset_counters()
230230

231231
return detection
232+
233+
def __get_source_mapping(self, source_mapping_ids: List[str]) -> SourceMapping:
234+
for source_mapping_id in source_mapping_ids:
235+
if source_mapping := self.mappings.get_source_mapping(source_mapping_id):
236+
return source_mapping
237+
238+
return self.mappings.get_source_mapping(DEFAULT_MAPPING_NAME)
232239

233240
def generate(self, query, meta_info: MetaInfoContainer, functions: ParsedFunctions):
234241
self.reset_counters()
235242

236-
source_mapping = self.mappings.get_source_mapping(meta_info.source_mapping_ids[0])
243+
source_mapping = self.__get_source_mapping(meta_info.source_mapping_ids)
237244
log_source_signature: SigmaLogSourceSignature = source_mapping.log_source_signature
238245
sigma_condition = copy.deepcopy(query)
239246
prepared_data_structure = DataStructureCompiler().generate(tokens=sigma_condition)
@@ -243,7 +250,7 @@ def generate(self, query, meta_info: MetaInfoContainer, functions: ParsedFunctio
243250
"id": meta_info.id,
244251
"description": meta_info.description,
245252
"status": "experimental",
246-
"author": "",
253+
"author": meta_info.author,
247254
"references": meta_info.references,
248255
"tags": meta_info.tags,
249256
"logsource": log_source_signature.log_sources,

0 commit comments

Comments
 (0)