Skip to content

Commit

Permalink
Added sigma-similarity tool
Browse files Browse the repository at this point in the history
Fixed also bug in backend base class that was triggered by the way
backends are used by this tool.
  • Loading branch information
thomaspatzke committed Oct 25, 2019
1 parent a5ec672 commit 30948b9
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 49 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ elasticsearch = "*"
elasticsearch-async = "*"
pymisp = "*"
PyYAML = ">=3.11"
progressbar2 = "*"

[requires]
python_version = "3.6"
122 changes: 74 additions & 48 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

91 changes: 91 additions & 0 deletions tools/sigma-similarity
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# Calculates similarity of Sigma rules by transformation into a normalized
# string form and calculation of a string distance.

import argparse
import pathlib
import itertools
import difflib

import progressbar

from sigma.parser.collection import SigmaCollectionParser
from sigma.backends.base import SingleTextQueryBackend
from sigma.configuration import SigmaConfiguration

argparser = argparse.ArgumentParser(description="Calculate a similarity score between Sigma rules.")
argparser.add_argument("--recursive", "-r", action="store_true", help="Recurse into directories")
argparser.add_argument("--verbose", "-v", action="count", help="Be verbose. Use once more for debug output.")
argparser.add_argument("--top", "-t", type=int, help="Only output the n most similar rule pairs.")
argparser.add_argument("--min-similarity", "-m", type=int, help="Only output pairs with a similarity above this threshold (percent)")
argparser.add_argument("inputs", nargs="+", help="Sigma input files")
args = argparser.parse_args()

def print_verbose(level, *args, **kwargs):
if args.verbose >= level:
print(*args, **kwargs)

class SigmaNormalizationBackend(SingleTextQueryBackend):
"""Normalization of a Sigma rule into a non-existing query language that supports all Sigma features"""
andToken = " AND "
orToken = " OR "
notToken = " NOT "
subExpression = "(%s)"
listExpression = "[%s]"
listSeparator = ","
valueExpression = "%s"
typedValueExpression = dict()
nullExpression = "NULL(%s)"
notNullExpression = "NOTNULL(%s)"
mapExpression = "{'%s':'%s'}"

sort_condition_lists = True

def generateListNode(self, node):
"""Return sorted list"""
return super().generateListNode(list(sorted([ str(item) for item in node ])))

def generateTypedValueNode(self, node):
"""Return normalized form of typed values"""
return "type_{}({})".format(node.identifier, str(node))

def generateAggregation(self, agg):
if agg.aggfunc_notrans == "near":
return " near in={} ex={}".format(str(agg.include), str(agg.exclude))
else:
return " | {}({}) by {} {} {}".format(agg.aggfunc_notrans, agg.aggfield, agg.groupfield, agg.cond_op, agg.condition)

backend = SigmaNormalizationBackend(SigmaConfiguration())

if args.recursive:
paths = [ p for pathname in args.inputs for p in pathlib.Path(pathname).glob("**/*") if p.is_file() ]
else:
paths = [ pathlib.Path(pathname) for pathname in args.inputs ]

parsed = {
str(path): SigmaCollectionParser(path.open().read())
for path in paths
}
converted = {
str(path): list(sigma_collection.generate(backend))
for path, sigma_collection in parsed.items()
}
converted_flat = (
(path, i, normalized)
for path, nlist in converted.items()
for i, normalized in zip(range(len(nlist)), nlist)
)
converted_pairs = list(itertools.combinations(converted_flat, 2))
similarities = [
(item1[:2], item2[:2], difflib.SequenceMatcher(None, item1[2], item2[2]).ratio())
for item1, item2 in progressbar.progressbar(converted_pairs)
]

i = 0
for similarity in sorted(similarities, key=lambda s: s[2], reverse=True):
if args.min_similarity and similarity[2] * 100 < args.min_similarity: # finish after similarity drops below minimum
break
print("{:70} | {:2} | {:70} | {:2} | {:>3.2%}".format(*similarity[0], *similarity[1], similarity[2]))
i += 1
if args.top and i >= args.top: # end after $top pairs
break
8 changes: 7 additions & 1 deletion tools/sigma/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class BaseBackend:
options = tuple() # a list of tuples with following elements: option name, default value, help text, target attribute name (option name if None)
config_required = True

def __init__(self, sigmaconfig, backend_options=None):
def __init__(self, sigmaconfig, backend_options=dict()):
"""
Initialize backend. This gets a sigmaconfig object, which is notified about the used backend class by
passing the object instance to it.
Expand Down Expand Up @@ -221,10 +221,14 @@ class SingleTextQueryBackend(RulenameCommentMixin, BaseBackend, QuoteCharMixin):
mapListsSpecialHandling = False # Same handling for map items with list values as for normal values (strings, integers) if True, generateMapItemListNode method is called with node
mapListValueExpression = None # Syntax for field/value condititons where map value is a list

sort_condition_lists = False # Sort condition items for AND and OR conditions

def generateANDNode(self, node):
generated = [ self.generateNode(val) for val in node ]
filtered = [ g for g in generated if g is not None ]
if filtered:
if self.sort_condition_lists:
filtered = sorted(filtered)
return self.andToken.join(filtered)
else:
return None
Expand All @@ -233,6 +237,8 @@ def generateORNode(self, node):
generated = [ self.generateNode(val) for val in node ]
filtered = [ g for g in generated if g is not None ]
if filtered:
if self.sort_condition_lists:
filtered = sorted(filtered)
return self.orToken.join(filtered)
else:
return None
Expand Down

0 comments on commit 30948b9

Please sign in to comment.