Skip to content

Commit

Permalink
Refactor parsers, fix BigQueryOutputPlugin bug + other changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
ogarod committed Jun 11, 2019
1 parent 06eeb07 commit b03d9ae
Show file tree
Hide file tree
Showing 33 changed files with 371 additions and 313 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -543,42 +543,40 @@ def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
])


class FakeFileParser(parser.FileParser):
class FakeFileParser(parsers.SingleFileParser):

output_types = [rdf_protodict.AttributedDict]
supported_artifacts = ["FakeFileArtifact"]

def Parse(self, stat, file_obj, knowledge_base):

def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.

lines = set(l.strip() for l in file_obj.read().splitlines())
lines = set(l.strip() for l in filedesc.read().splitlines())

users = list(filter(None, lines))

filename = stat.pathspec.path
filename = pathspec.path
cfg = {"filename": filename, "users": users}

yield rdf_protodict.AttributedDict(**cfg)


class FakeFileMultiParser(parser.FileMultiParser):
class FakeFileMultiParser(parsers.MultiFileParser):

output_types = [rdf_protodict.AttributedDict]
supported_artifacts = ["FakeFileArtifact2"]

def ParseMultiple(self, stats, file_objects, knowledge_base):

def ParseFiles(self, knowledge_base, pathspecs, filedescs):
del knowledge_base # Unused.

lines = set()
for file_obj in file_objects:
for file_obj in filedescs:
lines.update(set(l.strip() for l in file_obj.read().splitlines()))

users = list(filter(None, lines))

for stat in stats:
filename = stat.pathspec.path
for pathspec in pathspecs:
filename = pathspec.path
cfg = {"filename": filename, "users": users}

yield rdf_protodict.AttributedDict(**cfg)
Expand Down
10 changes: 9 additions & 1 deletion grr/client/grr_response_client/client_actions/osquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,15 @@ def Query(args):
# `--S` we can make `osqueryd` behave like `osqueryi`. Since this flag also
# works with `osqueryi`, by passing it we simply expand number of supported
# executable types.
command = [config.CONFIG["Osquery.path"], "--S", "--json", query]
command = [
config.CONFIG["Osquery.path"],
"--S", # Enforce shell execution.
"--logger_stderr=false", # Only allow errors to be written to stderr.
"--logger_min_status=3", # Disable status logs.
"--logger_min_stderr=2", # Only ERROR-level logs to stderr.
"--json", # Set output format to JSON.
query,
]
proc = subprocess.run(
command,
timeout=timeout,
Expand Down
16 changes: 8 additions & 8 deletions grr/core/grr_response_core/lib/parsers/chrome_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,38 +11,38 @@
from future.moves.urllib import parse as urlparse
from past.builtins import long

from grr_response_core.lib import parser
from grr_response_core.lib import parsers
from grr_response_core.lib.parsers import sqlite_file
from grr_response_core.lib.rdfvalues import webhistory as rdf_webhistory


class ChromeHistoryParser(parser.FileParser):
class ChromeHistoryParser(parsers.SingleFileParser):
"""Parse Chrome history files into BrowserHistoryItem objects."""

output_types = [rdf_webhistory.BrowserHistoryItem]
supported_artifacts = ["ChromeHistory"]

def Parse(self, stat, file_object, knowledge_base):
"""Parse the History file."""
_ = knowledge_base
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.

# TODO(user): Convert this to use the far more intelligent plaso parser.
chrome = ChromeParser(file_object)
chrome = ChromeParser(filedesc)
for timestamp, entry_type, url, data1, _, _ in chrome.Parse():
if entry_type == "CHROME_DOWNLOAD":
yield rdf_webhistory.BrowserHistoryItem(
url=url,
domain=urlparse.urlparse(url).netloc,
access_time=timestamp,
program_name="Chrome",
source_path=file_object.Path(),
source_path=pathspec.CollapsePath(),
download_path=data1)
elif entry_type == "CHROME_VISIT":
yield rdf_webhistory.BrowserHistoryItem(
url=url,
domain=urlparse.urlparse(url).netloc,
access_time=timestamp,
program_name="Chrome",
source_path=file_object.Path(),
source_path=pathspec.CollapsePath(),
title=data1)


Expand Down
93 changes: 48 additions & 45 deletions grr/core/grr_response_core/lib/parsers/config_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from grr_response_core.lib import lexer
from grr_response_core.lib import parser
from grr_response_core.lib import parsers
from grr_response_core.lib import utils
from grr_response_core.lib.rdfvalues import anomaly as rdf_anomaly
from grr_response_core.lib.rdfvalues import client_fs as rdf_client_fs
Expand Down Expand Up @@ -321,7 +322,7 @@ def ParseToOrderedDict(self, data):
return result


class NfsExportsParser(parser.FileParser):
class NfsExportsParser(parsers.SingleFileParser):
"""Parser for NFS exports."""

output_types = [rdf_config_file.NfsExport]
Expand All @@ -331,9 +332,12 @@ def __init__(self, *args, **kwargs):
super(NfsExportsParser, self).__init__(*args, **kwargs)
self._field_parser = FieldParser()

def Parse(self, unused_stat, file_obj, unused_knowledge_base):
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.
del pathspec # Unused.

for entry in self._field_parser.ParseEntries(
utils.ReadFileBytesAsUnicode(file_obj)):
utils.ReadFileBytesAsUnicode(filedesc)):
if not entry:
continue
result = rdf_config_file.NfsExport()
Expand Down Expand Up @@ -532,7 +536,7 @@ def GenerateResults(self):
yield rdf_config_file.SshdConfig(config=self.config, matches=matches)


class SshdConfigParser(parser.FileParser):
class SshdConfigParser(parsers.SingleFileParser):
"""A parser for sshd_config files."""

supported_artifacts = ["SshdConfigFile"]
Expand All @@ -542,28 +546,14 @@ def __init__(self, *args, **kwargs):
super(SshdConfigParser, self).__init__(*args, **kwargs)
self._field_parser = SshdFieldParser()

def Parse(self, stat, file_object, knowledge_base):
"""Parse the sshd configuration.
Process each of the lines in the configuration file.
Assembles an sshd_config file into a dictionary with the configuration
keyword as the key, and the configuration settings as value(s).
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.
del pathspec # Unused.

Args:
stat: unused
file_object: An open configuration file object.
knowledge_base: unused
Yields:
The configuration as an rdfvalue.
"""
_, _ = stat, knowledge_base
# Clean out any residual state.
self._field_parser.Flush()
lines = [
l.strip()
for l in utils.ReadFileBytesAsUnicode(file_object).splitlines()
l.strip() for l in utils.ReadFileBytesAsUnicode(filedesc).splitlines()
]
for line in lines:
# Remove comments (will break if it includes a quoted/escaped #)
Expand Down Expand Up @@ -596,7 +586,7 @@ def Parse(self, cmd, args, stdout, stderr, return_val, time_taken,
yield result


class MtabParser(parser.FileParser):
class MtabParser(parsers.SingleFileParser):
"""Parser for mounted filesystem data acquired from /proc/mounts."""
output_types = [rdf_client_fs.Filesystem]
supported_artifacts = ["LinuxProcMounts", "LinuxFstab"]
Expand All @@ -605,9 +595,12 @@ def __init__(self, *args, **kwargs):
super(MtabParser, self).__init__(*args, **kwargs)
self._field_parser = FieldParser()

def Parse(self, unused_stat, file_obj, unused_knowledge_base):
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.
del pathspec # Unused.

for entry in self._field_parser.ParseEntries(
utils.ReadFileBytesAsUnicode(file_obj)):
utils.ReadFileBytesAsUnicode(filedesc)):
if not entry:
continue
result = rdf_client_fs.Filesystem()
Expand Down Expand Up @@ -706,7 +699,7 @@ def ParseAction(self, action):
return rslt


class RsyslogParser(parser.FileMultiParser):
class RsyslogParser(parsers.MultiFileParser):
"""Artifact parser for syslog configurations."""

output_types = [rdf_protodict.AttributedDict]
Expand All @@ -716,10 +709,13 @@ def __init__(self, *args, **kwargs):
super(RsyslogParser, self).__init__(*args, **kwargs)
self._field_parser = RsyslogFieldParser()

def ParseMultiple(self, unused_stats, file_objs, unused_knowledge_base):
def ParseFiles(self, knowledge_base, pathspecs, filedescs):
del knowledge_base # Unused.
del pathspecs # Unused.

# TODO(user): review quoting and line continuation.
result = rdf_config_file.LogConfig()
for file_obj in file_objs:
for file_obj in filedescs:
for entry in self._field_parser.ParseEntries(
utils.ReadFileBytesAsUnicode(file_obj)):
directive = entry[0]
Expand All @@ -731,15 +727,17 @@ def ParseMultiple(self, unused_stats, file_objs, unused_knowledge_base):
return [result]


class PackageSourceParser(parser.FileParser):
class PackageSourceParser(parsers.SingleFileParser):
"""Common code for APT and YUM source list parsing."""
output_types = [rdf_protodict.AttributedDict]

# Prevents this from automatically registering.
__abstract = True # pylint: disable=g-bad-name

def Parse(self, stat, file_obj, unused_knowledge_base):
uris_to_parse = self.FindPotentialURIs(file_obj)
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.

uris_to_parse = self.FindPotentialURIs(filedesc)
uris = []

for url_to_parse in uris_to_parse:
Expand All @@ -751,7 +749,7 @@ def Parse(self, stat, file_obj, unused_knowledge_base):
if url.transport and (url.host or url.path):
uris.append(url)

filename = stat.pathspec.path
filename = pathspec.path
cfg = {"filename": filename, "uris": uris}
yield rdf_protodict.AttributedDict(**cfg)

Expand Down Expand Up @@ -839,14 +837,16 @@ def FindPotentialURIs(self, file_obj):
utils.ReadFileBytesAsUnicode(file_obj), "=", "baseurl")


class CronAtAllowDenyParser(parser.FileParser):
class CronAtAllowDenyParser(parsers.SingleFileParser):
"""Parser for /etc/cron.allow /etc/cron.deny /etc/at.allow & /etc/at.deny."""
output_types = [rdf_protodict.AttributedDict]
supported_artifacts = ["CronAtAllowDenyFiles"]

def Parse(self, stat, file_obj, unused_knowledge_base):
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.

lines = set([
l.strip() for l in utils.ReadFileBytesAsUnicode(file_obj).splitlines()
l.strip() for l in utils.ReadFileBytesAsUnicode(filedesc).splitlines()
])

users = []
Expand All @@ -859,15 +859,15 @@ def Parse(self, stat, file_obj, unused_knowledge_base):
elif line: # drop empty lines
users.append(line)

filename = stat.pathspec.path
filename = pathspec.path
cfg = {"filename": filename, "users": users}
yield rdf_protodict.AttributedDict(**cfg)

if bad_lines:
yield rdf_anomaly.Anomaly(
type="PARSER_ANOMALY",
symptom="Dodgy entries in %s." % (filename),
reference_pathspec=stat.pathspec,
reference_pathspec=pathspec,
finding=bad_lines)


Expand Down Expand Up @@ -993,18 +993,18 @@ def ParseLine(self, entries):
prev_settings.append(" ".join(values))


class NtpdParser(parser.FileParser):
class NtpdParser(parsers.SingleFileParser):
"""Artifact parser for ntpd.conf file."""

def Parse(self, stat, file_object, knowledge_base):
"""Parse a ntp config into rdf."""
_, _ = stat, knowledge_base
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.
del pathspec # Unused.

# TODO(hanuszczak): This parser only allows single use because it messes
# with its state. This should be fixed.
field_parser = NtpdFieldParser()
for line in field_parser.ParseEntries(
utils.ReadFileBytesAsUnicode(file_object)):
utils.ReadFileBytesAsUnicode(filedesc)):
field_parser.ParseLine(line)

yield rdf_config_file.NtpConfig(
Expand Down Expand Up @@ -1146,7 +1146,7 @@ def Preprocess(self, data):
return data


class SudoersParser(parser.FileParser):
class SudoersParser(parsers.SingleFileParser):
"""Artifact parser for privileged configuration files."""

output_types = [rdf_config_file.SudoersConfig]
Expand All @@ -1156,9 +1156,12 @@ def __init__(self, *args, **kwargs):
super(SudoersParser, self).__init__(*args, **kwargs)
self._field_parser = SudoersFieldParser()

def Parse(self, unused_stat, file_obj, unused_knowledge_base):
def ParseFile(self, knowledge_base, pathspec, filedesc):
del knowledge_base # Unused.
del pathspec # Unused.

self._field_parser.ParseEntries(
self._field_parser.Preprocess(utils.ReadFileBytesAsUnicode(file_obj)))
self._field_parser.Preprocess(utils.ReadFileBytesAsUnicode(filedesc)))
result = rdf_config_file.SudoersConfig()
for entry in self._field_parser.entries:
# Handle multiple entries in one line, eg:
Expand Down
Loading

0 comments on commit b03d9ae

Please sign in to comment.