20221109a

deckbsd · Nov 9, 2022 · a598ffa · a598ffa
1 parent 4977478
commit a598ffa
Show file tree

Hide file tree

Showing 4 changed files with 304 additions and 14 deletions.
diff --git a/oledump.py b/oledump.py
@@ -2,8 +2,8 @@
 
 __description__ = 'Analyze OLE files (Compound Binary Files)'
 __author__ = 'Didier Stevens'
-__version__ = '0.0.70'
-__date__ = '2022/09/04'
+__version__ = '0.0.71'
+__date__ = '2022/11/09'
 
 """
 
@@ -119,6 +119,7 @@
   2022/06/07: 0.0.68 added extra info parameters %CTIME% %MTIME% %CTIMEHEX% %MTIMEHEX%
   2022/07/22: 0.0.69 minor documentation change
   2022/09/04: 0.0.70 bumping version for update to plugin(s), no changes to oledump.py
+  2022/11/09: 0.0.71 bumping version for update to plugin(s), no changes to oledump.py
 
 Todo:
 

diff --git a/pdf-parser.py b/pdf-parser.py
@@ -2,7 +2,7 @@
 
 __description__ = 'pdf-parser, use it to parse a PDF document'
 __author__ = 'Didier Stevens'
-__version__ = '0.7.6'
+__version__ = '0.7.7'
 __date__ = '2022/05/24'
 __minimum_python_version__ = (2, 5, 1)
 __maximum_python_version__ = (3, 10, 4)
@@ -72,6 +72,7 @@
   2021/07/03: V0.7.5 bug fixes; fixed ASCII85Decode Python 3 bug thanks to R Primus
   2021/11/23: V0.7.6 Python 3 bug fixes
   2022/05/24: bug fixes
+  2022/11/09: V0.7.7 added support for environment variable DSS_DEFAULT_HASH_ALGORITHMS
 
 Todo:
   - handle printf todo
@@ -139,6 +140,10 @@ def PrintManual():
 By defining PDFPARSER_OPTIONS=-O, pdf-parser will always parse stream objects (when found).
 PS: this feature is experimental.
 
+Option -H calculates the MD5 hash by default.
+This can be changed by setting environment variable DSS_DEFAULT_HASH_ALGORITHMS.
+Like this: set DSS_DEFAULT_HASH_ALGORITHMS=sha256
+
 '''
     for line in manual.split('\n'):
         print(textwrap.fill(line))
@@ -1331,6 +1336,59 @@ def GetArguments():
         return arguments
     return envvar.split(' ') + arguments
 
+class cHashCRC32():
+    """CRC32 wrapper offering the update()/hexdigest() part of the hashlib interface.
+
+    The running CRC starts at 0 and each update() continues from the previous
+    value, so feeding data in several chunks gives the same result as feeding
+    it in a single call.
+    """
+
+    def __init__(self):
+        # 0 is the starting value zlib.crc32 uses itself; it also makes
+        # hexdigest() usable before the first update()
+        self.crc32 = 0
+
+    def update(self, data):
+        """Fold data (bytes) into the running CRC32."""
+        # Passing the previous value makes successive calls accumulate instead
+        # of keeping only the CRC of the last chunk
+        self.crc32 = zlib.crc32(data, self.crc32)
+
+    def hexdigest(self):
+        """Return the CRC32 as 8 lowercase hexadecimal digits."""
+        # Mask to 32 bits: zlib.crc32 can return a negative number on Python 2
+        return '%08x' % (self.crc32 & 0xffffffff)
+
+class cHashChecksum8():
+    """Byte-sum checksum offering the update()/hexdigest() part of the hashlib interface."""
+
+    def __init__(self):
+        self.sum = 0
+
+    def update(self, data):
+        """Add the value of every byte of data to the running total."""
+        if sys.version_info[0] < 3:
+            # Python 2: iterating a str yields 1-character strings, convert them with ord
+            values = map(ord, data)
+        else:
+            # Python 3: iterating bytes yields integers directly
+            values = data
+        self.sum += sum(values)
+
+    def hexdigest(self):
+        """Return the running total as hexadecimal, zero-padded to at least 8 digits.
+
+        The total is not truncated to 8 bits.
+        """
+        return '%08x' % self.sum
+
+# Hash algorithms implemented in this file; they take precedence over hashlib names
+dSpecialHashes = {'crc32': cHashCRC32, 'checksum8': cHashChecksum8}
+
+def GetHashObjects(algorithms):
+    """Create one hash object per requested algorithm.
+
+    algorithms is a string of algorithm names separated by commas or, when it
+    contains no comma, by semicolons. Blanks around names are ignored.
+    An empty string selects the value of environment variable
+    DSS_DEFAULT_HASH_ALGORITHMS; md5 is used when that variable is not set,
+    is empty, or when no name remains after splitting.
+
+    Returns a tuple (hashes, dHashes): hashes is the list of requested names in
+    order, dHashes maps each name to a new object with update() and hexdigest().
+    When a name is unknown, an error is printed and ([], {}) is returned.
+    """
+    if algorithms == '':
+        # "or" also covers a variable that is defined but empty, which would
+        # otherwise be reported as the unknown algorithm ''
+        algorithms = os.getenv('DSS_DEFAULT_HASH_ALGORITHMS') or 'md5'
+    if ',' in algorithms:
+        separator = ','
+    else:
+        separator = ';'
+    # Tolerate blanks and stray separators, like "md5, sha256" or "md5;"
+    hashes = [name.strip() for name in algorithms.split(separator)]
+    hashes = [name for name in hashes if name != '']
+    if len(hashes) == 0:
+        hashes = ['md5']
+
+    dHashes = {}
+    for name in hashes:
+        if name in dSpecialHashes:
+            dHashes[name] = dSpecialHashes[name]()
+        elif name in hashlib.algorithms_available:
+            dHashes[name] = hashlib.new(name)
+        else:
+            print('Error: unknown hash algorithm: %s' % name)
+            # Sorted so that the list is printed in the same order on every run
+            print('Available hash algorithms: ' + ' '.join(sorted(hashlib.algorithms_available) + sorted(dSpecialHashes)))
+            return [], {}
+
+    return hashes, dHashes
+
+def CalculateChosenHash(data):
+    """Hash data (bytes) with the default hash algorithm.
+
+    The algorithm is the first name returned by GetHashObjects(''). When
+    GetHashObjects returns no algorithm, md5 is used.
+
+    Returns a tuple (hexadecimal digest, name of the algorithm used).
+    """
+    hashes, dHashes = GetHashObjects('')
+    if len(hashes) == 0:
+        # GetHashObjects returns ([], {}) after printing its error message:
+        # fall back to md5 instead of failing with an IndexError on hashes[0]
+        return hashlib.md5(data).hexdigest(), 'md5'
+    oHash = dHashes[hashes[0]]
+    oHash.update(data)
+    return oHash.hexdigest(), hashes[0]
+
 def Main():
     """pdf-parser, use it to parse a PDF document
     """
@@ -1393,6 +1451,7 @@ def Main():
         cntStartXref = 0
         cntIndirectObject = 0
         dicObjectTypes = {}
+        objectsWithStream = []
         keywords = ['/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/URI']
         for extrakeyword in ParseINIFile():
             if not extrakeyword in keywords:
@@ -1529,6 +1588,8 @@ def Main():
                         for keyword in dKeywords.keys():
                             if object.ContainsName(keyword):
                                 dKeywords[keyword].append(object.id)
+                        if object.ContainsStream():
+                            objectsWithStream.append(object.id)
                 else:
                     if object.type == PDF_ELEMENT_COMMENT and selectComment:
                         if options.generate:
@@ -1600,7 +1661,8 @@ def Main():
                         elif options.hash:
                             print('obj %d %d' % (object.id, object.version))
                             rawContent = FormatOutput(object.content, True)
-                            print(' len: %d md5: %s' % (len(rawContent), hashlib.md5(rawContent).hexdigest()))
+                            hashHexdigest, hashAlgo = CalculateChosenHash(rawContent.encode('latin'))
+                            print(' len: %d %s: %s' % (len(rawContent), hashAlgo, hashHexdigest))
                             print('')
                         elif options.searchstream:
                             if object.StreamContains(options.searchstream, not options.unfiltered, options.casesensitive, options.regex, options.overridingfilters):
@@ -1641,6 +1703,7 @@ def Main():
             print('Trailer: %s' % cntTrailer)
             print('StartXref: %s' % cntStartXref)
             print('Indirect object: %s' % cntIndirectObject)
+            print('Indirect objects with a stream: %s' % ', '.join([str(id) for id in objectsWithStream]))
             for key in sorted(dicObjectTypes.keys()):
                 print(' %s %d: %s' % (key, len(dicObjectTypes[key]), ', '.join(map(lambda x: '%d' % x, dicObjectTypes[key]))))
             if sum(map(len, dKeywords.values())) > 0:

diff --git a/plugin_dttm.py b/plugin_dttm.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+
+__description__ = 'Word DTTM date/time structure plugin for oledump.py'
+__author__ = 'Didier Stevens'
+__version__ = '0.0.1'
+__date__ = '2022/11/09'
+
+"""
+
+Source code put in public domain by Didier Stevens, no Copyright
+https://DidierStevens.com
+Use at your own risk
+
+History:
+  2022/10/09: start
+  2022/11/09: added option --verbose
+
+Todo:
+"""
+
+import bitstruct
+import datetime
+import collections
+
+#https://interoperability.blob.core.windows.net/files/MS-DOC/%5bMS-DOC%5d.pdf
+
+# Result of ParseDTTM: valid and null are flags, the other fields are the decoded date/time values
+nDTTM = collections.namedtuple('nDTTM', ['valid', 'null', 'year', 'month', 'day', 'weekday', 'hours', 'minutes'])
+
+def ParseDTTM(data):
+    """Decode 4 bytes as a DTTM structure and return an nDTTM.
+
+    Four zero bytes give an nDTTM with valid and null both True.
+    Data that fails a check gives an nDTTM with valid False and all values 0.
+    Otherwise valid is True, null is False and year has 1900 added to it.
+    """
+    if data == b'\x00\x00\x00\x00':
+        return nDTTM(valid=True, null=True, year=0, month=0, day=0, weekday=0, hours=0, minutes=0)
+    invalid = nDTTM(valid=False, null=False, year=0, month=0, day=0, weekday=0, hours=0, minutes=0)
+
+    # The bytes are reversed before unpacking; the fields are then read as
+    # weekday (3 bits), year (9), month (4), day (5), hours (5), minutes (6)
+    reversedData = data[::-1]
+    weekday, year, month, day, hours, minutes = bitstruct.unpack('u3u9u4u5u5u6', reversedData)
+
+    # All fields are unsigned, so only upper bounds (and 0 for day/month) need checking
+    inRange = minutes <= 0x3B and hours <= 0x17 and 1 <= day <= 0x1F and 1 <= month <= 0x0C and weekday <= 0x06
+    if not inRange:
+        return invalid
+
+    fullYear = year + 1900
+    try:
+        # Rejects dates that do not exist, like February 30
+        oDate = datetime.date(fullYear, month, day)
+    except ValueError:
+        return invalid
+
+    # isoweekday() numbers Monday..Sunday as 1..7; modulo 7 turns Sunday into 0,
+    # the numbering the stored weekday is compared with
+    if oDate.isoweekday() % 7 != weekday:
+        return invalid
+    return nDTTM(True, False, fullYear, month, day, weekday, hours, minutes)
+
+def PrintDTTM(inDTTM):
+    """Format a parsed DTTM as 'YYYY/MM/DD(Day) HH:MM'.
+
+    Returns 'null' when the null flag of inDTTM is set.
+    Weekday 0 is printed as Sun, 6 as Sat.
+    """
+    if inDTTM.null:
+        return 'null'
+    dWeekdays = dict(enumerate(['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']))
+    values = (inDTTM.year, inDTTM.month, inDTTM.day, dWeekdays[inDTTM.weekday], inDTTM.hours, inDTTM.minutes)
+    return '%04d/%02d/%02d(%s) %02d:%02d' % values
+
+class cCLSID(cPluginParent):
+    """oledump plugin that searches a stream for Word DTTM date/time structures.
+
+    Plugin options (received as one string):
+      --minyear      lowest year accepted for a DTTM (default 1900)
+      --maxyear      highest year accepted for a DTTM (default 3000)
+      -V, --verbose  also report every position where an accepted DTTM is found
+    """
+    # NOTE(review): the class name and the name string do not mention DTTM and
+    # look copied from another plugin. Both are left unchanged: the class name is
+    # used by the AddPlugin call at the end of the file, the string is runtime text.
+    macroOnly = False
+    name = 'OLE streams plugin'
+
+    def __init__(self, name, stream, options):
+        self.streamname = name
+        self.stream = stream
+        self.options = options
+        self.ran = False
+
+    def Analyze(self):
+        """Scan the stream for DTTM structures and return the report as a list of text lines.
+
+        Every 4-byte window of the stream is parsed as a DTTM. Accepted DTTMs
+        (valid, not null, year within --minyear/--maxyear) are then classified
+        by the 2 bytes that precede them:
+          - 0x0000: the DTTM may be the start of a DOP; it is reported when the
+            3 DTTMs found there (created, revised, last print) are acceptable
+          - a value listed in dPrevious: the DTTM is reported under 'DTTMs:'
+        """
+        oParser = optparse.OptionParser()
+        oParser.add_option('--minyear', type=int, default=1900, help='Minimum year value (default 1900)')
+        oParser.add_option('--maxyear', type=int, default=3000, help='Maximum year value (default 3000)')
+        oParser.add_option('-V', '--verbose', action='store_true', default=False, help='verbose output')
+        (options, args) = oParser.parse_args(self.options.split(' '))
+
+        result = []
+        self.ran = True
+        stream = self.stream
+        positionPotentialDOPs = []
+        potentialPrevious = []
+
+        # 16-bit values (read little-endian) that mark the DTTM following them
+        dPrevious = {
+            0x6805: 'sprmCDttmRMark',
+            0x6864: 'sprmCDttmRMarkDel',
+        }
+
+        # - 3 and not - 4: the last complete 4-byte window starts at len(stream) - 4
+        # and range() excludes its end value
+        for position in range(len(stream) - 3):
+            inDTTM = ParseDTTM(stream[position:position + 4])
+            if not inDTTM.valid or inDTTM.null:
+                continue
+            if inDTTM.year < options.minyear or inDTTM.year > options.maxyear:
+                continue
+            if options.verbose:
+                result.append('0x%08x: %s' % (position, PrintDTTM(inDTTM)))
+            if position >= 2:
+                previous = struct.unpack('<H', stream[position - 2:position])[0]
+                if previous == 0x0000:
+                    positionPotentialDOPs.append(position)
+                if previous in dPrevious:
+                    potentialPrevious.append([previous, position])
+
+        # A candidate DOP is 3 DTTMs (4 bytes each) followed by 6 counters
+        formatCounters = '<HIIIHI'
+        lengthData = 3*4 + struct.calcsize(formatCounters)
+        for position in positionPotentialDOPs:
+            data = stream[position:position + lengthData]
+            if len(data) != lengthData:
+                # Not enough bytes left in the stream for the complete structure
+                continue
+            dataCreated = data[:4]
+            dataRevised = data[4:8]
+            dataLastPrint = data[8:12]
+            inDTTMCreated = ParseDTTM(dataCreated)
+            inDTTMRevised = ParseDTTM(dataRevised)
+            inDTTMLastPrint = ParseDTTM(dataLastPrint)
+            if inDTTMCreated.valid and (inDTTMRevised.valid or inDTTMRevised.null) and (inDTTMLastPrint.valid or inDTTMLastPrint.null):
+                result.append('Position DOP: 0x%08x' % position)
+                # b2a_hex returns bytes: decode, otherwise Python 3 prints the hex as b'...'
+                result.append(' dttmCreated:   %s %s' % (PrintDTTM(inDTTMCreated), binascii.b2a_hex(dataCreated).decode('ascii')))
+                result.append(' dttmRevised:   %s %s' % (PrintDTTM(inDTTMRevised), binascii.b2a_hex(dataRevised).decode('ascii')))
+                result.append(' dttmLastPrint: %s %s' % (PrintDTTM(inDTTMLastPrint), binascii.b2a_hex(dataLastPrint).decode('ascii')))
+                metadata = struct.unpack(formatCounters, data[12:])
+                result.append(' nRevision: %d' % metadata[0])
+                result.append(' tmEdited: %d' % metadata[1])
+                result.append(' cWords: %d' % metadata[2])
+                result.append(' cCh: %d' % metadata[3])
+                result.append(' cPg: %d' % metadata[4])
+                result.append(' cParas: %d' % metadata[5])
+
+        previousOutput = []
+        for previous, position in potentialPrevious:
+            dataDTTM = stream[position:position + 4]
+            previousOutput.append(' %s: %s' % (dPrevious.get(previous, '0x%04x' % previous), PrintDTTM(ParseDTTM(dataDTTM))))
+
+        if len(previousOutput) > 0:
+            result.append('DTTMs:')
+            result.extend(previousOutput)
+
+        return result
+
+AddPlugin(cCLSID)