Skip to content

Commit

Permalink
20221109a
Browse files Browse the repository at this point in the history
  • Loading branch information
DidierStevens committed Nov 9, 2022
1 parent 4977478 commit a598ffa
Show file tree
Hide file tree
Showing 4 changed files with 304 additions and 14 deletions.
5 changes: 3 additions & 2 deletions oledump.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

__description__ = 'Analyze OLE files (Compound Binary Files)'
__author__ = 'Didier Stevens'
__version__ = '0.0.70'
__date__ = '2022/09/04'
__version__ = '0.0.71'
__date__ = '2022/11/09'

"""
Expand Down Expand Up @@ -119,6 +119,7 @@
2022/06/07: 0.0.68 added extra info parameters %CTIME% %MTIME% %CTIMEHEX% %MTIMEHEX%
2022/07/22: 0.0.69 minor documentation change
2022/09/04: 0.0.70 bumping version for update to plugin(s), no changes to oledump.py
2022/11/09: 0.0.71 bumping version for update to plugin(s), no changes to oledump.py
Todo:
Expand Down
67 changes: 65 additions & 2 deletions pdf-parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__description__ = 'pdf-parser, use it to parse a PDF document'
__author__ = 'Didier Stevens'
__version__ = '0.7.6'
__version__ = '0.7.7'
__date__ = '2022/05/24'
__minimum_python_version__ = (2, 5, 1)
__maximum_python_version__ = (3, 10, 4)
Expand Down Expand Up @@ -72,6 +72,7 @@
2021/07/03: V0.7.5 bug fixes; fixed ASCII85Decode Python 3 bug thanks to R Primus
2021/11/23: V0.7.6 Python 3 bug fixes
2022/05/24: bug fixes
2022/11/09: V0.7.7 added support for environment variable DSS_DEFAULT_HASH_ALGORITHMS
Todo:
- handle printf todo
Expand Down Expand Up @@ -139,6 +140,10 @@ def PrintManual():
By defining PDFPARSER_OPTIONS=-O, pdf-parser will always parse stream objects (when found).
PS: this feature is experimental.
Option -H calculates the MD5 hash by default.
This can be changed by setting environment variable DSS_DEFAULT_HASH_ALGORITHMS.
Like this: set DSS_DEFAULT_HASH_ALGORITHMS=sha256
'''
for line in manual.split('\n'):
print(textwrap.fill(line))
Expand Down Expand Up @@ -1331,6 +1336,59 @@ def GetArguments():
return arguments
return envvar.split(' ') + arguments

class cHashCRC32():
    """hashlib-style wrapper around zlib.crc32.

    Provides the update()/hexdigest() subset of the hashlib interface so
    that a CRC32 can be selected the same way as a hashlib algorithm.
    """

    def __init__(self):
        # 0 is the starting value of zlib.crc32, so an object that never
        # received data reports the CRC32 of empty input instead of failing.
        self.crc32 = 0

    def update(self, data):
        """Feed more bytes; the CRC is continued across successive calls."""
        # Passing the running value makes update() incremental, like hashlib.
        self.crc32 = zlib.crc32(data, self.crc32)

    def hexdigest(self):
        """Return the CRC32 as 8 lowercase hexadecimal digits."""
        # Mask to 32 bits: zlib.crc32 can return a negative int on Python 2.
        return '%08x' % (self.crc32 & 0xffffffff)

class cHashChecksum8():
    """hashlib-style object that adds up the values of the bytes it is fed.

    The running total is kept as a plain integer; it is not reduced to
    8 bits.
    """

    def __init__(self):
        self.sum = 0

    def update(self, data):
        """Add the value of every byte in data to the running total."""
        # Python 3 bytes iterate as integers; Python 2 strings need ord().
        if sys.version_info[0] < 3:
            values = map(ord, data)
        else:
            values = data
        self.sum = self.sum + sum(values)

    def hexdigest(self):
        """Return the running total as at least 8 lowercase hex digits."""
        return '%08x' % self.sum

# Hash algorithms implemented in this file rather than by hashlib,
# keyed by the name used to select them (see GetHashObjects).
dSpecialHashes = {'crc32': cHashCRC32, 'checksum8': cHashChecksum8}

def GetHashObjects(algorithms):
    """Create one hash object per requested algorithm name.

    algorithms is a string of names separated by commas, or by semicolons
    when it contains no comma. An empty string selects the value of
    environment variable DSS_DEFAULT_HASH_ALGORITHMS, or 'md5' when that
    variable is not set. Whitespace around names and empty entries (for
    example a trailing separator) are ignored; if no name remains, 'md5'
    is used.

    Returns a tuple (hashes, dHashes): the list of names in the requested
    order, and a dictionary mapping each name to a fresh hash object.
    When a name is unknown, an error is printed and ([], {}) is returned.
    """
    if algorithms == '':
        algorithms = os.getenv('DSS_DEFAULT_HASH_ALGORITHMS', 'md5')
    separator = ',' if ',' in algorithms else ';'
    # Be tolerant of "md5, sha256" and "md5," style values.
    hashes = [name.strip() for name in algorithms.split(separator)]
    hashes = [name for name in hashes if name != '']
    if not hashes:
        hashes = ['md5']

    dHashes = {}
    for name in hashes:
        if name in dSpecialHashes:
            dHashes[name] = dSpecialHashes[name]()
        elif name in hashlib.algorithms_available:
            dHashes[name] = hashlib.new(name)
        else:
            print('Error: unknown hash algorithm: %s' % name)
            print('Available hash algorithms: ' + ' '.join(list(hashlib.algorithms_available) + list(dSpecialHashes.keys())))
            return [], {}

    return hashes, dHashes

def CalculateChosenHash(data):
    """Hash data with the default hash algorithm.

    The algorithm is the first one returned by GetHashObjects(''), i.e.
    the first name in environment variable DSS_DEFAULT_HASH_ALGORITHMS,
    or md5 when that variable is not set.

    Returns a tuple (hexdigest, algorithm name).
    """
    hashes, dHashes = GetHashObjects('')
    if not hashes:
        # GetHashObjects has already reported the unknown algorithm; fall
        # back to md5 rather than failing with an IndexError.
        return hashlib.md5(data).hexdigest(), 'md5'
    chosen = hashes[0]
    oHash = dHashes[chosen]
    oHash.update(data)
    return oHash.hexdigest(), chosen

def Main():
"""pdf-parser, use it to parse a PDF document
"""
Expand Down Expand Up @@ -1393,6 +1451,7 @@ def Main():
cntStartXref = 0
cntIndirectObject = 0
dicObjectTypes = {}
objectsWithStream = []
keywords = ['/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/URI']
for extrakeyword in ParseINIFile():
if not extrakeyword in keywords:
Expand Down Expand Up @@ -1529,6 +1588,8 @@ def Main():
for keyword in dKeywords.keys():
if object.ContainsName(keyword):
dKeywords[keyword].append(object.id)
if object.ContainsStream():
objectsWithStream.append(object.id)
else:
if object.type == PDF_ELEMENT_COMMENT and selectComment:
if options.generate:
Expand Down Expand Up @@ -1600,7 +1661,8 @@ def Main():
elif options.hash:
print('obj %d %d' % (object.id, object.version))
rawContent = FormatOutput(object.content, True)
print(' len: %d md5: %s' % (len(rawContent), hashlib.md5(rawContent).hexdigest()))
hashHexdigest, hashAlgo = CalculateChosenHash(rawContent.encode('latin'))
print(' len: %d %s: %s' % (len(rawContent), hashAlgo, hashHexdigest))
print('')
elif options.searchstream:
if object.StreamContains(options.searchstream, not options.unfiltered, options.casesensitive, options.regex, options.overridingfilters):
Expand Down Expand Up @@ -1641,6 +1703,7 @@ def Main():
print('Trailer: %s' % cntTrailer)
print('StartXref: %s' % cntStartXref)
print('Indirect object: %s' % cntIndirectObject)
print('Indirect objects with a stream: %s' % ', '.join([str(id) for id in objectsWithStream]))
for key in sorted(dicObjectTypes.keys()):
print(' %s %d: %s' % (key, len(dicObjectTypes[key]), ', '.join(map(lambda x: '%d' % x, dicObjectTypes[key]))))
if sum(map(len, dKeywords.values())) > 0:
Expand Down
146 changes: 146 additions & 0 deletions plugin_dttm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env python

__description__ = 'Word DTTM date/time structure plugin for oledump.py'
__author__ = 'Didier Stevens'
__version__ = '0.0.1'
__date__ = '2022/11/09'

"""
Source code put in public domain by Didier Stevens, no Copyright
https://DidierStevens.com
Use at your own risk
History:
2022/10/09: start
2022/11/09: added option --verbose
Todo:
"""

import bitstruct
import datetime
import collections

#https://interoperability.blob.core.windows.net/files/MS-DOC/%5bMS-DOC%5d.pdf

# Result of ParseDTTM: 'valid' and 'null' are status flags, the remaining
# fields are the decoded date/time components (all 0 when null or invalid).
nDTTM = collections.namedtuple('nDTTM', ['valid', 'null', 'year', 'month', 'day', 'weekday', 'hours', 'minutes'])

def ParseDTTM(data):
    """Decode 4 bytes of DTTM data into an nDTTM tuple.

    Four zero bytes give a null DTTM (valid and null both True). Data
    whose fields are out of range, that is not an existing calendar date,
    or whose weekday field does not match the date gives an nDTTM with
    valid set to False. Otherwise the decoded fields are returned with
    1900 added to the year field.
    """
    if data == b'\x00\x00\x00\x00':
        return nDTTM(True, True, 0, 0, 0, 0, 0, 0)

    invalid = nDTTM(False, False, 0, 0, 0, 0, 0, 0)

    # The byte order is reversed before the bit fields are unpacked.
    weekday, year, month, day, hours, minutes = bitstruct.unpack('u3u9u4u5u5u6', data[::-1])

    fieldsInRange = (
        minutes <= 0x3B
        and hours <= 0x17
        and 1 <= day <= 0x1F
        and 1 <= month <= 0x0C
        and weekday <= 0x06
    )
    if not fieldsInRange:
        return invalid

    year = year + 1900
    try:
        calendarDate = datetime.datetime(year, month, day)
    except ValueError:
        return invalid

    # datetime numbers Monday as 0; the DTTM weekday field has Sunday as 0.
    if (calendarDate.weekday() + 1) % 7 != weekday:
        return invalid

    return nDTTM(True, False, year, month, day, weekday, hours, minutes)

def PrintDTTM(inDTTM):
    """Format an nDTTM tuple as text.

    Returns 'null' for a null DTTM, otherwise 'YYYY/MM/DD(Day) HH:MM'
    where Day is the three-letter weekday name.
    """
    if inDTTM.null:
        return 'null'

    # Weekday numbers start at 0 for Sunday.
    dWeekdays = dict(enumerate(['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']))
    weekdayName = dWeekdays[inDTTM.weekday]
    return '%04d/%02d/%02d(%s) %02d:%02d' % (inDTTM.year, inDTTM.month, inDTTM.day, weekdayName, inDTTM.hours, inDTTM.minutes)

class cCLSID(cPluginParent):
    """Plugin that searches a stream for Word DTTM date/time structures.

    Every 4-byte window of the stream is decoded as a DTTM. Hits that are
    preceded by two zero bytes are checked as the start of a potential DOP
    (three DTTMs followed by revision and count fields); hits preceded by
    one of the known sprm codes are reported as individual DTTMs.
    """
    macroOnly = False
    # NOTE(review): the display name (and the class name) look copied from
    # another plugin; left unchanged so that the plugin's output and the
    # AddPlugin call keep working - confirm before renaming.
    name = 'OLE streams plugin'

    def __init__(self, name, stream, options):
        self.streamname = name
        self.stream = stream
        self.options = options
        self.ran = False

    def Analyze(self):
        """Scan the stream and return a list of report lines.

        Plugin options (parsed from self.options, space separated):
          --minyear / --maxyear  only consider DTTMs whose year is in range
          -V / --verbose         also list every DTTM found with its position
        """
        oParser = optparse.OptionParser()
        oParser.add_option('--minyear', type=int, default=1900, help='Minimum year value (default 1900)')
        oParser.add_option('--maxyear', type=int, default=3000, help='Maximum year value (default 3000)')
        oParser.add_option('-V', '--verbose', action='store_true', default=False, help='verbose output')
        (options, args) = oParser.parse_args(self.options.split(' '))

        result = []
        self.ran = True
        stream = self.stream
        positionPotentialDOPs = []
        potentialPrevious = []

        # 16-bit little-endian values that may directly precede a DTTM.
        dPrevious = {
            0x6805: 'sprmCDttmRMark',
            0x6864: 'sprmCDttmRMarkDel',
        }

        # A stream of n bytes has n - 3 windows of 4 bytes; using n - 3
        # makes sure the window ending at the last byte is scanned too.
        for position in range(len(stream) - 3):
            inDTTM = ParseDTTM(stream[position:position + 4])
            if not inDTTM.valid or inDTTM.null:
                continue
            if inDTTM.year < options.minyear or inDTTM.year > options.maxyear:
                continue
            if options.verbose:
                result.append('0x%08x: %s' % (position, PrintDTTM(inDTTM)))
            if position >= 2:
                previous = struct.unpack('<H', stream[position - 2:position])[0]
                if previous == 0x0000:
                    positionPotentialDOPs.append(position)
                if previous in dPrevious:
                    potentialPrevious.append([previous, position])

        # Layout checked at each candidate: 3 DTTMs (4 bytes each) followed
        # by the fields described by formatMetadata.
        formatMetadata = '<HIIIHI'
        lengthData = 3 * 4 + struct.calcsize(formatMetadata)
        validDOPs = []
        for position in positionPotentialDOPs:
            data = stream[position:position + lengthData]
            if len(data) != lengthData:
                # Not enough bytes left in the stream for a complete record.
                continue
            dataCreated = data[:4]
            dataRevised = data[4:8]
            dataLastPrint = data[8:12]
            inDTTMCreated = ParseDTTM(dataCreated)
            inDTTMRevised = ParseDTTM(dataRevised)
            inDTTMLastPrint = ParseDTTM(dataLastPrint)
            if inDTTMCreated.valid and (inDTTMRevised.valid or inDTTMRevised.null) and (inDTTMLastPrint.valid or inDTTMLastPrint.null):
                # NOTE(review): binascii.b2a_hex returns bytes on Python 3,
                # so %s renders it as b'...'; kept as is to preserve output.
                validDOPs.append('Position DOP: 0x%08x' % position)
                validDOPs.append(' dttmCreated: %s %s' % (PrintDTTM(inDTTMCreated), binascii.b2a_hex(dataCreated)))
                validDOPs.append(' dttmRevised: %s %s' % (PrintDTTM(inDTTMRevised), binascii.b2a_hex(dataRevised)))
                validDOPs.append(' dttmLastPrint: %s %s' % (PrintDTTM(inDTTMLastPrint), binascii.b2a_hex(dataLastPrint)))
                metadata = struct.unpack(formatMetadata, data[12:])
                validDOPs.append(' nRevision: %d' % metadata[0])
                validDOPs.append(' tmEdited: %d' % metadata[1])
                validDOPs.append(' cWords: %d' % metadata[2])
                validDOPs.append(' cCh: %d' % metadata[3])
                validDOPs.append(' cPg: %d' % metadata[4])
                validDOPs.append(' cParas: %d' % metadata[5])
        result.extend(validDOPs)

        previousOutput = []
        for previous, position in potentialPrevious:
            dataDTTM = stream[position:position + 4]
            previousOutput.append(' %s: %s' % (dPrevious.get(previous, '0x%04x' % previous), PrintDTTM(ParseDTTM(dataDTTM))))

        if len(previousOutput) > 0:
            result.append('DTTMs:')
            result.extend(previousOutput)

        return result

# Hand the plugin class to AddPlugin, which is not defined in this file
# (presumably provided by oledump.py when it loads the plugin - confirm).
AddPlugin(cCLSID)
Loading

0 comments on commit a598ffa

Please sign in to comment.