From 7c2ff7a5e0c0da0e1c3579141ab0e49adc30c6c7 Mon Sep 17 00:00:00 2001 From: Didier Stevens Date: Wed, 13 Dec 2017 19:40:43 +0100 Subject: [PATCH] 20171213a --- format-bytes.py | 949 ++++++++++++++++++++++++++++++++++++++++++++++++ oledump.py | 15 +- plugin_biff.py | 40 +- 3 files changed, 986 insertions(+), 18 deletions(-) create mode 100644 format-bytes.py diff --git a/format-bytes.py b/format-bytes.py new file mode 100644 index 0000000..6eb2833 --- /dev/null +++ b/format-bytes.py @@ -0,0 +1,949 @@ +#!/usr/bin/env python + +__description__ = 'This is essentialy a wrapper for the struct module' +__author__ = 'Didier Stevens' +__version__ = '0.0.2' +__date__ = '2017/12/10' + +""" +Source code put in public domain by Didier Stevens, no Copyright +https://DidierStevens.com +Use at your own risk + +History: + 2016/12/03: start + 2017/06/16: refactoring to cBinaryFile + 2017/07/11: added CutData + 2017/11/04: 0.0.2 refactoring; continued; added options -c & -s + 2017/11/18: added option -f + 2017/12/01: updated FilenameCheckHash to handle empty file: # + 2017/12/10: added manual + +Todo: +""" + +import optparse +import sys +import os +import zipfile +import binascii +import random +import gzip +import collections +import glob +import textwrap +import re +import struct +import string +import math +import time +if sys.version_info[0] >= 3: + from io import BytesIO as DataIO +else: + from cStringIO import StringIO as DataIO + +def PrintManual(): + manual = r''' +Manual: + +This tool is essentialy a wrapper for the Python module struct. + +It reads one or more files or stdin and parses the content according to different formats. This tool is very versatile when it comes to handling files, later full details will be provided. + +This Python script was developed with Python 2.7 and tested with Python 2.7 and 3.5. 
+ +Example: + +format-bytes.py random.bin +File: random.bin +s:signed u:unsigned l:little-endian b:big-endian m:mixed-endian +1I: s -69 u 187 +2I: sl 26043 ul 26043 sb -17563 ub 47973 +4I: sl 881419707 ul 881419707 sb -1150973644 ub 3143993652 +4F: l 0.000000 b -0.003502 +4N: b 187.101.137.52 l 52.137.101.187 +4E: l 1997/12/06 14:48:27 b 2069/08/17 19:34:12 +8I: sl -3535861847371979333 ul 14910882226337572283 sb -4943394157892145458 ub 13503349915817406158 +8F: l -1661678170725283018588028971660576297715302893638508902075603349019820032.000000 b -0.000000 +16G: b BB658934-6218-EECE-3AC3-179F6B7428FB m {348965BB-1862-CEEE-3AC3-179F6B7428FB} + +By default, format-bytes.py reads the first 16 bytes (if available) of the file(s) provided as argument, and parses these bytes as: + Integer: I + Float: F + IPv4 address: N + epoch: E + GUID: G + +1I is a 8-bit integer, 2I is a 16-bit integer, ... + +Bytes are interpreted in little-endian (l), big-endian (b) and mixed-endian (m) format. Mixed-endian is only used for GUIDs (G). +Integers can be signed (s) or unsigned (u). + +Use option -f to specify how bytes should be parsed: this option takes a Python struct format string. +Example: + +format-bytes.py -f hib random.bin +File: random.bin + 0: 26043 65bb + 1: -823256990 -3111e79e + 2: 58 3a + +FYI, Python struct module format characters are: + +Character Byte order +-------------------- +@ native += native +< little-endian +> big-endian +! network (= big-endian) + +Format C Type Standard size +----------------------------------------- +x pad byte +c char 1 +b signed 1 +B unsigned char 1 +? _Bool 1 +h short 2 +H unsigned short 2 +i int 4 +I unsigned int 4 +l long 4 +L unsigned long 4 +q long long 8 +Q unsigned long long 8 +f float 4 +d double 8 +s char[] +p char[] +P void * + +To parse a repeating sequence of bytes, use options --count (to specify the number of repetitions) and --step (to specify the number bytes between repeats). 
+Example: + +format-bytes.py -c 2 -s 4 random.bin +File: random.bin +s:signed u:unsigned l:little-endian b:big-endian m:mixed-endian +00 1I: s -69 u 187 +00 2I: sl 26043 ul 26043 sb -17563 ub 47973 +00 4I: sl 881419707 ul 881419707 sb -1150973644 ub 3143993652 +00 4F: l 0.000000 b -0.003502 +00 4N: b 187.101.137.52 l 52.137.101.187 +00 4E: l 1997/12/06 14:48:27 b 2069/08/17 19:34:12 +00 8I: sl -3535861847371979333 ul 14910882226337572283 sb -4943394157892145458 ub 13503349915817406158 +00 8F: l -1661678170725283018588028971660576297715302893638508902075603349019820032.000000 b -0.000000 +00 16G: b BB658934-6218-EECE-3AC3-179F6B7428FB m {348965BB-1862-CEEE-3AC3-179F6B7428FB} +04 1I: s 98 u 98 +04 2I: sl 6242 ul 6242 sb 25112 ub 25112 +04 4I: sl -823256990 ul 3471710306 sb 1645801166 ub 1645801166 +04 4F: l -1997287680.000000 b 705278197607520272384.000000 +04 4N: b 98.24.238.206 l 206.238.24.98 +04 4E: l 2080/01/05 19:58:26 b 2022/02/25 14:59:26 +04 8I: sl -6982898039867434910 ul 11463846033842116706 sb 7068662184674531231 ub 7068662184674531231 +04 8F: l -0.000000 b 358946151129582029215291849393230786808315346836706673156033999581834828933214436444158528577134449241373022018959436034143150814561128186558682352782632064229834752.000000 +04 16G: b 6218EECE-3AC3-179F-6B74-28FBEB2AD62A m {CEEE1862-C33A-9F17-6B74-28FBEB2AD62A} + + +As stated at the beginning of this manual, this tool is very versatile when it comes to handling files. This will be explained now. + +This tool reads files in binary mode. It can read files from disk, from standard input (stdin) and from "generated" files via the command line. +It can also partially read files (this is done with the cut operator). + +If no file arguments are provided to this tool, it will read data from standard input (stdin). 
This way, this tool can be used in a piped chain of commands, like this: + +oledump.py -s 4 -d sample.doc.vir | tool.py + +When one or more file arguments are provided to this tool, it will read the files and process the content. +How the files are read, depends on the type of file arguments that are provided. File arguments that start with character @ or # have special meaning, and will be explained later. + +If a file argument does not start with @ or #, it is considered to be a file on disk and the content will be read from disk. +If the file is not a compressed file, the binary content of the file is read from disk for processing. +Compressed files are solely recognized based on their extension: .zip and .gz. +If a file argument with extension .gz is provided, the tool will decompress the gzip file in memory and process the decompressed content. No checks are made to ensure that the file with extension .gz is an actual gzip compressed file. +If a file argument with extension .zip is provided, the tool will extract the first file (or only file) from the ZIP file in memory and process the decompressed content. No checks are made to ensure that the file with extension .zip is an actual ZIP compressed file. +Password protected ZIP files can be processed too. The tool uses password 'infected' (without quotes) as default password. A different password can be provided using option --password. + +Example: + +tool.py sample.zip + +To prevent the tool from decompressing .zip or .gz files, but to process the compressed file itself, use option --noextraction. + +File arguments that start with character @ ("here files"), are read as text files that contain file arguments (one per line) to be processed. 
+For example, we take a text file with filename list.txt and following content: + +sample-1.bin +sample-5.bin +sample-7.bin + +When using this file (list.txt) in the following command: + +tool.py @list.txt + +the tool will process the following files: sample-1.bin, sample-5.bin and sample-7.bin. + +Wildcards are supported too. The classic *, ? and [] wildcard characters are supported. For example, use the following command to process all .exe and .dll files in the Windows directory: + +tool.py C:\Windows\*.exe C:\Windows\*.dll + +To prevent the tool from processing file arguments with wildcard characters or special initial characters (@ and #) differently, but to process them as normal files, use option --literalfilenames. + +File arguments that start with character # have special meaning. These are not processed as actual files on disk (except when option --literalfilenames is used), but as file arguments that specify how to "generate" the file content. + +File arguments that start with #, #h#, #b# or #e# are used to "generate" the file content. +Arguments that start with #c# are not file arguments, but cut operators (explained later). + +Generating the file content with a # file argument means that the file content is not read from disk, but generated in memory based on the characteristics provided via the file argument. + +When a file argument starts with # (and not with #h#, #b#, #e# or #c#), all characters that follow the # character specify the content of the generated file. +For example, file argument #ABCDE specifies a file containing exactly 5 bytes: ASCII characters A, B, C, D and E. +Thus the following command: + +tool.py #ABCDE + +will make the tool process data with binary content ABCDE. #ABCDE is not an actual file written on disk, but it is a notational convention to provide data via the command line. 
+ +Since this notation can not be used to specify all possible byte values, hexadecimal encoding (#h#) and BASE64 encoding (#b#) notation is supported too. +For example, #h#4142434445 is an hexadecimal notation that generates data ABCDE. Hexadecimal notation allows the generation of non-printable characters for example, like NULL bytes: #h#00 +File argument #b#QUJDREU= is another example, this time BASE64 notation, that generates data ABCDE. + +File arguments that start with #e# are a notational convention to use expressions to generate data. An expression is a single function or the concatenation of several functions (using character + as concatenation operator). +4 functions are available: random, loremipsum, repeat and chr. + +Function random takes exactly one argument: an integer (with value 1 or more). Integers can be specified using decimal notation or hexadecimal notation (prefix 0x). +The random function generates a sequence of bytes with a random value (between 0 and 255), the argument specifies how many bytes need to be generated. Remark that the random number generator that is used is just the Python random number generator, not a cryptographic random number generator. + +Example: + +tool.py #e#random(100) + +will make the tool process data consisting of a sequence of 100 random bytes. + +Function loremipsum takes exactly one argument: an integer (with value 1 or more). +The loremipsum function generates "lorem ipsum" text (fake latin), the argument specifies the number of sentences to generate. + +Example: #e#loremipsum(2) generates this text: +Ipsum commodo proin pulvinar hac vel nunc dignissim neque eget odio erat magna lorem urna cursus fusce facilisis porttitor congue eleifend taciti. Turpis duis suscipit facilisi tristique dictum praesent natoque sem mi egestas venenatis per dui sit sodales est condimentum habitasse ipsum phasellus non bibendum hendrerit. + +Function chr takes one argument or two arguments. 
+chr with one argument takes an integer between 0 and 255, and generates a single byte with the value specified by the integer. +chr with two arguments takes two integers between 0 and 255, and generates a byte sequence with the values specified by the integers. +For example #e#chr(0x41,0x45) generates data ABCDE. + +Function repeat takes two arguments: an integer (with value 1 or more) and a byte sequence. This byte sequence can be a quoted string of characters (single quotes), like 'ABCDE' or an hexadecimal string prefixed with 0x, like 0x4142434445. +The repeat function will create a sequence of bytes consisting of the provided byte sequence (the second argument) repeated as many times as specified by the first argument. +For example, #e#repeat(3, 'AB') generates byte sequence ABABAB. + +When more than one function needs to be used, the byte sequences generated by the functions can be concatenated with the + operator. +For example, #e#repeat(10,0xFF)+random(100) will generate a byte sequence of 10 FF bytes followed by 100 random bytes. + +The cut argument (or cut operator) allows for the partial selection of the content of a file. This argument starts with #c# followed by a "cut-expression". Use this expression to "cut out" part of the content. +The cut-argument must be put in front of a file argument, like in this example: + +tool.py #c#0:100l data.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data.bin. + +A cut argument is applied to all file arguments that follow it. Example: + +tool.py #c#0:100l data-1.bin data-2.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data-1.bin and the first 100 bytes file data-2.bin. 
+ +More than one cut argument can be used, like in this example: + +tool.py #c#0:100l data-1.bin #c#0:200l data-2.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data-1.bin and the first 200 bytes (0:200l) of file data-2.bin. + +A cut-expression is composed of 2 terms separated by a colon (:), like this: +termA:termB +termA and termB can be: +- nothing (an empty string) +- a positive decimal number; example: 10 +- an hexadecimal number (to be preceded by 0x); example: 0x10 +- a case sensitive string to search for (surrounded by square brackets and single quotes); example: ['MZ'] +- an hexadecimal string to search for (surrounded by square brackets); example: [d0cf11e0] +If termA is nothing, then the cut section of bytes starts with the byte at position 0. +If termA is a number, then the cut section of bytes starts with the byte at the position given by the number (first byte has index 0). +If termA is a string to search for, then the cut section of bytes starts with the byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). +If termB is nothing, then the cut section of bytes ends with the last byte. +If termB is a number, then the cut section of bytes ends with the byte at the position given by the number (first byte has index 0). +When termB is a number, it can have suffix letter l. This indicates that the number is a length (number of bytes), and not a position. +termB can also be a negative number (decimal or hexademical): in that case the position is counted from the end of the file. For example, :-5 selects the complete file except the last 5 bytes. +If termB is a string to search for, then the cut section of bytes ends with the last byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). +No checks are made to assure that the position specified by termA is lower than the position specified by termB. 
This is left up to the user. +Search string expressions (ASCII and hexadecimal) can be followed by an instance (a number equal to 1 or greater) to indicate which instance needs to be taken. For example, ['ABC']2 will search for the second instance of string 'ABC'. If this instance is not found, then nothing is selected. +Search string expressions (ASCII and hexadecimal) can be followed by an offset (+ or - a number) to add (or substract) an offset to the found instance. For example, ['ABC']+3 will search for the first instance of string 'ABC' and then select the bytes after ABC (+ 3). +Finally, search string expressions (ASCII and hexadecimal) can be followed by an instance and an offset. +Examples: +This cut-expression can be used to dump the first 256 bytes of a PE file located inside the file content: ['MZ']:0x100l +This cut-expression can be used to dump the OLE file located inside the file content: [d0cf11e0]: + +''' + for line in manual.split('\n'): + print(textwrap.fill(line, 79)) + +#Convert 2 Bytes If Python 3 +def C2BIP3(string): + if sys.version_info[0] > 2: + return bytes([ord(x) for x in string]) + else: + return string + +#Convert 2 Integer If Python 2 +def C2IIP2(data): + if sys.version_info[0] > 2: + return data + else: + return ord(data) + +# CIC: Call If Callable +def CIC(expression): + if callable(expression): + return expression() + else: + return expression + +# IFF: IF Function +def IFF(expression, valueTrue, valueFalse): + if expression: + return CIC(valueTrue) + else: + return CIC(valueFalse) + +#---------------------------------------------------------------------------------------------------- +#import random +#import binascii +#import zipfile +#import gzip +#import sys +#if sys.version_info[0] >= 3: +# from io import BytesIO as DataIO +#else: +# from cStringIO import StringIO as DataIO + +def LoremIpsumSentence(minimum, maximum): + words = ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit', 'etiam', 'tortor', 
'metus', 'cursus', 'sed', 'sollicitudin', 'ac', 'sagittis', 'eget', 'massa', 'praesent', 'sem', 'fermentum', 'dignissim', 'in', 'vel', 'augue', 'scelerisque', 'auctor', 'libero', 'nam', 'a', 'gravida', 'odio', 'duis', 'vestibulum', 'vulputate', 'quam', 'nec', 'cras', 'nibh', 'feugiat', 'ut', 'vitae', 'ornare', 'justo', 'orci', 'varius', 'natoque', 'penatibus', 'et', 'magnis', 'dis', 'parturient', 'montes', 'nascetur', 'ridiculus', 'mus', 'curabitur', 'nisl', 'egestas', 'urna', 'iaculis', 'lectus', 'maecenas', 'ultrices', 'velit', 'eu', 'porta', 'hac', 'habitasse', 'platea', 'dictumst', 'integer', 'id', 'commodo', 'mauris', 'interdum', 'malesuada', 'fames', 'ante', 'primis', 'faucibus', 'accumsan', 'pharetra', 'aliquam', 'nunc', 'at', 'est', 'non', 'leo', 'nulla', 'sodales', 'porttitor', 'facilisis', 'aenean', 'condimentum', 'rutrum', 'facilisi', 'tincidunt', 'laoreet', 'ultricies', 'neque', 'diam', 'euismod', 'consequat', 'tempor', 'elementum', 'lobortis', 'erat', 'ligula', 'risus', 'donec', 'phasellus', 'quisque', 'vivamus', 'pellentesque', 'tristique', 'venenatis', 'purus', 'mi', 'dictum', 'posuere', 'fringilla', 'quis', 'magna', 'pretium', 'felis', 'pulvinar', 'lacinia', 'proin', 'viverra', 'lacus', 'suscipit', 'aliquet', 'dui', 'molestie', 'dapibus', 'mollis', 'suspendisse', 'sapien', 'blandit', 'morbi', 'tellus', 'enim', 'maximus', 'semper', 'arcu', 'bibendum', 'convallis', 'hendrerit', 'imperdiet', 'finibus', 'fusce', 'congue', 'ullamcorper', 'placerat', 'nullam', 'eros', 'habitant', 'senectus', 'netus', 'turpis', 'luctus', 'volutpat', 'rhoncus', 'mattis', 'nisi', 'ex', 'tempus', 'eleifend', 'vehicula', 'class', 'aptent', 'taciti', 'sociosqu', 'ad', 'litora', 'torquent', 'per', 'conubia', 'nostra', 'inceptos', 'himenaeos'] + sample = random.sample(words, random.randint(minimum, maximum)) + sample[0] = sample[0].capitalize() + return ' '.join(sample) + '.' 
def LoremIpsum(sentences):
    """Return `sentences` random lorem ipsum sentences joined by single spaces."""
    return ' '.join([LoremIpsumSentence(15, 30) for i in range(sentences)])

# Token types produced by Tokenize (also used as its internal scanner states).
STATE_START = 0
STATE_IDENTIFIER = 1
STATE_STRING = 2
STATE_SPECIAL_CHAR = 3
STATE_ERROR = 4

def Tokenize(expression):
    """Split a #e# expression into a list of [type, text] tokens.

    Identifiers are runs of ASCII letters and digits, strings are delimited
    by single quotes, and any other non-space character is a special
    character token. An unterminated string yields the single token
    [STATE_ERROR, 'Error: string not closed', <partial string>].
    """
    result = []
    token = ''
    state = STATE_START
    for char in expression:
        if char == "'":
            if state == STATE_START:
                state = STATE_STRING
            elif state == STATE_IDENTIFIER:
                result.append([STATE_IDENTIFIER, token])
                state = STATE_STRING
                token = ''
            elif state == STATE_STRING:
                result.append([STATE_STRING, token])
                state = STATE_START
                token = ''
        elif char >= '0' and char <= '9' or char.lower() >= 'a' and char.lower() <= 'z':
            if state == STATE_START:
                token = char
                state = STATE_IDENTIFIER
            else:
                # Inside an identifier or a string: keep accumulating.
                token += char
        elif char == ' ':
            if state == STATE_IDENTIFIER:
                result.append([STATE_IDENTIFIER, token])
                token = ''
                state = STATE_START
            elif state == STATE_STRING:
                token += char
        else:
            if state == STATE_IDENTIFIER:
                result.append([STATE_IDENTIFIER, token])
                token = ''
                state = STATE_START
                result.append([STATE_SPECIAL_CHAR, char])
            elif state == STATE_STRING:
                token += char
            else:
                result.append([STATE_SPECIAL_CHAR, char])
                token = ''
    if state == STATE_IDENTIFIER:
        result.append([state, token])
    elif state == STATE_STRING:
        result = [[STATE_ERROR, 'Error: string not closed', token]]
    return result

def _ParsingError(tokens):
    """Report a parsing error and return the (None, tokens) failure pair."""
    print('Parsing error')
    return None, tokens

def ParseFunction(tokens):
    """Parse one call `name(arg[, arg...])` from the front of `tokens`.

    Returns [[name, arguments], remaining_tokens] on success, where each
    argument is an identifier or string token. On failure prints
    'Parsing error' and returns (None, tokens). At least one argument is
    required by this grammar.
    """
    if len(tokens) == 0:
        return _ParsingError(tokens)
    if tokens[0][0] != STATE_IDENTIFIER:
        return _ParsingError(tokens)
    function = tokens[0][1]
    tokens = tokens[1:]
    if len(tokens) == 0:
        return _ParsingError(tokens)
    if tokens[0][0] != STATE_SPECIAL_CHAR or tokens[0][1] != '(':
        return _ParsingError(tokens)
    tokens = tokens[1:]
    if len(tokens) == 0:
        return _ParsingError(tokens)
    arguments = []
    while True:
        if tokens[0][0] != STATE_IDENTIFIER and tokens[0][0] != STATE_STRING:
            return _ParsingError(tokens)
        arguments.append(tokens[0])
        tokens = tokens[1:]
        if len(tokens) == 0:
            return _ParsingError(tokens)
        if tokens[0][0] != STATE_SPECIAL_CHAR or (tokens[0][1] != ',' and tokens[0][1] != ')'):
            return _ParsingError(tokens)
        if tokens[0][1] == ')':
            tokens = tokens[1:]
            break
        # A comma: another argument must follow.
        tokens = tokens[1:]
        if len(tokens) == 0:
            return _ParsingError(tokens)
    return [[function, arguments], tokens]

def Parse(expression):
    """Parse `call + call + ...` into a list of [name, arguments] entries.

    Returns None (after printing a message) when the expression is invalid.
    """
    tokens = Tokenize(expression)
    if len(tokens) == 0:
        print('Parsing error')
        return None
    if tokens[0][0] == STATE_ERROR:
        print(tokens[0][1])
        print(tokens[0][2])
        print(expression)
        return None
    functioncalls = []
    while True:
        functioncall, tokens = ParseFunction(tokens)
        if functioncall is None:
            return None
        functioncalls.append(functioncall)
        if len(tokens) == 0:
            return functioncalls
        if tokens[0][0] != STATE_SPECIAL_CHAR or tokens[0][1] != '+':
            print('Parsing error')
            return None
        tokens = tokens[1:]

def InterpretInteger(token):
    """Return the decimal integer value of an identifier token, else None."""
    if token[0] != STATE_IDENTIFIER:
        return None
    try:
        return int(token[1])
    except ValueError:
        return None

def Hex2Bytes(hexadecimal):
    """Decode a hexadecimal string (odd lengths are left-padded with '0').

    Returns the decoded bytes, or None when the string is not valid hex.
    """
    if len(hexadecimal) % 2 == 1:
        hexadecimal = '0' + hexadecimal
    try:
        return binascii.a2b_hex(hexadecimal)
    except (TypeError, ValueError, binascii.Error):
        # Python 2 raises TypeError, Python 3 raises binascii.Error/ValueError.
        return None

def InterpretHexInteger(token):
    """Return the integer value of a 0x-prefixed identifier token, else None."""
    if token[0] != STATE_IDENTIFIER:
        return None
    if not token[1].startswith('0x'):
        return None
    data = Hex2Bytes(token[1][2:])
    if data is None:
        return None
    integer = 0
    # bytearray yields integers on both Python 2 and Python 3.
    for byte in bytearray(data):
        integer = integer * 0x100 + byte
    return integer

def InterpretNumber(token):
    """Return the value of a decimal or 0x-hexadecimal token, else None."""
    number = InterpretInteger(token)
    if number is None:
        return InterpretHexInteger(token)
    else:
        return number

def InterpretBytes(token):
    """Return the byte sequence denoted by a quoted-string or 0x-hex token.

    Returns None when the token is neither. Quoted strings are converted
    with C2BIP3 so that the result is a byte string on Python 3 as well.
    """
    if token[0] == STATE_STRING:
        return C2BIP3(token[1])
    if token[0] != STATE_IDENTIFIER:
        return None
    if not token[1].startswith('0x'):
        return None
    return Hex2Bytes(token[1][2:])

def CheckFunction(functionname, arguments, countarguments, maxcountarguments=None):
    """Validate the number of arguments of a function call.

    Without `maxcountarguments` exactly `countarguments` arguments are
    required; otherwise the count must lie in
    [countarguments, maxcountarguments]. Returns True when there is an
    ERROR (after printing it) and False when the call is fine.
    """
    if maxcountarguments is None:
        if countarguments == 0 and len(arguments) != 0:
            print('Error: function %s takes no arguments, %d are given' % (functionname, len(arguments)))
            return True
        if countarguments == 1 and len(arguments) != 1:
            print('Error: function %s takes 1 argument, %d are given' % (functionname, len(arguments)))
            return True
        if countarguments != len(arguments):
            print('Error: function %s takes %d arguments, %d are given' % (functionname, countarguments, len(arguments)))
            return True
    else:
        if len(arguments) < countarguments or len(arguments) > maxcountarguments:
            print('Error: function %s takes between %d and %d arguments, %d are given' % (functionname, countarguments, maxcountarguments, len(arguments)))
            return True
    return False

def CheckNumber(argument, minimum=None, maximum=None):
    """Return the numeric value of `argument` if it lies within the bounds.

    Prints an error and returns None when the token is not a number or is
    outside [minimum, maximum] (a bound of None is not checked).
    """
    number = InterpretNumber(argument)
    if number is None:
        print('Error: argument should be a number: %s' % argument[1])
        return None
    if minimum is not None and number < minimum:
        print('Error: argument should be minimum %d: %d' % (minimum, number))
        return None
    if maximum is not None and number > maximum:
        print('Error: argument should be maximum %d: %d' % (maximum, number))
        return None
    return number

FUNCTIONNAME_REPEAT = 'repeat'
FUNCTIONNAME_RANDOM = 'random'
FUNCTIONNAME_CHR = 'chr'
FUNCTIONNAME_LOREMIPSUM = 'loremipsum'

def Interpret(expression):
    """Evaluate a #e# expression and return the generated byte string.

    Supported functions: repeat(count, bytes), random(count),
    loremipsum(sentences), chr(value) and chr(first, last). Returns None
    (after printing a message) when the expression is invalid.

    The result is accumulated in a bytearray so that it is a byte string on
    Python 2 and Python 3 alike (mixing str and bytes fails on Python 3).
    """
    functioncalls = Parse(expression)
    if functioncalls is None:
        return None
    decoded = bytearray()
    for functionname, arguments in functioncalls:
        if functionname == FUNCTIONNAME_REPEAT:
            if CheckFunction(functionname, arguments, 2):
                return None
            number = CheckNumber(arguments[0], minimum=1)
            if number is None:
                return None
            sequence = InterpretBytes(arguments[1])
            if sequence is None:
                print('Error: argument should be a byte sequence: %s' % arguments[1][1])
                return None
            decoded += number * sequence
        elif functionname == FUNCTIONNAME_RANDOM:
            if CheckFunction(functionname, arguments, 1):
                return None
            number = CheckNumber(arguments[0], minimum=1)
            if number is None:
                return None
            decoded += bytearray(random.randint(0, 255) for x in range(number))
        elif functionname == FUNCTIONNAME_LOREMIPSUM:
            if CheckFunction(functionname, arguments, 1):
                return None
            number = CheckNumber(arguments[0], minimum=1)
            if number is None:
                return None
            decoded += C2BIP3(LoremIpsum(number))
        elif functionname == FUNCTIONNAME_CHR:
            if CheckFunction(functionname, arguments, 1, 2):
                return None
            # The manual documents the range 0..255, so NUL must be accepted.
            number = CheckNumber(arguments[0], minimum=0, maximum=255)
            if number is None:
                return None
            if len(arguments) == 1:
                decoded.append(number)
            else:
                number2 = CheckNumber(arguments[1], minimum=0, maximum=255)
                if number2 is None:
                    return None
                decoded += bytearray(range(number, number2 + 1))
        else:
            print('Error: unknown function: %s' % functionname)
            return None
    return bytes(decoded)

# Result kinds returned by FilenameCheckHash.
FCH_FILENAME = 0
FCH_DATA = 1
FCH_ERROR = 2

def FilenameCheckHash(filename, literalfilename):
    """Classify a file argument and decode "generated file" notations.

    Returns (FCH_FILENAME, filename) for a file to read from disk,
    (FCH_DATA, data) for #, #h#, #b# and #e# arguments, and
    (FCH_ERROR, kind) when decoding fails, where kind is 'hexadecimal',
    'base64' or 'expression'. With `literalfilename` set, every argument is
    a plain filename.
    """
    if literalfilename:
        return FCH_FILENAME, filename
    elif filename.startswith('#h#'):
        result = Hex2Bytes(filename[3:])
        if result is None:
            return FCH_ERROR, 'hexadecimal'
        else:
            return FCH_DATA, result
    elif filename.startswith('#b#'):
        try:
            return FCH_DATA, binascii.a2b_base64(filename[3:])
        except (TypeError, ValueError, binascii.Error):
            return FCH_ERROR, 'base64'
    elif filename.startswith('#e#'):
        result = Interpret(filename[3:])
        if result is None:
            return FCH_ERROR, 'expression'
        else:
            return FCH_DATA, result
    elif filename.startswith('#'):
        return FCH_DATA, C2BIP3(filename[1:])
    else:
        return FCH_FILENAME, filename

class cBinaryFile:
    """Binary reader over stdin, a disk file, a .zip/.gz file or generated data.

    filename: '' for stdin, a path, or a #-notation handled by
        FilenameCheckHash.
    zippassword: password passed when opening the entry of a ZIP file.
    noextraction: when True, .zip/.gz files are read as-is.
    literalfilename: when True, # prefixes have no special meaning.

    Raises Exception when a #-notation argument cannot be decoded.
    """

    def __init__(self, filename, zippassword='infected', noextraction=False, literalfilename=False):
        self.filename = filename
        self.zippassword = zippassword
        self.noextraction = noextraction
        self.literalfilename = literalfilename
        self.oZipfile = None

        fch, data = FilenameCheckHash(self.filename, self.literalfilename)
        if fch == FCH_ERROR:
            raise Exception('Error %s parsing filename: %s' % (data, self.filename))

        if self.filename == '':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
            self.fIn = sys.stdin
        elif fch == FCH_DATA:
            self.fIn = DataIO(data)
        elif not self.noextraction and self.filename.lower().endswith('.zip'):
            self.oZipfile = zipfile.ZipFile(self.filename, 'r')
            if len(self.oZipfile.infolist()) == 1:
                self.fIn = self.oZipfile.open(self.oZipfile.infolist()[0], 'r', self.zippassword)
            else:
                # Archives without exactly one entry are read as raw files.
                self.oZipfile.close()
                self.oZipfile = None
                self.fIn = open(self.filename, 'rb')
        elif not self.noextraction and self.filename.lower().endswith('.gz'):
            self.fIn = gzip.GzipFile(self.filename, 'rb')
        else:
            self.fIn = open(self.filename, 'rb')

    def close(self):
        """Close the underlying stream (never stdin) and the ZIP archive."""
        if self.fIn != sys.stdin:
            self.fIn.close()
        if self.oZipfile is not None:
            self.oZipfile.close()

    def read(self, size=None):
        """Read `size` bytes, or everything that remains when size is None."""
        # A text-mode stdin on Python 3 exposes its binary stream as .buffer.
        fRead = getattr(self.fIn, 'buffer', self.fIn)
        if size is None:
            return fRead.read()
        else:
            return fRead.read(size)

    def Data(self):
        """Read everything that remains, close the file and return the data."""
        # Go through read() so that stdin is read as bytes on Python 3 too.
        data = self.read()
        self.close()
        return data

#----------------------------------------------------------------------------------------------------

def File2Strings(filename):
    """Return the lines of a text file ('' means stdin) without newlines.

    Returns a list of strings, or None when the file cannot be opened or
    read.
    """
    try:
        if filename == '':
            f = sys.stdin
        else:
            f = open(filename, 'r')
    except (IOError, OSError):
        return None
    try:
        # A real list: on Python 3 map() is lazy and cannot be concatenated.
        return [line.rstrip('\n') for line in f.readlines()]
    except (IOError, OSError, UnicodeError):
        return None
    finally:
        if f != sys.stdin:
            f.close()

def ProcessAt(argument):
    """Expand an @file argument into the arguments listed in that file.

    Any other argument is returned as a one-element list. Raises Exception
    when the @file cannot be read.
    """
    if argument.startswith('@'):
        strings = File2Strings(argument[1:])
        if strings is None:
            raise Exception('Error reading %s' % argument)
        else:
            return strings
    else:
        return [argument]

def Glob(filename):
    """Expand wildcards; an argument matching nothing is returned unchanged."""
    filenames = glob.glob(filename)
    if len(filenames) == 0:
        return [filename]
    else:
        return filenames

def ExpandFilenameArguments(filenames, literalfilenames=False):
    """Turn command-line arguments into a list of [filename, cutexpression].

    @files and wildcards are expanded (unless `literalfilenames` is set) and
    every #c# argument sets the cut-expression for the files that follow it.
    No arguments at all (or only #c# arguments) means stdin: [['', cut]].
    """
    if len(filenames) == 0:
        return [['', '']]
    if literalfilenames:
        return [[filename, ''] for filename in filenames]

    expanded = []
    for argument in filenames:
        for entry in ProcessAt(argument):
            expanded.extend(Glob(entry))

    cutexpression = ''
    result = []
    seen = set()
    for filename in expanded:
        if filename.startswith('#c#'):
            cutexpression = filename[3:]
            continue
        # De-duplicate on the (file, cut) pair. De-duplicating the raw
        # arguments would drop a repeated #c# argument and silently apply
        # the wrong cut-expression to the files after it.
        key = (filename, cutexpression)
        if key not in seen:
            seen.add(key)
            result.append([filename, cutexpression])
    if result == []:
        return [['', cutexpression]]
    return result

# Kinds of terms in a cut-expression.
CUTTERM_NOTHING = 0
CUTTERM_POSITION = 1
CUTTERM_FIND = 2
CUTTERM_LENGTH = 3

def Replace(string, dReplacements):
    """Return dReplacements[string] when present, else `string` itself."""
    if string in dReplacements:
        return dReplacements[string]
    else:
        return string

def _FindTerm(oMatch, needle, argument):
    """Build the CUTTERM_FIND result for a matched search term."""
    instance = int(Replace(oMatch.group(2), {None: '1'}))
    offset = int(Replace(oMatch.group(3), {None: '0'}))
    return CUTTERM_FIND, (needle, instance, offset), argument[len(oMatch.group(0)):]

def ParseCutTerm(argument):
    """Parse one term from the front of a cut-expression.

    Returns (type, value, remainder):
      '' -> (CUTTERM_NOTHING, None, '')
      [-]0xHEX or [-]DECIMAL -> (CUTTERM_POSITION, int, remainder)
      [hex] or ['text'], optionally followed by an instance number and a
        +/- offset -> (CUTTERM_FIND, (bytes, instance, offset), remainder)
      anything else -> (None, None, argument)

    Raises Exception for a [hex] term with an odd number of digits.
    """
    if argument == '':
        return CUTTERM_NOTHING, None, ''

    oMatch = re.match(r'\-?0x([0-9a-f]+)', argument, re.I)
    if oMatch is not None:
        value = int(oMatch.group(1), 16)
        if argument.startswith('-'):
            value = -value
        return CUTTERM_POSITION, value, argument[len(oMatch.group(0)):]

    oMatch = re.match(r'\-?(\d+)', argument)
    if oMatch is not None:
        value = int(oMatch.group(1))
        if argument.startswith('-'):
            value = -value
        return CUTTERM_POSITION, value, argument[len(oMatch.group(0)):]

    oMatch = re.match(r'\[([0-9a-f]+)\](\d+)?([+-]\d+)?', argument, re.I)
    if oMatch is not None:
        if len(oMatch.group(1)) % 2 == 1:
            raise Exception("Uneven length hexadecimal string")
        return _FindTerm(oMatch, binascii.a2b_hex(oMatch.group(1)), argument)

    oMatch = re.match(r"\[\'(.+?)\'\](\d+)?([+-]\d+)?", argument)
    if oMatch is not None:
        # The data searched is binary, so the needle must be bytes as well.
        return _FindTerm(oMatch, C2BIP3(oMatch.group(1)), argument)

    return None, None, argument

def ParseCutArgument(argument):
    """Parse a cut-expression `termA:termB`.

    Returns (typeLeft, valueLeft, typeRight, valueRight); all four are None
    when the expression is invalid. A right-hand number with suffix 'l' is
    reported as CUTTERM_LENGTH.
    """
    invalid = (None, None, None, None)
    termType, value, remainder = ParseCutTerm(argument.strip())
    if termType == CUTTERM_NOTHING:
        return CUTTERM_NOTHING, None, CUTTERM_NOTHING, None
    elif termType is None:
        # No left term: the expression must start with the separator.
        if not remainder.startswith(':'):
            return invalid
        typeLeft = CUTTERM_NOTHING
        valueLeft = None
        remainder = remainder[1:]
    else:
        typeLeft = termType
        valueLeft = value
        if typeLeft == CUTTERM_POSITION and valueLeft < 0:
            return invalid
        if typeLeft == CUTTERM_FIND and valueLeft[1] == 0:
            return invalid
        if not remainder.startswith(':'):
            return invalid
        remainder = remainder[1:]
    termType, value, remainder = ParseCutTerm(remainder)
    if termType == CUTTERM_POSITION and remainder == 'l':
        return typeLeft, valueLeft, CUTTERM_LENGTH, value
    elif termType is None or remainder != '':
        return invalid
    elif termType == CUTTERM_FIND and value[1] == 0:
        return invalid
    else:
        return typeLeft, valueLeft, termType, value

def Find(data, value, nth):
    """Return the position of the nth occurrence of `value`, or -1."""
    position = -1
    while nth > 0:
        position = data.find(value, position + 1)
        if position == -1:
            return -1
        nth -= 1
    return position

def CutData(stream, cutArgument):
    """Return the part of `stream` selected by a cut-expression.

    An empty or invalid cut-expression returns `stream` unchanged. When a
    search term is not found, an empty sequence of the same type as
    `stream` is returned.
    """
    if cutArgument == '':
        return stream

    typeLeft, valueLeft, typeRight, valueRight = ParseCutArgument(cutArgument)

    if typeLeft is None:
        return stream

    if typeLeft == CUTTERM_NOTHING:
        positionBegin = 0
    elif typeLeft == CUTTERM_POSITION:
        positionBegin = valueLeft
    elif typeLeft == CUTTERM_FIND:
        positionBegin = Find(stream, valueLeft[0], valueLeft[1])
        if positionBegin == -1:
            return stream[:0]
        positionBegin += valueLeft[2]
    else:
        raise Exception("Unknown value typeLeft")

    if typeRight == CUTTERM_NOTHING:
        positionEnd = len(stream)
    elif typeRight == CUTTERM_POSITION and valueRight < 0:
        positionEnd = len(stream) + valueRight
    elif typeRight == CUTTERM_POSITION:
        # The end position is inclusive.
        positionEnd = valueRight + 1
    elif typeRight == CUTTERM_LENGTH:
        positionEnd = positionBegin + valueRight
    elif typeRight == CUTTERM_FIND:
        positionEnd = Find(stream, valueRight[0], valueRight[1])
        if positionEnd == -1:
            return stream[:0]
        else:
            # Include the search term itself in the selection.
            positionEnd += len(valueRight[0])
        positionEnd += valueRight[2]
    else:
        raise Exception("Unknown value typeRight")

    return stream[positionBegin:positionEnd]

def Timestamp2StringLog(stime):
    """Format a time tuple as YYYYMMDD-HHMMSS."""
    return '%04d%02d%02d-%02d%02d%02d' % stime[0:6]

def Timestamp2StringHuman(stime):
    """Format a time tuple as YYYY/MM/DD HH:MM:SS."""
    return '%04d/%02d/%02d %02d:%02d:%02d' % stime[0:6]

def TimestampLocal(epoch=None):
    """Format `epoch` (default: now) as local time."""
    if epoch is None:
        return Timestamp2StringHuman(time.localtime())
    else:
        return Timestamp2StringHuman(time.localtime(epoch))

def TimestampUTC(epoch=None):
    """Format `epoch` (default: now) as UTC."""
    if epoch is None:
        return Timestamp2StringHuman(time.gmtime())
    else:
        return Timestamp2StringHuman(time.gmtime(epoch))

def FormatBytesData(data, position, options):
    """Print the default interpretations of the first bytes of `data`.

    data: byte string; 1, 2, 4, 8 and 16 byte interpretations are printed
        as far as enough bytes are available.
    position: offset printed as a 2-digit hexadecimal prefix; a negative
        value means no prefix.
    options: parsed command-line options (not used here).

    Every multi-byte value is unpacked with an explicit '<' (little-endian)
    and '>' (big-endian) byte order so that the number of values always
    matches the format string, whatever the native byte order is.
    """
    if len(data) == 0:
        return
    # bytearray yields integers on both Python 2 and Python 3.
    bytes = list(bytearray(data))

    if position < 0:
        prefix = ''
    else:
        prefix = '%02X ' % position

    def Unpack(format, size):
        return struct.unpack(format, data[0:size])[0]

    print(prefix + '1I: s %d u %d' % (Unpack('b', 1), Unpack('B', 1)))

    if len(data) < 2:
        return
    print(prefix + '2I: sl %d ul %d sb %d ub %d' % (Unpack('<h', 2), Unpack('<H', 2), Unpack('>h', 2), Unpack('>H', 2)))

    if len(data) < 4:
        return
    print(prefix + '4I: sl %d ul %d sb %d ub %d' % (Unpack('<i', 4), Unpack('<I', 4), Unpack('>i', 4), Unpack('>I', 4)))
    print(prefix + '4F: l %f b %f' % (Unpack('<f', 4), Unpack('>f', 4)))
    print(prefix + '4N: b %d.%d.%d.%d l %d.%d.%d.%d' % (bytes[0], bytes[1], bytes[2], bytes[3], bytes[3], bytes[2], bytes[1], bytes[0]))
    print(prefix + '4E: l %s b %s' % (TimestampUTC(Unpack('<I', 4)), TimestampUTC(Unpack('>I', 4))))

    if len(data) < 8:
        return
    print(prefix + '8I: sl %d ul %d sb %d ub %d' % (Unpack('<q', 8), Unpack('<Q', 8), Unpack('>q', 8), Unpack('>Q', 8)))
    print(prefix + '8F: l %f b %f' % (Unpack('<d', 8), Unpack('>d', 8)))

    if len(data) < 16:
        return
    # Mixed-endian GUID: the first three fields are byte-swapped.
    print(prefix + '16G: b %02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X m {%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X}' % tuple(bytes[0:16] + bytes[3::-1] + bytes[5:3:-1] + bytes[7:5:-1] + bytes[8:16]))

def FormatBytesSingle(filename, cutexpression, options):
    """Read one file argument and print its parsed bytes.

    Uses options.format (a struct format string) when given; otherwise the
    default interpretations are printed options.count times, advancing
    options.step bytes each time.
    """
    oBinaryFile = cBinaryFile(filename, C2BIP3(options.password), options.noextraction, options.literalfilenames)
    try:
        if cutexpression == '':
            if options.format != '':
                data = oBinaryFile.read(struct.calcsize(options.format))
            else:
                data = oBinaryFile.read(options.count * options.step + 16)
        else:
            data = CutData(oBinaryFile.read(), cutexpression)
    finally:
        # Do not leave the file open when reading or cutting fails.
        oBinaryFile.close()
    if filename != '':
        print('File: %s' % filename)
    if options.format == '':
        print('s:signed u:unsigned l:little-endian b:big-endian m:mixed-endian')
    if options.format != '':
        size = struct.calcsize(options.format)
        for index, element in enumerate(struct.unpack(options.format, data[0:size])):
            if isinstance(element, int):
                print('%2d: %15s %10d %10x' % (index, type(element), element, element))
            else:
                print('%2d: %15s %s' % (index, type(element), str(element)))
    elif options.count == 1:
        FormatBytesData(data, -1, options)
    else:
        position = 0
        for _ in range(options.count):
            FormatBytesData(data[position:], position, options)
            position += options.step

def FormatBytesFiles(filenames, options):
    """Process every [filename, cutexpression] pair in `filenames`."""
    for filename, cutexpression in filenames:
        FormatBytesSingle(filename, cutexpression, options)

def Main():
    # NOTE(review): Main continues past this section; its visible start is reproduced unchanged.
    moredesc = '''

Source code put in the public domain by Didier Stevens, no Copyright
Use at your own risk
https://DidierStevens.com'''

    oParser = optparse.OptionParser(usage='usage: %prog [options] [[@]file|cut-expression ...]\n' + __description__ + moredesc, version='%prog ' + __version__)
    oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual')
oParser.add_option('-f', '--format', default='', help='Struct format string to use') + oParser.add_option('-c', '--count', type=int, default=1, help='The number of repeating bytes (default 1)') + oParser.add_option('-s', '--step', type=int, default=1, help='The step to use when option --count is not 1 (default 1)') + oParser.add_option('--password', default='infected', help='The ZIP password to be used (default infected)') + oParser.add_option('--noextraction', action='store_true', default=False, help='Do not extract from archive file') + oParser.add_option('--literalfilenames', action='store_true', default=False, help='Do not interpret filenames') + (options, args) = oParser.parse_args() + + if options.man: + oParser.print_help() + PrintManual() + return + + FormatBytesFiles(ExpandFilenameArguments(args, options.literalfilenames), options) + +if __name__ == '__main__': + Main() diff --git a/oledump.py b/oledump.py index 7406d22..b42941e 100644 --- a/oledump.py +++ b/oledump.py @@ -2,8 +2,8 @@ __description__ = 'Analyze OLE files (Compound Binary Files)' __author__ = 'Didier Stevens' -__version__ = '0.0.30' -__date__ = '2017/11/04' +__version__ = '0.0.31' +__date__ = '2017/12/13' """ @@ -70,6 +70,7 @@ 2017/10/14: 0.0.29 added options -t, -S; and \x00Attribut bugfix provided by Charles Smutz 2017/11/01: 0.0.30 replaced hexdump and hexasciidump with cDump 2017/11/04: added return codes -1 and 1 + 2017/12/13: 0.0.31 corrected man Todo: """ @@ -150,11 +151,11 @@ def PrintManual(): Option -x produces a hexadecimal dump instead of an ASCII dump. 
C:\Demo>oledump.py -s 1 -x Book1.xls -00000000: FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00 -00000010: 00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5 -00000020: 9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00 -00000030: E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00 -00000040: 0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00 +FE FF 00 00 05 01 02 00 00 00 00 00 00 00 00 00 +00 00 00 00 00 00 00 00 01 00 00 00 02 D5 CD D5 +9C 2E 1B 10 93 97 08 00 2B 2C F9 AE 30 00 00 00 +E4 00 00 00 09 00 00 00 01 00 00 00 50 00 00 00 +0F 00 00 00 58 00 00 00 17 00 00 00 70 00 00 00 ... Option -S dumps the strings. diff --git a/plugin_biff.py b/plugin_biff.py index 1327b2d..7b6b092 100644 --- a/plugin_biff.py +++ b/plugin_biff.py @@ -2,8 +2,8 @@ __description__ = 'BIFF plugin for oledump.py' __author__ = 'Didier Stevens' -__version__ = '0.0.1' -__date__ = '2014/11/21' +__version__ = '0.0.2' +__date__ = '2017/12/12' """ @@ -14,12 +14,16 @@ History: 2014/11/15: start 2014/11/21: changed interface: added options; added options -a (asciidump) and -s (strings) + 2017/12/10: 0.0.2 added optparse & option -o + 2017/12/12: added option -f + 2017/12/13: added 0x support for option -f Todo: """ import struct import re +import optparse def CombineHexASCII(hexDump, asciiDump, length): if hexDump == '': @@ -331,6 +335,17 @@ def Analyze(self): if self.streamname == ['Workbook']: self.ran = True stream = self.stream + + oParser = optparse.OptionParser() + oParser.add_option('-s', '--strings', action='store_true', default=False, help='Dump strings') + oParser.add_option('-a', '--hexascii', action='store_true', default=False, help='Dump hex ascii') + oParser.add_option('-o', '--opcode', type=str, default='', help='Opcode to filter for') + oParser.add_option('-f', '--find', type=str, default='', help='Content to search for') + (options, args) = oParser.parse_args(self.options.split(' ')) + + if options.find.startswith('0x'): + options.find = binascii.a2b_hex(options.find[2:]) + while stream != 
'': formatcodes = 'HH' formatsize = struct.calcsize(formatcodes) @@ -343,16 +358,19 @@ def Analyze(self): opcodename = dOpcodes[opcode] else: opcodename = '' - result.append('%04x %6d %s ' % (opcode, length, opcodename)) + line = '%04x %6d %s ' % (opcode, length, opcodename) + + if options.find == '' and options.opcode == '' or options.opcode != '' and options.opcode.lower() in line.lower() or options.find != '' and options.find in data: + result.append(line) - if self.options == '-a': - result.extend(' ' + foundstring for foundstring in HexASCII(data, 8)) - elif self.options == '-s': - dEncodings = {'s': 'ASCII', 'L': 'UNICODE'} - for encoding, strings in Strings(data).items(): - if len(strings) > 0: - result.append(' ' + dEncodings[encoding] + ':') - result.extend(' ' + foundstring for foundstring in strings) + if options.hexascii: + result.extend(' ' + foundstring for foundstring in HexASCII(data, 8)) + elif options.strings: + dEncodings = {'s': 'ASCII', 'L': 'UNICODE'} + for encoding, strings in Strings(data).items(): + if len(strings) > 0: + result.append(' ' + dEncodings[encoding] + ':') + result.extend(' ' + foundstring for foundstring in strings) return result