Skip to content

Commit

Permalink
20201221a
Browse files Browse the repository at this point in the history
  • Loading branch information
DidierStevens committed Dec 21, 2020
1 parent 1a9e355 commit 4f6310b
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 25 deletions.
87 changes: 69 additions & 18 deletions byte-stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

__description__ = 'Calculate byte statistics'
__author__ = 'Didier Stevens'
__version__ = '0.0.7'
__date__ = '2017/11/01'
__version__ = '0.0.8'
__date__ = '2020/12/21'

"""
Source code put in public domain by Didier Stevens, no Copyright
Expand All @@ -24,6 +24,7 @@
2017/08/12: 0.0.6 added option -r
2017/09/13: 0.0.7 added average consecutive byte difference, refactoring (cCalculateByteStatistics)
2017/11/01: added option -g
2020/12/21: 0.0.8 Python 3
Todo:
"""
Expand All @@ -37,7 +38,13 @@
import string
import textwrap
import binascii
import Tkinter
import operator

bPython3 = sys.version_info[0] > 2
if bPython3:
import tkinter
else:
import Tkinter as tkinter

def PrintManual():
manual = '''
Expand Down Expand Up @@ -370,14 +377,20 @@ def ProcessAt(argument):
def ExpandFilenameArguments(filenames):
    """Expand filename arguments into a de-duplicated list of matching paths.

    Each argument is first passed through ProcessAt (which must return a
    list), every resulting entry is then expanded with glob.glob, and
    duplicates are removed while keeping first-seen order.
    """
    # Flatten the per-argument lists returned by ProcessAt into one list.
    patterns = sum([ProcessAt(argument) for argument in filenames], [])
    # Expand each wildcard pattern and flatten the per-pattern results.
    matches = sum([glob.glob(pattern) for pattern in patterns], [])
    # OrderedDict keys act as an order-preserving set.
    unique = collections.OrderedDict.fromkeys(matches)
    return list(unique)

def P23Ord(value):
    """Return the integer byte value of value on both Python 2 and Python 3.

    Iterating over binary data yields ints on Python 3 and one-character
    strings on Python 2; this helper normalizes both to an int.

    value: an int (returned unchanged) or a length-1 str/bytes object.
    Returns the int unchanged, or ord(value) for anything else.
    Raises TypeError when value is neither an int nor accepted by ord().
    """
    # isinstance (rather than an exact type comparison) also accepts int
    # subclasses, which would otherwise be handed to ord() and fail.
    if isinstance(value, int):
        return value
    return ord(value)

class cCalculateByteStatistics():

def __init__(self):
self.dPrevalence = {iter: 0 for iter in range(0x100)}
self.previous = None
self.sumDifference = 0
self.count = 0

def Process(self, byte):
self.dPrevalence[byte] += 1
if self.previous != None:
Expand Down Expand Up @@ -419,7 +432,7 @@ def Stats(self):
countBASE64Bytes += self.dPrevalence[iter]
for iter in range(0x61, 0x7B):
countBASE64Bytes += self.dPrevalence[iter]
countBASE64Bytes += self.dPrevalence[ord('+')] + self.dPrevalence[ord('/')] + self.dPrevalence[ord('=')]
countBASE64Bytes += self.dPrevalence[P23Ord('+')] + self.dPrevalence[P23Ord('/')] + self.dPrevalence[P23Ord('=')]
entropy = 0.0
for iter in range(0x100):
if self.dPrevalence[iter] > 0:
Expand Down Expand Up @@ -478,6 +491,25 @@ def MaximumAndPosition(buckets, index):
positionMaximum = position
return (valueMaximum, positionMaximum)

def cmp_to_key(mycmp):
    """Convert a cmp= function into a key= function.

    mycmp: a two-argument callable returning a negative number, zero or a
    positive number when its first argument is respectively smaller than,
    equal to or greater than its second argument.

    Returns a callable suitable for the key= parameter of sorted() and
    list.sort(); the wrapper objects it produces expose the wrapped value
    as the .obj attribute and compare through mycmp.
    """
    # Delegate to the standard library (available since Python 2.7 / 3.2)
    # instead of maintaining a hand-written copy of the same wrapper class.
    import functools
    return functools.cmp_to_key(mycmp)

def ByteStats(args, options):
if options.bucket < 2:
print('Bucket size must be at least 2, not %d' % options.bucket)
Expand All @@ -493,14 +525,17 @@ def ByteStats(args, options):
args = ExpandFilenameArguments(args)
for file in args:
if file == '':
fIn = sys.stdin
if bPython3:
fIn = sys.stdin.buffer
else:
fIn = sys.stdin
if sys.platform == 'win32':
import msvcrt
msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
else:
fIn = open(file, 'rb')
for char in fIn.read():
value = ord(char)
value = P23Ord(char)
countBytes += 1
oCalculateByteStatistics.Process(value)
oCalculateByteStatisticsBucket.Process(value)
Expand All @@ -522,8 +557,12 @@ def ByteStats(args, options):
if countBytes % options.bucket == 0:
buckets.append([countBytes - options.bucket, oCalculateByteStatisticsBucket.Stats()])
oCalculateByteStatisticsBucket = cCalculateByteStatistics()
if fIn != sys.stdin:
fIn.close()
if bPython3:
if fIn != sys.stdin.buffer:
fIn.close()
else:
if fIn != sys.stdin:
fIn.close()
if len(diffs) > 1:
dDiffs[savPosition - 2] = values

Expand All @@ -549,7 +588,7 @@ def ByteStats(args, options):
print('Unknown property: %s' % options.property)
return
index = dProperties[options.property]
oTk = Tkinter.Tk()
oTk = tkinter.Tk()
oTk.title('byte-stats: property %s' % options.property)
c_width = len(buckets)
multiplier = 1
Expand All @@ -564,23 +603,27 @@ def ByteStats(args, options):
maximum = max(properties[index] for position, properties in buckets)
c_height = 301
multiplier = float(c_height - 1) / float(maximum)
oCanvas = Tkinter.Canvas(oTk, width=c_width, height=c_height, bg= 'white')
oCanvas = tkinter.Canvas(oTk, width=c_width, height=c_height, bg= 'white')
oCanvas.pack()
list = []
points = []
counter = 0
for position, properties in buckets:
list.append(counter)
list.append(c_height - int(properties[index] * multiplier))
points.append(counter)
points.append(c_height - int(properties[index] * multiplier))
counter += 1
oCanvas.create_line(list)
oCanvas.create_line(points)
oTk.mainloop()
else:
listCount = oCalculateByteStatistics.Prevalence().items()
if options.keys:
index = 0
else:
index = 1
listCount.sort(lambda x, y:cmp(x[index], y[index]), reverse=options.descending)
if bPython3:
listCount = list(listCount)
listCount.sort(key=operator.itemgetter(index), reverse=options.descending)
else:
listCount.sort(lambda x, y:cmp(x[index], y[index]), reverse=options.descending)
lineCounter = 0
dotsPrinted = False
print('Byte ASCII Count Pct')
Expand Down Expand Up @@ -642,12 +685,20 @@ def ByteStats(args, options):
if options.keys:
sequences = sorted(dDiffs.items())
else:
sequences = sorted(dDiffs.items(), cmp=lambda x, y: IFF(len(x[1]) == len(y[1]), cmp(y[0], x[0]), cmp(len(x[1]), len(y[1]))), reverse=True)
if bPython3:
def MyCmp(a, b):
return (a > b) - (a < b)
sequences = sorted(dDiffs.items(), key=cmp_to_key(lambda x, y: IFF(len(x[1]) == len(y[1]), MyCmp(y[0], x[0]), MyCmp(len(x[1]), len(y[1])))), reverse=True)
else:
sequences = sorted(dDiffs.items(), cmp=lambda x, y: IFF(len(x[1]) == len(y[1]), cmp(y[0], x[0]), cmp(len(x[1]), len(y[1]))), reverse=True)
if not options.all:
sequences = sequences[:10]
for sequence in sequences:
if len(sequence[1]) >= options.filter:
print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify(''.join([chr(c) for c in sequence[1]])), 40)))
if bPython3:
print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify((''.join([chr(c) for c in sequence[1]])).encode()).decode(), 40)))
else:
print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify(''.join([chr(c) for c in sequence[1]])), 40)))

def Chr(number):
    """Map a byte value to a displayable character for ASCII dumps.

    Values in the range 0x20 up to but not including 0x7F are rendered as
    their character; everything else is rendered as '.'.
    NOTE(review): IFF is defined elsewhere in the file; presumably it
    returns its second argument when the condition is true and its third
    otherwise - confirm against its definition.
    """
    isPrintable = 0x20 <= number < 0x7F
    # chr(number) is evaluated before IFF is called, exactly as before.
    return IFF(isPrintable, chr(number), '.')
Expand Down
45 changes: 38 additions & 7 deletions cut-bytes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

__description__ = 'Cut a section of bytes out of a file'
__author__ = 'Didier Stevens'
__version__ = '0.0.12'
__date__ = '2020/02/01'
__version__ = '0.0.13'
__date__ = '2020/12/08'

"""
Expand All @@ -30,6 +30,9 @@
2020/01/24: added #h# support for spaces
2020/01/25: fix ascii dump 127
2020/02/01: 0.0.12 added #u#
2020/02/18: 0.0.13 added #E#
2020/10/21: Python 3 fix in cBinaryFile
2020/12/08: base64dump fix
Todo:
"""
Expand All @@ -45,6 +48,7 @@
import gzip
import json
import struct
import codecs
if sys.version_info[0] >= 3:
from io import BytesIO as DataIO
else:
Expand Down Expand Up @@ -159,6 +163,9 @@ def PrintManual():
File arguments that start with #u# are a notational convention to download a file using an url.
For example: #u#http://didierstevens.com
File arguments that start with #E# are a notational convention for strings with escape characters.
For example: #E#line1\nline2
To process a file that starts with #, prefix it with a relative path to the current directory:
cut-bytes.py : .\#data
Output:
Expand Down Expand Up @@ -259,7 +266,7 @@ def HexAsciiDump(self, rle=False):
return oDumpStream.Content()

def Base64Dump(self, nowhitespace=False):
encoded = binascii.b2a_base64(self.data)
encoded = binascii.b2a_base64(self.data).decode().strip()
if nowhitespace:
return encoded
oDumpStream = self.cDumpStream(self.prefix)
Expand Down Expand Up @@ -439,7 +446,10 @@ def CutData(stream, cutArgument):
#Fix for http://bugs.python.org/issue11395
def StdoutWriteChunked(data):
if sys.version_info[0] > 2:
sys.stdout.buffer.write(data)
if isinstance(data, str):
sys.stdout.write(data)
else:
sys.stdout.buffer.write(data)
else:
while data != '':
sys.stdout.write(data[0:10000])
Expand Down Expand Up @@ -726,6 +736,21 @@ def ParsePackExpression(data):
FCH_DATA = 1
FCH_ERROR = 2

# Matches the backslash escape sequences that DecodeEscapes translates;
# any other text is left untouched by the substitution.
ESCAPE_SEQUENCE_RE = re.compile(r'''
( \\U........ # 8-digit hex escapes
| \\u.... # 4-digit hex escapes
| \\x.. # 2-digit hex escapes
| \\[0-7]{1,3} # Octal escapes
| \\N\{[^}]+\} # Unicode characters by name
| \\[\\'"abfnrtv] # Single-character escapes
)''', re.UNICODE | re.VERBOSE)

def DecodeEscapes(str):
    """Replace backslash escape sequences in a string with their characters.

    Only the sequences matched by ESCAPE_SEQUENCE_RE are decoded, each one
    individually through the unicode-escape codec, so the rest of the
    string passes through unchanged.

    str: the text to process (the parameter name shadows the builtin but
    is kept so existing keyword callers keep working).
    Returns the decoded string, or None when a matched sequence is not a
    valid escape (for example a hex escape with non-hex digits); callers
    compare the result with None to report an error.
    """
    def DecodeMatch(match):
        return codecs.decode(match.group(0), 'unicode-escape')

    try:
        return ESCAPE_SEQUENCE_RE.sub(DecodeMatch, str)
    except UnicodeError:
        # The pattern is looser than the codec (e.g. '..' after the hex
        # prefix accepts any characters), so decoding can fail; signal
        # that with None instead of letting the exception escape.
        return None

def DownloadFile(url):
try:
if sys.hexversion >= 0x020601F0:
Expand Down Expand Up @@ -773,6 +798,12 @@ def FilenameCheckHash(filename, literalfilename):
return FCH_ERROR, 'url:' + error
else:
return FCH_DATA, result
elif filename.startswith('#E#'):
result = DecodeEscapes(filename[3:])
if result == None:
return FCH_ERROR, 'escapes'
else:
return FCH_DATA, C2BIP3(result)
elif filename.startswith('#'):
return FCH_DATA, C2BIP3(filename[1:])
else:
Expand Down Expand Up @@ -830,7 +861,7 @@ def read(self, size=None):
return fRead.read(size)

def Data(self):
data = self.fIn.read()
data = self.read()
self.close()
return data

Expand Down Expand Up @@ -898,14 +929,14 @@ def CutBytes(expression, filename, options):
raise Exception('Error %s parsing prefix: %s' % (prefix, options.prefix))
else:
data = prefix + data

if options.suffix != '':
fch, suffix = FilenameCheckHash(options.suffix, False)
if fch != FCH_DATA:
raise Exception('Error %s parsing suffix: %s' % (suffix, options.suffix))
else:
data = data + suffix

StdoutWriteChunked(DumpFunction(data))

def Main():
Expand Down

0 comments on commit 4f6310b

Please sign in to comment.