20201221a

lotusexpeditor · Dec 21, 2020 · 4f6310b · 4f6310b
1 parent 1a9e355
commit 4f6310b
Show file tree

Hide file tree

Showing 2 changed files with 107 additions and 25 deletions.
diff --git a/byte-stats.py b/byte-stats.py
@@ -2,8 +2,8 @@
 
 __description__ = 'Calculate byte statistics'
 __author__ = 'Didier Stevens'
-__version__ = '0.0.7'
-__date__ = '2017/11/01'
+__version__ = '0.0.8'
+__date__ = '2020/12/21'
 
 """
 Source code put in public domain by Didier Stevens, no Copyright
@@ -24,6 +24,7 @@
   2017/08/12: 0.0.6 added option -r
   2017/09/13: 0.0.7 added average consecutive byte difference, refactoring (cCalculateByteStatistics)
   2017/11/01: added option -g
+  2020/12/21: 0.0.8 Python 3
 
 Todo:
 """
@@ -37,7 +38,13 @@
 import string
 import textwrap
 import binascii
-import Tkinter
+import operator
+
+bPython3 = sys.version_info[0] > 2
+if bPython3:
+    import tkinter
+else:
+    import Tkinter as tkinter
 
 def PrintManual():
     manual = '''
@@ -370,14 +377,20 @@ def ProcessAt(argument):
 def ExpandFilenameArguments(filenames):
     return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), [])))
 
+def P23Ord(value):  # Return the integer value of one element taken from a byte string, on Python 2 (1-char str) and Python 3 (int)
+    if isinstance(value, int):  # already an integer (what iterating bytes yields on Python 3): return it unchanged
+        return value
+    else:
+        return ord(value)  # 1-character string (what iterating str yields on Python 2): convert to its ordinal
+
 class cCalculateByteStatistics():
 
     def __init__(self):  # Start a fresh, empty set of byte statistics
         self.dPrevalence = {iter: 0 for iter in range(0x100)}  # occurrence counter for every byte value 0x00-0xFF (NOTE(review): the loop variable shadows the builtin iter)
         self.previous = None  # None until a byte has been seen; Process compares this against None
         self.sumDifference = 0  # NOTE(review): presumably the running sum behind the "average consecutive byte difference" statistic - the updating code is outside this hunk, confirm
         self.count = 0  # NOTE(review): presumably the number of differences accumulated so far - confirm against Process
-        
+
     def Process(self, byte):
         self.dPrevalence[byte] += 1
         if self.previous != None:
@@ -419,7 +432,7 @@ def Stats(self):
             countBASE64Bytes += self.dPrevalence[iter]
         for iter in range(0x61, 0x7B):
             countBASE64Bytes += self.dPrevalence[iter]
-        countBASE64Bytes += self.dPrevalence[ord('+')] + self.dPrevalence[ord('/')] + self.dPrevalence[ord('=')]
+        countBASE64Bytes += self.dPrevalence[P23Ord('+')] + self.dPrevalence[P23Ord('/')] + self.dPrevalence[P23Ord('=')]
         entropy = 0.0
         for iter in range(0x100):
             if self.dPrevalence[iter] > 0:
@@ -478,6 +491,25 @@ def MaximumAndPosition(buckets, index):
             positionMaximum = position
     return (valueMaximum, positionMaximum)
 
+def cmp_to_key(mycmp):  # mycmp(a, b) must return a negative, zero or positive number; the returned class is meant to be passed as key= to sorted()
+    'Convert a cmp= function into a key= function'
+    class K:  # wrapper around one item; instances are compared instead of the raw items
+        def __init__(self, obj, *args):  # extra positional arguments are accepted and ignored
+            self.obj = obj
+        def __lt__(self, other):  # every rich comparison below delegates to mycmp and tests the sign of its result
+            return mycmp(self.obj, other.obj) < 0
+        def __gt__(self, other):
+            return mycmp(self.obj, other.obj) > 0
+        def __eq__(self, other):
+            return mycmp(self.obj, other.obj) == 0
+        def __le__(self, other):
+            return mycmp(self.obj, other.obj) <= 0
+        def __ge__(self, other):
+            return mycmp(self.obj, other.obj) >= 0
+        def __ne__(self, other):
+            return mycmp(self.obj, other.obj) != 0
+    return K  # NOTE(review): appears to mirror the standard library's functools.cmp_to_key - confirm whether that could be used instead
+
 def ByteStats(args, options):
     if options.bucket < 2:
         print('Bucket size must be at least 2, not %d' % options.bucket)
@@ -493,14 +525,17 @@ def ByteStats(args, options):
         args = ExpandFilenameArguments(args)
     for file in args:
         if file == '':
-            fIn = sys.stdin
+            if bPython3:
+                fIn = sys.stdin.buffer
+            else:
+                fIn = sys.stdin
             if sys.platform == 'win32':
                 import msvcrt
                 msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
         else:
             fIn = open(file, 'rb')
         for char in fIn.read():
-            value = ord(char)
+            value = P23Ord(char)
             countBytes += 1
             oCalculateByteStatistics.Process(value)
             oCalculateByteStatisticsBucket.Process(value)
@@ -522,8 +557,12 @@ def ByteStats(args, options):
             if countBytes % options.bucket == 0:
                 buckets.append([countBytes - options.bucket, oCalculateByteStatisticsBucket.Stats()])
                 oCalculateByteStatisticsBucket = cCalculateByteStatistics()
-        if fIn != sys.stdin:
-            fIn.close()
+        if bPython3:
+            if fIn != sys.stdin.buffer:
+                fIn.close()
+        else:
+            if fIn != sys.stdin:
+                fIn.close()
     if len(diffs) > 1:
         dDiffs[savPosition - 2] = values
 
@@ -549,7 +588,7 @@ def ByteStats(args, options):
             print('Unknown property: %s' % options.property)
             return
         index = dProperties[options.property]
-        oTk = Tkinter.Tk()
+        oTk = tkinter.Tk()
         oTk.title('byte-stats: property %s' % options.property)
         c_width = len(buckets)
         multiplier = 1
@@ -564,23 +603,27 @@ def ByteStats(args, options):
             maximum = max(properties[index] for position, properties in buckets)
             c_height = 301
             multiplier = float(c_height - 1) / float(maximum)
-        oCanvas = Tkinter.Canvas(oTk, width=c_width, height=c_height, bg= 'white')
+        oCanvas = tkinter.Canvas(oTk, width=c_width, height=c_height, bg= 'white')
         oCanvas.pack()
-        list = []
+        points = []
         counter = 0
         for position, properties in buckets:
-            list.append(counter)
-            list.append(c_height - int(properties[index] * multiplier))
+            points.append(counter)
+            points.append(c_height - int(properties[index] * multiplier))
             counter += 1
-        oCanvas.create_line(list)
+        oCanvas.create_line(points)
         oTk.mainloop()
     else:
         listCount = oCalculateByteStatistics.Prevalence().items()
         if options.keys:
             index = 0
         else:
             index = 1
-        listCount.sort(lambda x, y:cmp(x[index], y[index]), reverse=options.descending)
+        if bPython3:
+            listCount = list(listCount)
+            listCount.sort(key=operator.itemgetter(index), reverse=options.descending)
+        else:
+            listCount.sort(lambda x, y:cmp(x[index], y[index]), reverse=options.descending)
         lineCounter = 0
         dotsPrinted = False
         print('Byte ASCII Count     Pct')
@@ -642,12 +685,20 @@ def ByteStats(args, options):
         if options.keys:
             sequences = sorted(dDiffs.items())
         else:
-            sequences = sorted(dDiffs.items(), cmp=lambda x, y: IFF(len(x[1]) == len(y[1]), cmp(y[0], x[0]), cmp(len(x[1]), len(y[1]))), reverse=True)
+            if bPython3:
+                def MyCmp(a, b):
+                    return (a > b) - (a < b)
+                sequences = sorted(dDiffs.items(), key=cmp_to_key(lambda x, y: IFF(len(x[1]) == len(y[1]), MyCmp(y[0], x[0]), MyCmp(len(x[1]), len(y[1])))), reverse=True)
+            else:
+                sequences = sorted(dDiffs.items(), cmp=lambda x, y: IFF(len(x[1]) == len(y[1]), cmp(y[0], x[0]), cmp(len(x[1]), len(y[1]))), reverse=True)
         if not options.all:
             sequences = sequences[:10]
         for sequence in sequences:
             if len(sequence[1]) >= options.filter:
-                print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify(''.join([chr(c) for c in sequence[1]])), 40)))
+                if bPython3:  # hexlify the raw byte values directly: chr() followed by .encode() is UTF-8 and would turn every value >= 0x80 into two bytes
+                    print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify(bytes(sequence[1])).decode(), 40)))
+                else:
+                    print('0x%08x: %6d %4d 0x%s' % (sequence[0], len(sequence[1]), ByteSub(sequence[1][1], sequence[1][0]), TruncateString(binascii.hexlify(''.join([chr(c) for c in sequence[1]])), 40)))
 
     def Chr(number):
         return IFF(number >= 0x20 and number < 0x7F, chr(number), '.')

diff --git a/cut-bytes.py b/cut-bytes.py
@@ -2,8 +2,8 @@
 
 __description__ = 'Cut a section of bytes out of a file'
 __author__ = 'Didier Stevens'
-__version__ = '0.0.12'
-__date__ = '2020/02/01'
+__version__ = '0.0.13'
+__date__ = '2020/12/08'
 
 """
 
@@ -30,6 +30,9 @@
   2020/01/24: added #h# support for spaces
   2020/01/25: fix ascii dump 127
   2020/02/01: 0.0.12 added #u#
+  2020/02/18: 0.0.13 added #E#
+  2020/10/21: Python 3 fix in cBinaryFile
+  2020/12/08: base64dump fix
 
 Todo:
 """
@@ -45,6 +48,7 @@
 import gzip
 import json
 import struct
+import codecs
 if sys.version_info[0] >= 3:
     from io import BytesIO as DataIO
 else:
@@ -159,6 +163,9 @@ def PrintManual():
 File arguments that start with #u# are a notational convention to download a file using an url.
 For example: #u#http://didierstevens.com
 
+File arguments that start with #E# are a notational convention for strings with escape characters.
+For example: #E#line1\nline2
+
 To process a file that starts with #, prefix it with a relative path to the current directory:
  cut-bytes.py : .\#data
 Output:
@@ -259,7 +266,7 @@ def HexAsciiDump(self, rle=False):
         return oDumpStream.Content()
 
     def Base64Dump(self, nowhitespace=False):
-        encoded = binascii.b2a_base64(self.data)
+        encoded = binascii.b2a_base64(self.data).decode().strip()
         if nowhitespace:
             return encoded
         oDumpStream = self.cDumpStream(self.prefix)
@@ -439,7 +446,10 @@ def CutData(stream, cutArgument):
 #Fix for http://bugs.python.org/issue11395
 def StdoutWriteChunked(data):
     if sys.version_info[0] > 2:
-        sys.stdout.buffer.write(data)
+        if isinstance(data, str):
+            sys.stdout.write(data)
+        else:
+            sys.stdout.buffer.write(data)
     else:
         while data != '':
             sys.stdout.write(data[0:10000])
@@ -726,6 +736,21 @@ def ParsePackExpression(data):
 FCH_DATA = 1
 FCH_ERROR = 2
 
+ESCAPE_SEQUENCE_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\'"abfnrtv]  # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)  # VERBOSE: whitespace and the # comments inside the pattern are not part of the match; the dots accept any character, not only hex digits
+
+def DecodeEscapes(str):  # Replace each escape sequence found in str by the character it denotes; returns the new string, or None when a matched sequence is malformed
+    try:  # the pattern also matches invalid sequences such as \xZZ or \N{bogus}; decoding those raises UnicodeDecodeError
+        return ESCAPE_SEQUENCE_RE.sub(lambda match: codecs.decode(match.group(0), 'unicode-escape'), str)
+    except UnicodeDecodeError:  # signal failure with None, the value the #E# handling in FilenameCheckHash tests for
+        return None
+
 def DownloadFile(url):
     try:
         if sys.hexversion >= 0x020601F0:
@@ -773,6 +798,12 @@ def FilenameCheckHash(filename, literalfilename):
             return FCH_ERROR, 'url:' + error
         else:
             return FCH_DATA, result
+    elif filename.startswith('#E#'):
+        result = DecodeEscapes(filename[3:])
+        if result == None:
+            return FCH_ERROR, 'escapes'
+        else:
+            return FCH_DATA, C2BIP3(result)
     elif filename.startswith('#'):
         return FCH_DATA, C2BIP3(filename[1:])
     else:
@@ -830,7 +861,7 @@ def read(self, size=None):
             return fRead.read(size)
 
     def Data(self):
-        data = self.fIn.read()
+        data = self.read()
         self.close()
         return data
 
@@ -898,14 +929,14 @@ def CutBytes(expression, filename, options):
                 raise Exception('Error %s parsing prefix: %s' % (prefix, options.prefix))
             else:
                 data = prefix + data
-        
+
         if options.suffix != '':
             fch, suffix = FilenameCheckHash(options.suffix, False)
             if fch != FCH_DATA:
                 raise Exception('Error %s parsing suffix: %s' % (suffix, options.suffix))
             else:
                 data = data + suffix
-        
+
         StdoutWriteChunked(DumpFunction(data))
 
 def Main():