Skip to content

Commit

Permalink
Fix FunctionHeader Overflowed and add hasm dump feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Pongsakorn Sommalai committed Jan 10, 2021
1 parent 62647c2 commit c01b503
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 19 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,8 @@ dmypy.json
.pytype/

# Cython debug symbols
cython_debug/
cython_debug/

# Custom
output/*
!output/.gitkeep
41 changes: 39 additions & 2 deletions hasm.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,45 @@
from util import *
import json
import os

def to_hasm(f, hbc):
def dump(hbc, path):
    """Dump a parsed bundle into a new directory.

    Creates ``path`` and writes two files into it:

    * ``metadata.json`` -- ``hbc.getObj()`` serialized with ``json.dump``.
    * ``instruction.hasm`` -- one ``Function<...>`` section per function
      returned by ``hbc.getFunction(i)``: a header line, one line per
      instruction (``opcode`` padded to 20 columns, then ``type:value``
      operands), and a ``; Oper[i]: String(id) ...`` comment line for every
      operand flagged as a string reference.

    Args:
        hbc: Object exposing ``getObj()``, ``getFunctionCount()``,
            ``getFunction(i)`` and ``getString(id)``.
        path: Directory to create. It must not exist yet.

    Raises:
        AssertionError: If ``path`` already exists.
    """
    # Raised explicitly instead of via ``assert`` so the guard still runs
    # under ``python -O``; exception type and message are unchanged.
    if os.path.exists(path):
        raise AssertionError(f"'{path}' exists.")
    os.makedirs(path)

    # Write all obj to metadata.json
    with open(f"{path}/metadata.json", "w", encoding="utf-8") as meta:
        json.dump(hbc.getObj(), meta)

    # Explicit UTF-8: decoded strings may contain characters that the
    # platform default encoding cannot represent.
    with open(f"{path}/instruction.hasm", "w", encoding="utf-8") as f:
        for i in range(hbc.getFunctionCount()):
            functionName, paramCount, registerCount, symbolCount, insts, _ = hbc.getFunction(i)
            # Function<>1270(2 params, 1 registers, 0 symbols):
            f.write(f"Function<{functionName.decode()}>{i}({paramCount} params, {registerCount} registers, {symbolCount} symbols):\n")
            for opcode, operands in insts:
                rendered = []
                string_refs = []
                for idx, (t, is_str, val) in enumerate(operands):
                    rendered.append(f"{t}:{val}")
                    if is_str:
                        string_refs.append((idx, val, hbc.getString(val)))

                f.write(f"\t{opcode.ljust(20, ' ')}\t{', '.join(rendered)}\n")
                if not string_refs:
                    continue

                for idx, val, raw in string_refs:
                    try:
                        text = f"\"{raw.decode()}\""
                    except UnicodeDecodeError:
                        # Not valid text: show the raw bytes as hex instead.
                        text = f"hex({raw.hex()})"
                    f.write(f"\t; Oper[{idx}]: String({val}) {text}\n")

                # NOTE(review): blank separator emitted once after the string
                # comments of an instruction -- confirm this nesting matches
                # the intended layout.
                f.write("\n")

            f.write("EndFunction\n\n")
9 changes: 6 additions & 3 deletions hbc/hbc74/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class HBC74:
# Run parse() on the input `f` and keep the result on self.obj.
def __init__(self, f):
self.obj = parse(f)

def getObj():
# Return the object held in self.obj.
def getObj(self):
return self.obj

def getVersion(self):
Expand All @@ -29,7 +29,7 @@ def getHeader(self):
# Return the "functionCount" entry of self.obj["header"].
def getFunctionCount(self):
return self.obj["header"]["functionCount"]

def getFunction(self, fid):
def getFunction(self, fid, disasm=True):
assert fid >= 0 and fid < self.getFunctionCount(), "Invalid function ID"

functionHeader = self.obj["functionHeaders"][fid]
Expand All @@ -43,7 +43,10 @@ def getFunction(self, fid):
instOffset = self.obj["instOffset"]
start = offset - instOffset
end = start + bytecodeSizeInBytes
inst = disassemble(self.obj["inst"][start:end])
inst = self.obj["inst"][start:end]
if disasm:
inst = disassemble(inst)


functionNameStr = self.getString(functionName)

Expand Down
12 changes: 12 additions & 0 deletions hbc/hbc74/data/structure.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@
"highestWriteCacheIndex": ["bit", 8, 1],
"flags": ["uint", 8, 1]
},
"FuncHeader": {
"offset": ["uint", 32, 1],
"paramCount": ["uint", 32, 1],
"bytecodeSizeInBytes": ["uint", 32, 1],
"functionName": ["uint", 32, 1],
"infoOffset": ["uint", 32, 1],
"frameSize": ["uint", 32, 1],
"environmentSize": ["uint", 32, 1],
"highestReadCacheIndex": ["uint", 8, 1],
"highestWriteCacheIndex": ["uint", 8, 1],
"flags": ["uint", 8, 1]
},
"SmallStringTableEntry": {
"isUTF16": ["bit", 1, 1],
"offset": ["bit", 23, 1],
Expand Down
18 changes: 14 additions & 4 deletions hbc/hbc74/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def parse(f):
structure = json.load(open(f"{basepath}/data/structure.json", "r"))

headerS = structure["header"]
functionHeaderS = structure["SmallFuncHeader"]
smallFunctionHeaderS = structure["SmallFuncHeader"]
functionHeaderS = structure["FuncHeader"]
stringTableEntryS = structure["SmallStringTableEntry"]
overflowStringTableEntryS = structure["OverflowStringTableEntry"]
stringStorageS = structure["StringStorage"]
Expand All @@ -37,11 +38,20 @@ def parse(f):

# Segment 2: Function Header
functionHeaders = []
for _ in range(header["functionCount"]):
for i in range(header["functionCount"]):
functionHeader = {}
for key in functionHeaderS:
functionHeader[key] = read(f, functionHeaderS[key])
for key in smallFunctionHeaderS:
functionHeader[key] = read(f, smallFunctionHeaderS[key])

if (functionHeader["flags"] >> 5) & 1:
saved_pos = f.tell()
large_offset = (functionHeader["infoOffset"] << 16 ) | functionHeader["offset"]
f.seek(large_offset)
for key in functionHeaderS:
functionHeader[key] = read(f, functionHeaderS[key])

f.seek(saved_pos)

functionHeaders.append(functionHeader)

obj["functionHeaders"] = functionHeaders
Expand Down
2 changes: 1 addition & 1 deletion hbc/hbc74/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def disassemble(inst):
if is_str:
oper_t = oper_t[:-2]

size, conv_to, conv_from = operand_type[oper_t]
size, conv_to, _ = operand_type[oper_t]
val = conv_to(inst[i:i+size])
r[1].append((oper_t, is_str, val))
i+=size
Expand Down
10 changes: 2 additions & 8 deletions hbctool.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,5 @@
from hbc import parseFromFile

def to_hasm():
pass

import hasm

if __name__ == "__main__":
hbc = parseFromFile(open("hbc/hbc74/example/index.android.bundle", "rb"))
functionName, paramCount, registerCount, symbolCount, inst, _ = hbc.getFunction(3819)


pass
Empty file added output/.gitkeep
Empty file.
6 changes: 6 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from hbc import parseFromFile
import hasm

if __name__ == "__main__":
    # Open the example bundle in a context manager so the handle is closed
    # even if parsing or dumping raises. The dump stays inside the block in
    # case the parsed object still reads from the file.
    with open("hbc/hbc74/example/index.android.bundle", "rb") as bundle:
        hbc = parseFromFile(bundle)
        hasm.dump(hbc, "output/test")

0 comments on commit c01b503

Please sign in to comment.