forked from cryptax/droidlysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdroidutil.py
321 lines (261 loc) · 10.6 KB
/
droidutil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
import os
import errno
import re
import shutil
import magic
import hashlib
from collections import defaultdict
"""Those are my own utilities for sample analysis"""
def mkdir_if_necessary(path):
    """Create directory *path* (with any missing parents) unless it exists.

    Nothing is done when ``path`` is None, or when the directory is already
    present. Every other ``OSError`` raised by ``os.makedirs`` is propagated
    to the caller (for instance when ``path`` exists but is a regular file).
    """
    if path is None:
        return
    try:
        os.makedirs(path)
    except OSError as err:
        # Only "already exists, and really is a directory" is tolerated.
        directory_already_present = err.errno == errno.EEXIST and os.path.isdir(path)
        if not directory_already_present:
            raise
def on_rm_tree_error(fn, path, exc_info):
    """Error handler for ``shutil.rmtree``.

    rmtree fails in particular if the entry to delete is read-only. To get
    rid of it anyway, all permissions are granted on ``path`` and the failed
    removal is retried.

    Usage: ``shutil.rmtree(path, onerror=on_rm_tree_error)``

    Args:
        fn: the function that raised inside rmtree.
        path: the path that was passed to ``fn``.
        exc_info: the ``sys.exc_info()`` tuple of the failure (unused).

    Failures reported for any other function are ignored (best effort).
    An ``OSError`` raised by the retry itself is propagated.
    """
    if fn is os.rmdir:
        # 0o777 (octal). The decimal literal 777 would set mode 0o1411.
        os.chmod(path, 0o777)
        os.rmdir(path)
    elif fn is os.remove or fn is os.unlink:
        # Python 3's rmtree reports file failures with os.unlink, Python 2's
        # with os.remove: both must trigger the retry.
        os.chmod(path, 0o777)
        os.remove(path)
def move_dir(src, dst):
    """Move the content of directory ``src`` into ``dst``, then remove ``src``.

    Works even if ``dst`` already exists; it is created when missing.
    Every entry of ``src`` is moved, hidden (dot) entries included. A file
    that already exists in ``dst`` is replaced, and a sub-directory that
    exists on both sides is merged recursively.

    No shell is involved, so names containing spaces or shell
    meta-characters are handled safely.

    Raises:
        AssertionError: if ``src`` is not an existing directory.
        OSError / shutil.Error: if an entry cannot be moved. ``src`` is then
            left in place, so nothing that failed to move is deleted.
    """
    assert os.path.isdir(src), "src must be an existing directory"
    if not os.path.isdir(dst):
        os.makedirs(dst)

    for name in os.listdir(src):
        source_entry = os.path.join(src, name)
        target_entry = os.path.join(dst, name)
        is_real_dir = os.path.isdir(source_entry) and not os.path.islink(source_entry)
        if is_real_dir and os.path.isdir(target_entry):
            # Same sub-directory on both sides: merge instead of nesting.
            move_dir(source_entry, target_entry)
        else:
            shutil.move(source_entry, target_entry)

    # Everything has been moved: get rid of the (now empty) source directory.
    shutil.rmtree(src, onerror=on_rm_tree_error)
def sanitize_filename(filename):
    """Sanitize a filename so the output analysis directory can be created.

    Filenames may contain e.g. Russian or Chinese characters: every character
    which is not an ASCII letter, a digit, ``_`` or ``.`` is removed.

    Args:
        filename: only the 'basename', not an absolute path (a ``/`` would be
            stripped like any other unsupported character).

    Returns:
        The sanitized name. It may be empty if no character was acceptable.
    """
    # Raw string: '\.' in a normal literal is an invalid escape sequence.
    return re.sub(r'[^a-zA-Z0-9_\.]', '', filename)
def listAll(dirName):
    """Recursively list the regular files located under ``dirName``.

    Args:
        dirName: directory to walk.

    Returns:
        A list of paths (``dirName`` joined with the relative location), in
        ``os.listdir`` order, sub-directories being expanded in place.

    Entries that are neither a regular file nor a directory (dangling
    symbolic links, sockets, FIFOs...) are skipped: they cannot be listed
    and calling ``os.listdir`` on them would raise.
    """
    collected = []
    for name in os.listdir(dirName):
        full_path = os.path.join(dirName, name)
        if os.path.isfile(full_path):
            collected.append(full_path)
        elif os.path.isdir(full_path):
            collected.extend(listAll(full_path))
    return collected
def count_filedirs(dirname):
    """Count, recursively, the directories and files found below ``dirname``.

    ``dirname`` must be readable (checked with an assertion).

    Returns:
        A ``(nb of directories, nb of files)`` tuple.

    This is roughly what ``find ./smali -type d -print`` (respectively
    ``-type f``) would report, without counting ``dirname`` itself.
    """
    assert os.access(dirname, os.R_OK), "Can't access directory: "+dirname

    subdirs = []
    nb_files = 0
    for name in os.listdir(dirname):
        full_path = os.path.join(dirname, name)
        if os.path.isdir(full_path):
            subdirs.append(name)
        elif os.path.isfile(full_path):
            nb_files += 1
    nb_dirs = len(subdirs)

    for subdir in subdirs:
        try:
            below_dirs, below_files = count_filedirs(os.path.join(dirname, subdir))
        except RuntimeError:
            # raised when directories are nested too deeply for the recursion
            below_dirs, below_files = 0, 0
        nb_dirs += below_dirs
        nb_files += below_files

    return nb_dirs, nb_files
def sha256sum(input_file_name):
    """Return the SHA256 hex digest of a file read in binary mode.

    The file is consumed by chunks of 1 MB. If an IOError occurs while
    opening or reading it, a message is printed and '' is returned.
    """
    chunk_size = 1048576  # 1 MB
    digest = hashlib.sha256()

    try:
        with open(input_file_name, "rb") as stream:
            # read() returns b"" at end of file, which stops the iteration
            for block in iter(lambda: stream.read(chunk_size), b""):
                digest.update(block)
    except IOError:
        print('sha256sum: cannot open file: %s' % input_file_name)
        return ''

    return digest.hexdigest()
def sha1sum(input_file_name):
    """Return the SHA1 hex digest of a file read in binary mode.

    The file is consumed by chunks of 1 MB. If an IOError occurs while
    opening or reading it, a message is printed and '' is returned.
    """
    chunk_size = 1048576  # 1 MB
    digest = hashlib.sha1()

    try:
        with open(input_file_name, "rb") as stream:
            # read() returns b"" at end of file, which stops the iteration
            for block in iter(lambda: stream.read(chunk_size), b""):
                digest.update(block)
    except IOError:
        print('sha1sum: cannot open file: %s' % input_file_name)
        return ''

    return digest.hexdigest()
# -------------------------- File Constants -------------------------
# Integer codes naming the kind of file a sample is.
# We can probably add some more later: TAR, TGZ, BZ2...

# Something else than the other file types. We do not support this file type.
UNKNOWN = 0

# An APK. It is not possible to differentiate a ZIP from an APK until we
# have looked inside the ZIP.
APK = 1

# A Dalvik Executable file. We do not check the file is valid/accepted by
# the verifier.
DEX = 2

# An ARM ELF executable.
ARM = 3

# A Java .class file.
CLASS = 4

# A Zip file. Actually, this can also be a JAR or an APK until we have
# thoroughly checked.
ZIP = 5

# A RARed file.
RAR = 6
def str_filetype(filetype):
    """Return the name of a droidutil filetype code (APK, DEX, ARM...).

    Any value which is not one of the named codes yields "UNKNOWN".
    """
    # Compared with == in this order, exactly like a chain of if statements.
    named_types = (
        (APK, "APK"),
        (DEX, "DEX"),
        (ARM, "ARM"),
        (CLASS, "CLASS"),
        (ZIP, "ZIP"),
        (RAR, "RAR"),
    )
    for code, label in named_types:
        if filetype == code:
            return label
    return "UNKNOWN"
def get_filetype(filename):
    """Return the droidutil file type code of the given absolute filename.

    The decision is taken on the textual description returned by
    ``magic.from_file``; archives are not unpacked or inspected here.

    It will return one of these:
      droidutil.ZIP  (also used for Java archives and Android packages)
      droidutil.RAR
      droidutil.ARM
      droidutil.CLASS
      droidutil.DEX
      droidutil.UNKNOWN

    NOTE(review): the bare 'zip' alternative is a case-sensitive substring
    test, so a description such as 'gzip compressed data' is classified as
    ZIP as well - confirm this is intended.
    """
    description = magic.from_file(filename)
    if description is None:
        # this happens if magic is unable to find file type
        return UNKNOWN

    found = re.search('Zip archive data|zip|RAR archive data|executable, ARM|'
                      'shared object, ARM|Java class|Dalvik dex|Java archive|Android package', description)
    if found is None:
        return UNKNOWN

    typecase = {'Zip archive data': ZIP,
                'zip': ZIP,
                'Java archive': ZIP,
                'RAR archive data': RAR,
                'executable, ARM': ARM,
                'shared object, ARM': ARM,
                'Java class': CLASS,
                'Dalvik dex': DEX,
                'Android package': ZIP,  # droidsample needs ZIP type to do all the processing
                'None': UNKNOWN
                }
    return typecase[found.group(0)]
def get_elements(xmldoc, tag_name, attribute):
    """Return ``repr()`` of ``attribute`` for every ``tag_name`` element.

    ``xmldoc`` must provide ``getElementsByTagName``. One entry is produced
    per element, in the order they are returned.
    """
    return [repr(node.getAttribute(attribute))
            for node in xmldoc.getElementsByTagName(tag_name)]
def get_element(xmldoc, tag_name, attribute):
    """Return the first non-empty ``attribute`` among ``tag_name`` elements.

    Returns None when no such element carries a non-empty value.
    """
    values = (node.getAttribute(attribute)
              for node in xmldoc.getElementsByTagName(tag_name))
    return next((value for value in values if len(value) > 0), None)
"""Very simple exception to raise when we found something. For instance to break a loop."""
class Found(Exception):
    """Signal that what was looked for has been found (e.g. to leave a loop)."""
class matchresult:
    """One keyword match: where it was found and the matching line."""

    def __init__(self, thefile, theline, thelineno):
        """Store the file name, the matching line and its line number."""
        self.file, self.line, self.lineno = thefile, theline, thelineno

    def __repr__(self):
        fields = (self.file, self.lineno, self.line)
        return 'file=%s lineno=%d line=%s' % fields

    def __str__(self):
        # fixed-width variant: line number on 4 columns, line on 30
        fields = (self.file, self.lineno, self.line)
        return 'file=%s no=%4d line=%30s' % fields
def recursive_search(search_regexp, directory, exception_list=None, verbose=False):
    """Recursively search a directory, except in some files/subdirectories.

    Args:
        search_regexp: regular expression applied to every line. Files are
            read in binary mode, so this is a bytes pattern.
        directory: directory to search.
        exception_list: list of regexps. A file is skipped when its path
            matches one of them or contains it as a substring; a directory
            is skipped when its path matches one of them. Defaults to no
            exception.
        verbose: print progress and every match.

    Returns:
        A dictionary of lists of matches:
            match[ keyword ] = [ <'filename', 'matching line content', 'lineno'>,
                                 <'filename', 'matching line content', 'lineno'>, ]
        The keyword is the matched text decoded as UTF-8 (invalid bytes are
        replaced); each item is a ``matchresult``.

    Only the first match of each line is recorded (``re.search``); reporting
    several matches per line would require ``re.findall``.
    """
    if exception_list is None:
        exception_list = []
    matches = defaultdict(list)

    if verbose:
        print("Searching in " + directory + " for " + search_regexp.decode('utf-8'))
        print("Exceptions: %s" % (str(exception_list)))

    for entry in os.listdir(directory):
        current_entry = os.path.join(directory, entry)

        if os.path.isfile(current_entry):
            # "exception" can be part of a path, e.g. blah/bloh must exclude
            # com/blah/bloh: hence the substring test next to the regexp.
            if any(re.search(exception, current_entry) is not None or exception in current_entry
                   for exception in exception_list):
                continue  # skip this file

            # ok, this file must be searched; the handle is closed on exit
            with open(current_entry, 'rb') as handle:
                for lineno, line in enumerate(handle, start=1):
                    match = re.search(search_regexp, line)
                    if match is None:
                        continue
                    keyword = match.group(0).decode('utf-8', errors='replace')
                    if verbose:
                        print("Match: File: " + entry + " Keyword: " +
                              keyword +
                              " Line: " + line.decode('utf-8', errors='replace'))
                    matches[keyword].append(matchresult(current_entry, line, lineno))

        elif os.path.isdir(current_entry):
            if any(re.search(exception, current_entry) is not None
                   for exception in exception_list):
                continue  # skip this directory

            # this directory is not in the exception list: search it recursively
            try:
                sub_matches = recursive_search(search_regexp, current_entry, exception_list, verbose)
            except RuntimeError:
                # we get this when there are too many recursive dirs
                continue
            # merge in those results
            for key, found in sub_matches.items():
                matches[key].extend(found)

    return matches