Skip to content

Commit

Permalink
feat: Update reverse operations to Python3
Browse files Browse the repository at this point in the history
Up until now, all reverse operations needed `Python2` to run. This led
to a user needing to have two different versions of Python installed on their
system. Now, only `Python3` is required to run all `Sandblaster` operations.

Signed-off-by: David Bors <[email protected]>
  • Loading branch information
davidxbors committed Mar 8, 2023
1 parent 6a5cffd commit aca1685
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 43 deletions.
2 changes: 1 addition & 1 deletion reverse-sandbox/operation_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def __eq__(self, other):
return self.raw == other.raw

def __hash__(self):
return struct.unpack('<I', ''.join([chr(v) for v in self.raw[:4]]))[0]
return struct.unpack('<I', b''.join([bytes([v]) for v in self.raw[:4]]))[0]


# Operation nodes processed so far.
Expand Down
20 changes: 10 additions & 10 deletions reverse-sandbox/regex_parser_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,11 @@ def parse_parantheses_close(node_type, node_arg, node_transition, node_idx):

def node_parse(re, i, regex_list, node_idx):
node_type = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+4]]))[0]
b''.join([bytes([x]) for x in re[i:i+4]]))[0]
node_transition = struct.unpack('>I',
''.join([chr(x) for x in re[i+4:i+8]]))[0]
b''.join([bytes([x]) for x in re[i+4:i+8]]))[0]
node_arg = struct.unpack('>I',
''.join([chr(x) for x in re[i+8:i+12]]))[0]
b''.join([bytes([x]) for x in re[i+8:i+12]]))[0]
i += 12

logger.debug('node idx:{:#010x} type: {:#02x} arg: {:#010x}' \
Expand All @@ -136,10 +136,10 @@ def transform(x):
return c

class_size = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+4]]))[0]
b''.join([bytes([x]) for x in re[i:i+4]]))[0]
i += 0x4
content = struct.unpack('>{}I'.format(class_size),
''.join([chr(x) for x in re[i:i+4*class_size]]))
b''.join([bytes([x]) for x in re[i:i+4*class_size]]))
i += 0x4 * class_size
assert(class_size % 2 == 0)

Expand All @@ -162,23 +162,23 @@ class RegexParser(object):
@staticmethod
def parse(re, i, regex_list):
node_count = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+0x4]]))[0]
b''.join([bytes([x]) for x in re[i:i+0x4]]))[0]
logger.debug('node count = {:#x}'.format(node_count))

start_node = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0]
b''.join([bytes([x]) for x in re[i+0x4:i+0x8]]))[0]
logger.debug('start node = {:#x}'.format(start_node))

end_node = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0]
b''.join([bytes([x]) for x in re[i+0x8:i+0xC]]))[0]
logger.debug('end node = {:#x}'.format(end_node))

cclass_count = struct.unpack('>I',
''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0]
b''.join([bytes([x]) for x in re[i+0xC:i+0x10]]))[0]
logger.debug('character class count = {:#x}'.format(cclass_count))

submatch_count = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0]
b''.join([bytes([x]) for x in re[i+0x10:i+0x14]]))[0]
i += 0x14
logger.debug('submatch count = {:#x}'.format(submatch_count))

Expand Down
24 changes: 12 additions & 12 deletions reverse-sandbox/regex_parser_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def parse_parantheses_close(node_type, node_arg, node_transition, node_idx):

def node_parse(re, i, regex_list, node_idx):
node_type = struct.unpack('<B',
''.join([chr(x) for x in re[i:i+1]]))[0]
b''.join([bytes([x]) for x in re[i:i+1]]))[0]
node_transition = struct.unpack('<H',
''.join([chr(x) for x in re[i+1:i+3]]))[0]
b''.join([bytes([x]) for x in re[i+1:i+3]]))[0]
pad = struct.unpack('<B',
''.join([chr(x) for x in re[i+3:i+4]]))[0]
b''.join([bytes([x]) for x in re[i+3:i+4]]))[0]
node_arg = struct.unpack('<I',
''.join([chr(x) for x in re[i+4:i+8]]))[0]
b''.join([bytes([x]) for x in re[i+4:i+8]]))[0]
i += 8

logger.debug('node idx:{:#06x} type: {:#02x} arg: {:#010x}' \
Expand Down Expand Up @@ -156,17 +156,17 @@ def transform_content(content):
return

classes_magic, classes_size = struct.unpack('<II',
''.join([chr(x) for x in re[i:i+8]]))
b''.join([bytes([x]) for x in re[i:i+8]]))
i += 0x8
logger.debug('classes magic = {:#x} size = {:#x}'.format(
classes_magic, classes_size))
assert(len(re) - i == classes_size)
starts = struct.unpack('<{}I'.format(cclass_count),
''.join([chr(x) for x in re[i:i+4*cclass_count]]))
b''.join([bytes([x]) for x in re[i:i+4*cclass_count]]))
i += 0x4 * cclass_count

lens = struct.unpack('<{}B'.format(cclass_count),
''.join([chr(x) for x in re[i:i+cclass_count]]))
b''.join([bytes([x]) for x in re[i:i+cclass_count]]))
i += cclass_count

contents = [re[i+start:i+start+clen] for start, clen in zip(starts, lens)]
Expand All @@ -177,23 +177,23 @@ class RegexParser(object):
@staticmethod
def parse(re, i, regex_list):
magic = struct.unpack('<I',
''.join([chr(x) for x in re[i:i+0x4]]))[0]
b''.join([bytes([x]) for x in re[i:i+0x4]]))[0]
logger.debug('magic = {:#x}'.format(magic))

node_count = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0]
b''.join([bytes([x]) for x in re[i+0x4:i+0x8]]))[0]
logger.debug('node count = {:#x}'.format(node_count))

start_node = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0]
b''.join([bytes([x]) for x in re[i+0x8:i+0xC]]))[0]
logger.debug('start node = {:#x}'.format(start_node))

end_node = struct.unpack('<I',
''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0]
b''.join([bytes([x]) for x in re[i+0xC:i+0x10]]))[0]
logger.debug('end node = {:#x}'.format(end_node))

cclass_count = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0]
b''.join([bytes([x]) for x in re[i+0x10:i+0x14]]))[0]
logger.debug('character class count = {:#x}'.format(cclass_count))
i += 0x14

Expand Down
2 changes: 1 addition & 1 deletion reverse-sandbox/regex_parser_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class RegexParser(object):

@staticmethod
def parse(re, i, regex_list):
length = struct.unpack('<H', ''.join([chr(x) for x in re[i:i+2]]))[0]
length = struct.unpack('<H', b''.join([bytes([x]) for x in re[i:i+2]]))[0]
logger.debug("re.length: 0x%x", length)
i += 2
assert(length == len(re)-i)
Expand Down
24 changes: 15 additions & 9 deletions reverse-sandbox/reverse_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def extract_string_from_offset(f, offset, ios_version):
else:
f.seek(offset * 8)
len = struct.unpack("<I", f.read(4))[0]-1
return '%s' % f.read(len)
ret = f.read(len)
try:
ret = ret.decode()
except:
pass
return ret


def create_operation_nodes(infile, regex_list, num_operation_nodes,
Expand All @@ -43,10 +48,10 @@ def create_operation_nodes(infile, regex_list, num_operation_nodes,
operation_nodes = operation_node.build_operation_nodes(infile,
num_operation_nodes, ios_major_version)
logger.info("operation nodes")

for idx, node in enumerate(operation_nodes):
logger.info("%d: %s", idx, node.str_debug())

for n in operation_nodes:
n.convert_filter(sandbox_filter.convert_filter_callback, infile,
regex_list, ios_major_version, keep_builtin_filters,
Expand Down Expand Up @@ -211,7 +216,7 @@ def get_global_vars(f, vars_offset, num_vars, base_offset):
len = struct.unpack("<I", f.read(4))[0]
s = f.read(len-1)
global_vars.append(s)
logger.info("global variables are {:s}".format(", ".join(s for s in global_vars)))
logger.info("global variables are {:s}".format(", ".join(s.decode() for s in global_vars)))
return global_vars

def get_base_addr(f, ios_version):
Expand Down Expand Up @@ -304,7 +309,7 @@ def main():
re_table_offset = 12
else:
re_table_offset = struct.unpack("<H", f.read(2))[0]

if get_ios_major_version(args.release) >= 12:
f.seek(8)
re_table_count = struct.unpack("<H", f.read(2))[0]
Expand All @@ -319,7 +324,7 @@ def main():
f.seek(re_table_offset)
else:
f.seek(re_table_offset * 8)

re_offsets_table = struct.unpack("<%dH" % re_table_count, f.read(2 * re_table_count))
for offset in re_offsets_table:
if get_ios_major_version(args.release) >= 13:
Expand All @@ -328,7 +333,7 @@ def main():
else:
f.seek(offset * 8)
re_length = struct.unpack("<I", f.read(4))[0]

re = struct.unpack("<%dB" % re_length, f.read(re_length))
logger.debug("total_re_length: 0x%x", re_length)
re_debug_str = "re: [", ", ".join([hex(i) for i in re]), "]"
Expand Down Expand Up @@ -397,7 +402,7 @@ def main():
break
start = f.tell()
end = re_table_offset * 8
num_operation_nodes = (end - start) / 8
num_operation_nodes = (end - start) // 8
logger.info("number of operation nodes: %u" % num_operation_nodes)

operation_nodes = create_operation_nodes(f, regex_list,
Expand Down Expand Up @@ -489,7 +494,8 @@ def main():
break
start = f.tell()
end = re_table_offset * 8
num_operation_nodes = (end - start) / 8
# has to be int and not float
num_operation_nodes = (end - start) // 8
logger.info("number of operation nodes: %d ; start: %#x" % (num_operation_nodes, start))

operation_nodes = create_operation_nodes(f, regex_list,
Expand Down
21 changes: 14 additions & 7 deletions reverse-sandbox/reverse_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,14 @@ def get_length_minus_1(self):
def read_token(self, substr_len):
self.token_stack.append(self.token)
self.token = self.binary_string[self.pos:self.pos+substr_len]
logger.debug("got token \"{:s}\"".format(self.token))
logger.debug("got token \"{:s}\"".format(self.token.decode()))
self.pos += substr_len

def update_base(self):
self.base += self.token
if isinstance(self.token, bytes):
self.base += self.token.decode()
else:
self.base += self.token
self.token = ""
logger.debug("update base to \"{:s}\"".format(self.base))

Expand All @@ -168,14 +171,18 @@ def get_last_byte(self):

def get_substring(self, substr_len):
substr = self.binary_string[self.pos:self.pos+substr_len]
logger.debug(" ".join("0x{:02x}".format(ord(c)) for c in substr))
logger.debug(" ".join("0x{:02x}".format(c) for c in substr))
self.pos += substr_len
return substr

def end_with_subtokens(self, subtokens):
for s in subtokens:
self.output_strings.append(self.base+self.token+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token+s))
if isinstance(self.token, bytes):
self.output_strings.append(self.base+self.token.decode()+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token.decode()+s))
else:
self.output_strings.append(self.base+self.token+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token+s))
self.token = ""

def is_end(self):
Expand Down Expand Up @@ -227,7 +234,7 @@ def parse_byte_string(self, s, global_vars):
logger.debug("state is STATE_CONSTANT_READ")
b = rss.get_last_byte()
if b >= 0x10 and b < 0x3f:
rss.token = "${" + global_vars[b-0x10] + "}"
rss.token = b"${" + global_vars[b-0x10] + b"}"
b = rss.get_next_byte()
rss.update_state(b)
elif rss.state == rss.STATE_CONCAT_BYTE_READ:
Expand Down Expand Up @@ -335,7 +342,7 @@ def parse_byte_string(self, s, global_vars):
logger.warn("last state is not STATE_END_BYTE_READ ({:d})".format(rss.state))
logger.warn("previous state ({:d})".format(rss.state_stack[len(rss.state_stack)-1]))

logger.info("initial string: " + " ".join("0x{:02x}".format(ord(c)) for c in s))
logger.info("initial string: " + " ".join("0x{:02x}".format(c) for c in s))
logger.info("output_strings (num: {:d}): {:s}".format(len(rss.output_strings), ",".join('"{:s}"'.format(s) for s in rss.output_strings)))
return rss.output_strings

Expand Down
4 changes: 2 additions & 2 deletions reverse-sandbox/sandbox_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_filter_arg_string_by_offset(f, offset):
if ios_major_version >= 10:
f.seek(offset * 8)
s = f.read(4+len)
logger.info("binary string is " + s.encode("hex"))
logger.info("binary string is " + s.hex())
ss = reverse_string.SandboxString()
myss = ss.parse_byte_string(s[4:], global_vars)
actual_string = ""
Expand Down Expand Up @@ -72,7 +72,7 @@ def get_filter_arg_string_by_offset_with_type(f, offset):
if ios_major_version >= 10:
f.seek(base_addr + offset * 8)
s = f.read(4+len)
logger.info("binary string is " + s.encode("hex"))
logger.info("binary string is " + s.hex())
ss = reverse_string.SandboxString()
myss = ss.parse_byte_string(s[4:], global_vars)
append = "literal"
Expand Down
2 changes: 1 addition & 1 deletion reverse-sandbox/sandbox_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def create_regex_list(re):

regex_list = []

version = struct.unpack('>I', ''.join([chr(x) for x in re[:4]]))[0]
version = struct.unpack('>I', b''.join([bytes(chr(x), 'utf-8') for x in re[:4]]))[0]
logger.debug("re.version: 0x%x", version)

i = 4
Expand Down

0 comments on commit aca1685

Please sign in to comment.