Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Update reverse operations to Python3 #17

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion reverse-sandbox/operation_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ def __eq__(self, other):
return self.raw == other.raw

def __hash__(self):
return struct.unpack('<I', ''.join([chr(v) for v in self.raw[:4]]))[0]
return struct.unpack('<I', b''.join([bytes([v]) for v in self.raw[:4]]))[0]


# Operation nodes processed so far.
Expand Down
20 changes: 10 additions & 10 deletions reverse-sandbox/regex_parser_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,11 @@ def parse_parantheses_close(node_type, node_arg, node_transition, node_idx):

def node_parse(re, i, regex_list, node_idx):
node_type = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+4]]))[0]
b''.join([bytes([x]) for x in re[i:i+4]]))[0]
node_transition = struct.unpack('>I',
''.join([chr(x) for x in re[i+4:i+8]]))[0]
b''.join([bytes([x]) for x in re[i+4:i+8]]))[0]
node_arg = struct.unpack('>I',
''.join([chr(x) for x in re[i+8:i+12]]))[0]
b''.join([bytes([x]) for x in re[i+8:i+12]]))[0]
i += 12

logger.debug('node idx:{:#010x} type: {:#02x} arg: {:#010x}' \
Expand All @@ -136,10 +136,10 @@ def transform(x):
return c

class_size = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+4]]))[0]
b''.join([bytes([x]) for x in re[i:i+4]]))[0]
i += 0x4
content = struct.unpack('>{}I'.format(class_size),
''.join([chr(x) for x in re[i:i+4*class_size]]))
b''.join([bytes([x]) for x in re[i:i+4*class_size]]))
i += 0x4 * class_size
assert(class_size % 2 == 0)

Expand All @@ -162,23 +162,23 @@ class RegexParser(object):
@staticmethod
def parse(re, i, regex_list):
node_count = struct.unpack('>I',
''.join([chr(x) for x in re[i:i+0x4]]))[0]
b''.join([bytes([x]) for x in re[i:i+0x4]]))[0]
logger.debug('node count = {:#x}'.format(node_count))

start_node = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0]
b''.join([bytes([x]) for x in re[i+0x4:i+0x8]]))[0]
logger.debug('start node = {:#x}'.format(start_node))

end_node = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0]
b''.join([bytes([x]) for x in re[i+0x8:i+0xC]]))[0]
logger.debug('end node = {:#x}'.format(end_node))

cclass_count = struct.unpack('>I',
''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0]
b''.join([bytes([x]) for x in re[i+0xC:i+0x10]]))[0]
logger.debug('character class count = {:#x}'.format(cclass_count))

submatch_count = struct.unpack('>I',
''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0]
b''.join([bytes([x]) for x in re[i+0x10:i+0x14]]))[0]
i += 0x14
logger.debug('submatch count = {:#x}'.format(submatch_count))

Expand Down
24 changes: 12 additions & 12 deletions reverse-sandbox/regex_parser_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,13 @@ def parse_parantheses_close(node_type, node_arg, node_transition, node_idx):

def node_parse(re, i, regex_list, node_idx):
node_type = struct.unpack('<B',
''.join([chr(x) for x in re[i:i+1]]))[0]
b''.join([bytes([x]) for x in re[i:i+1]]))[0]
node_transition = struct.unpack('<H',
''.join([chr(x) for x in re[i+1:i+3]]))[0]
b''.join([bytes([x]) for x in re[i+1:i+3]]))[0]
pad = struct.unpack('<B',
''.join([chr(x) for x in re[i+3:i+4]]))[0]
b''.join([bytes([x]) for x in re[i+3:i+4]]))[0]
node_arg = struct.unpack('<I',
''.join([chr(x) for x in re[i+4:i+8]]))[0]
b''.join([bytes([x]) for x in re[i+4:i+8]]))[0]
i += 8

logger.debug('node idx:{:#06x} type: {:#02x} arg: {:#010x}' \
Expand Down Expand Up @@ -156,17 +156,17 @@ def transform_content(content):
return

classes_magic, classes_size = struct.unpack('<II',
''.join([chr(x) for x in re[i:i+8]]))
b''.join([bytes([x]) for x in re[i:i+8]]))
i += 0x8
logger.debug('classes magic = {:#x} size = {:#x}'.format(
classes_magic, classes_size))
assert(len(re) - i == classes_size)
starts = struct.unpack('<{}I'.format(cclass_count),
''.join([chr(x) for x in re[i:i+4*cclass_count]]))
b''.join([bytes([x]) for x in re[i:i+4*cclass_count]]))
i += 0x4 * cclass_count

lens = struct.unpack('<{}B'.format(cclass_count),
''.join([chr(x) for x in re[i:i+cclass_count]]))
b''.join([bytes([x]) for x in re[i:i+cclass_count]]))
i += cclass_count

contents = [re[i+start:i+start+clen] for start, clen in zip(starts, lens)]
Expand All @@ -177,23 +177,23 @@ class RegexParser(object):
@staticmethod
def parse(re, i, regex_list):
magic = struct.unpack('<I',
''.join([chr(x) for x in re[i:i+0x4]]))[0]
b''.join([bytes([x]) for x in re[i:i+0x4]]))[0]
logger.debug('magic = {:#x}'.format(magic))

node_count = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x4:i+0x8]]))[0]
b''.join([bytes([x]) for x in re[i+0x4:i+0x8]]))[0]
logger.debug('node count = {:#x}'.format(node_count))

start_node = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x8:i+0xC]]))[0]
b''.join([bytes([x]) for x in re[i+0x8:i+0xC]]))[0]
logger.debug('start node = {:#x}'.format(start_node))

end_node = struct.unpack('<I',
''.join([chr(x) for x in re[i+0xC:i+0x10]]))[0]
b''.join([bytes([x]) for x in re[i+0xC:i+0x10]]))[0]
logger.debug('end node = {:#x}'.format(end_node))

cclass_count = struct.unpack('<I',
''.join([chr(x) for x in re[i+0x10:i+0x14]]))[0]
b''.join([bytes([x]) for x in re[i+0x10:i+0x14]]))[0]
logger.debug('character class count = {:#x}'.format(cclass_count))
i += 0x14

Expand Down
2 changes: 1 addition & 1 deletion reverse-sandbox/regex_parser_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ class RegexParser(object):

@staticmethod
def parse(re, i, regex_list):
length = struct.unpack('<H', ''.join([chr(x) for x in re[i:i+2]]))[0]
length = struct.unpack('<H', b''.join([bytes([x]) for x in re[i:i+2]]))[0]
logger.debug("re.length: 0x%x", length)
i += 2
assert(length == len(re)-i)
Expand Down
24 changes: 15 additions & 9 deletions reverse-sandbox/reverse_sandbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def extract_string_from_offset(f, offset, ios_version):
else:
f.seek(offset * 8)
len = struct.unpack("<I", f.read(4))[0]-1
return '%s' % f.read(len)
ret = f.read(len)
try:
ret = ret.decode()
except:
pass
return ret


def create_operation_nodes(infile, regex_list, num_operation_nodes,
Expand All @@ -43,10 +48,10 @@ def create_operation_nodes(infile, regex_list, num_operation_nodes,
operation_nodes = operation_node.build_operation_nodes(infile,
num_operation_nodes, ios_major_version)
logger.info("operation nodes")

for idx, node in enumerate(operation_nodes):
logger.info("%d: %s", idx, node.str_debug())

for n in operation_nodes:
n.convert_filter(sandbox_filter.convert_filter_callback, infile,
regex_list, ios_major_version, keep_builtin_filters,
Expand Down Expand Up @@ -211,7 +216,7 @@ def get_global_vars(f, vars_offset, num_vars, base_offset):
len = struct.unpack("<I", f.read(4))[0]
s = f.read(len-1)
global_vars.append(s)
logger.info("global variables are {:s}".format(", ".join(s for s in global_vars)))
logger.info("global variables are {:s}".format(", ".join(s.decode() for s in global_vars)))
return global_vars

def get_base_addr(f, ios_version):
Expand Down Expand Up @@ -304,7 +309,7 @@ def main():
re_table_offset = 12
else:
re_table_offset = struct.unpack("<H", f.read(2))[0]

if get_ios_major_version(args.release) >= 12:
f.seek(8)
re_table_count = struct.unpack("<H", f.read(2))[0]
Expand All @@ -319,7 +324,7 @@ def main():
f.seek(re_table_offset)
else:
f.seek(re_table_offset * 8)

re_offsets_table = struct.unpack("<%dH" % re_table_count, f.read(2 * re_table_count))
for offset in re_offsets_table:
if get_ios_major_version(args.release) >= 13:
Expand All @@ -328,7 +333,7 @@ def main():
else:
f.seek(offset * 8)
re_length = struct.unpack("<I", f.read(4))[0]

re = struct.unpack("<%dB" % re_length, f.read(re_length))
logger.debug("total_re_length: 0x%x", re_length)
re_debug_str = "re: [", ", ".join([hex(i) for i in re]), "]"
Expand Down Expand Up @@ -397,7 +402,7 @@ def main():
break
start = f.tell()
end = re_table_offset * 8
num_operation_nodes = (end - start) / 8
num_operation_nodes = (end - start) // 8
logger.info("number of operation nodes: %u" % num_operation_nodes)

operation_nodes = create_operation_nodes(f, regex_list,
Expand Down Expand Up @@ -489,7 +494,8 @@ def main():
break
start = f.tell()
end = re_table_offset * 8
num_operation_nodes = (end - start) / 8
# has to be int and not float
num_operation_nodes = (end - start) // 8
logger.info("number of operation nodes: %d ; start: %#x" % (num_operation_nodes, start))

operation_nodes = create_operation_nodes(f, regex_list,
Expand Down
21 changes: 14 additions & 7 deletions reverse-sandbox/reverse_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,14 @@ def get_length_minus_1(self):
def read_token(self, substr_len):
self.token_stack.append(self.token)
self.token = self.binary_string[self.pos:self.pos+substr_len]
logger.debug("got token \"{:s}\"".format(self.token))
logger.debug("got token \"{:s}\"".format(self.token.decode()))
self.pos += substr_len

def update_base(self):
self.base += self.token
if isinstance(self.token, bytes):
self.base += self.token.decode()
else:
self.base += self.token
self.token = ""
logger.debug("update base to \"{:s}\"".format(self.base))

Expand All @@ -168,14 +171,18 @@ def get_last_byte(self):

def get_substring(self, substr_len):
substr = self.binary_string[self.pos:self.pos+substr_len]
logger.debug(" ".join("0x{:02x}".format(ord(c)) for c in substr))
logger.debug(" ".join("0x{:02x}".format(c) for c in substr))
self.pos += substr_len
return substr

def end_with_subtokens(self, subtokens):
for s in subtokens:
self.output_strings.append(self.base+self.token+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token+s))
if isinstance(self.token, bytes):
self.output_strings.append(self.base+self.token.decode()+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token.decode()+s))
else:
self.output_strings.append(self.base+self.token+s)
logger.debug("output string with subtokens \"{:s}\"".format(self.base+self.token+s))
self.token = ""

def is_end(self):
Expand Down Expand Up @@ -227,7 +234,7 @@ def parse_byte_string(self, s, global_vars):
logger.debug("state is STATE_CONSTANT_READ")
b = rss.get_last_byte()
if b >= 0x10 and b < 0x3f:
rss.token = "${" + global_vars[b-0x10] + "}"
rss.token = b"${" + global_vars[b-0x10] + b"}"
b = rss.get_next_byte()
rss.update_state(b)
elif rss.state == rss.STATE_CONCAT_BYTE_READ:
Expand Down Expand Up @@ -335,7 +342,7 @@ def parse_byte_string(self, s, global_vars):
logger.warn("last state is not STATE_END_BYTE_READ ({:d})".format(rss.state))
logger.warn("previous state ({:d})".format(rss.state_stack[len(rss.state_stack)-1]))

logger.info("initial string: " + " ".join("0x{:02x}".format(ord(c)) for c in s))
logger.info("initial string: " + " ".join("0x{:02x}".format(c) for c in s))
logger.info("output_strings (num: {:d}): {:s}".format(len(rss.output_strings), ",".join('"{:s}"'.format(s) for s in rss.output_strings)))
return rss.output_strings

Expand Down
4 changes: 2 additions & 2 deletions reverse-sandbox/sandbox_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_filter_arg_string_by_offset(f, offset):
if ios_major_version >= 10:
f.seek(offset * 8)
s = f.read(4+len)
logger.info("binary string is " + s.encode("hex"))
logger.info("binary string is " + s.hex())
ss = reverse_string.SandboxString()
myss = ss.parse_byte_string(s[4:], global_vars)
actual_string = ""
Expand Down Expand Up @@ -72,7 +72,7 @@ def get_filter_arg_string_by_offset_with_type(f, offset):
if ios_major_version >= 10:
f.seek(base_addr + offset * 8)
s = f.read(4+len)
logger.info("binary string is " + s.encode("hex"))
logger.info("binary string is " + s.hex())
ss = reverse_string.SandboxString()
myss = ss.parse_byte_string(s[4:], global_vars)
append = "literal"
Expand Down
2 changes: 1 addition & 1 deletion reverse-sandbox/sandbox_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def create_regex_list(re):

regex_list = []

version = struct.unpack('>I', ''.join([chr(x) for x in re[:4]]))[0]
version = struct.unpack('>I', b''.join([bytes(chr(x), 'utf-8') for x in re[:4]]))[0]
logger.debug("re.version: 0x%x", version)

i = 4
Expand Down