Skip to content

Commit

Permalink
Merge branch 'aarch64-blr' into 'main'
Browse files Browse the repository at this point in the history
add missing call operation for arm64

See merge request rewriting/ddisasm!1188
  • Loading branch information
aeflores committed Mar 8, 2024
2 parents b13d4d4 + ad0ced0 commit 9edfe9f
Show file tree
Hide file tree
Showing 7 changed files with 256 additions and 37 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
* No longer consider `_x86.get_pc_thunk*` functions as ABI-intrinsic; this
means `_copy` is not appended to the original symbol, and a symbol forwarding
entry is not created.
* Fix handling of BLR instruction in ARM64.
* Fix access size of LDR instruction in ARM64.
* Extend value_reg analysis to support memory loads using a register with
constant address.


# 1.8.0
Expand Down
14 changes: 14 additions & 0 deletions examples/arm64_asm_examples/ex_cfg/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build the ex_cfg example with the AArch64 cross compiler and run it under
# user-mode QEMU, saving the program output to out.txt.
CC=aarch64-linux-gnu-gcc
# Command used to run the cross-compiled binary; -L gives QEMU the directory
# that holds the target's dynamic loader and libraries.
EXEC=qemu-aarch64 -L /usr/aarch64-linux-gnu

# Files produced by the build; `clean` removes them.
TARGETS=ex out.txt

# `all` and `check` both build the binary and run it once to produce out.txt.
.PHONY: all clean check
all: out.txt
check: out.txt
ex: src.s
$(CC) $^ -o $@
out.txt: ex
$(EXEC) $^ > $@
# Also remove any GTIRB files left next to the example.
clean:
rm -f $(TARGETS) *.gtirb
78 changes: 78 additions & 0 deletions examples/arm64_asm_examples/ex_cfg/src.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Test different kinds of calls
.arch armv8-a
.text
# f: exported function that prints .message with puts and returns.
.global f
.type f, %function
f:
# Prologue: push the frame pointer (x29) and link register (x30).
stp x29, x30, [sp, -16]!
mov x29, sp
# x0 = address of .message (page address plus low 12 bits).
adrp x0, .message
add x0, x0, :lo12:.message
bl puts
nop
# Epilogue: restore x29/x30, release the 16-byte frame and return.
ldp x29, x30, [sp], 16
ret

# This function is called from 'call_indirect_offset' but only indirectly
# through the 'g_pointer' which is also accessed indirectly.
# Its body is the same as 'f': it prints .message with puts and returns.
# Unlike 'f' it has no .global directive, so it is a local symbol.
g:
# Prologue: push the frame pointer (x29) and link register (x30).
stp x29, x30, [sp, -16]!
mov x29, sp
# x0 = address of .message (page address plus low 12 bits).
adrp x0, .message
add x0, x0, :lo12:.message
bl puts
nop
# Epilogue: restore x29/x30, release the 16-byte frame and return.
ldp x29, x30, [sp], 16
ret

# main: performs one call of each kind. Every case starts at its own label so
# that the block making the call can be found by symbol name.
.align 2
.global main
.type main, %function
main:
# Case 1: direct call (bl) to 'f', defined in this file.
# The function prologue of main is also part of this block.
call_direct:
stp x29, x30, [sp, -16]!
mov x29, sp
bl f
# Case 2: direct call (bl) to the external function puts, with x0 = .message.
call_direct_external:
adrp x0, .message
add x0, x0, :lo12:.message
bl puts

# Case 3: indirect call (blr) to the address loaded from f_pointer.
call_indirect:
adrp x0, f_pointer
add x0, x0, :lo12:f_pointer
ldr x0, [x0]
blr x0

# Case 4: indirect call (blr) to the address loaded from f_pointer+8.
# g_pointer is the 8-byte entry placed right after f_pointer, so this calls 'g'.
call_indirect_offset:
adrp x0, f_pointer
add x0, x0, :lo12:f_pointer
ldr x0, [x0,#8]
blr x0

# Case 5: indirect call (blr) to puts through puts_pointer.
# x0 carries the argument (.message); x1 carries the loaded function address.
call_indirect_external:
adrp x0, .message
add x0, x0, :lo12:.message
adrp x1, puts_pointer
add x1, x1, :lo12:puts_pointer
ldr x1, [x1]
blr x1
# Epilogue of main: return 0.
final:
mov w0, 0
ldp x29, x30, [sp], 16
ret

# Read-only string printed by every call to puts in this file.
.section .rodata
.align 3
.message:
.string "msg"

# Writable table of function pointers used by the indirect calls in main.
.section .data.rel.local,"aw"
.align 3

# The order of these 8-byte entries matters: 'call_indirect_offset' reads
# f_pointer+8 and expects to find g_pointer there.
f_pointer:
.xword f
g_pointer:
.xword g
puts_pointer:
.xword puts
7 changes: 6 additions & 1 deletion examples/asm_examples/ex_cfg/ex_original.s
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ call_local_reg_offset_pc:
add rax, 8
call [rax]

# Call through a register whose value is loaded from memory:
# rax = address of fun_ptr, then rax = the pointer stored at fun_ptr.
call_local_reg_load:
lea rax, [rip+fun_ptr]
mov rax, [rax]
call rax

je_local_direct:
mov rdi, offset jmp_local_direct
cmp rdi, rdi
Expand All @@ -53,7 +58,7 @@ jmp_local_reg:
jmp rax

jmp_local_reg_offset:
mov rdi, offset call_ext_reg
mov rdi, offset call_ext_reg_printf
mov rax, offset jump_target_ptr-8
add rax, 8
jmp [rax]
Expand Down
4 changes: 4 additions & 0 deletions src/datalog/arch/arm64/arch_arm64.dl
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ logic_operation("BIC").
pointer_size(8).

call_operation("BL").
call_operation("BLR").

syscall_operation("SVC").

Expand All @@ -120,6 +121,9 @@ data_access_size(Operation,as(DestReg,register),Size):-
substr(Operation,0,5) = "LDRSH", Size = 2
).

// A plain "LDR" has no size suffix in its mnemonic, so its access size is
// taken from the register operand: the size that register_size_bytes reports
// for that register.
data_access_size("LDR",as(Reg,register),Size):-
register_size_bytes(Reg, Size).

// ADD reg, reg, #imm
reg_arithmetic_operation(EA,Dst,Src,1,Immediate):-
instruction(EA,_,_,"ADD",Op1,Op2,Op3,0,_,_),
Expand Down
26 changes: 24 additions & 2 deletions src/datalog/value_analysis.dl
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ value_reg(EA,Reg,EA,"NONE",0,as(Val,number),1):-
arch.pc_relative_addr(EA,Reg,Val),
track_register(Reg).

// mov QWORD or DWORD from memory (constant address)
// Load QWORD or DWORD from memory using a simple_data_access_pattern
// That is, a data access that does not require value_reg to be computed.
value_reg(EA,Reg,EA,"NONE",0,Val,1):-
def_used_for_address(EA,Reg,_),
arch.load(EA,SrcOp,_,Reg,_,_,_,_),
Expand All @@ -191,6 +192,27 @@ value_reg(EA,Reg,EA,"NONE",0,Val,1):-
),
Val >= 0.

// Load QWORD or DWORD from memory where memory is accessed using a register
// but the register has a constant value.
// The loaded value is resolved statically from address RegVal + Offset:
// either the data word stored there or, when a relocation exists at that
// address, the target address of the relocation's (defined) symbol.
value_reg(EA,Reg,EA,"NONE",0,Val,1):-
// BaseReg is known to hold the constant RegVal (per const_value_reg_used).
const_value_reg_used(EA,_,_,BaseReg,RegVal),
// The load at EA defines Reg and addresses memory through BaseReg + Offset.
// NOTE(review): the "NONE" argument presumably means "no index register";
// confirm against the declaration of arch.load.
arch.load(EA,SrcOp,_,Reg,BaseReg,"NONE",_,Offset),
def_used_for_address(EA,Reg,_),
instruction_memory_access_size(EA,SrcOp,Size),

// Reject negative sums before reinterpreting the result as an address.
RegVal + Offset >= 0,
MemAddr = as(RegVal + Offset,address),
// Only accesses of 4 to 8 bytes are considered.
4 <= Size, Size <= 8,
(
// No relocation at MemAddr: use the word stored there.
data_word(MemAddr,Size,Val),
!symbolic_expr_from_relocation(MemAddr,_,_,_,_)
;
// Relocation at MemAddr: use the relocation's target address, but only when
// its symbol is a defined symbol.
symbolic_expr_from_relocation(MemAddr,Size, Symbol, _, TargetAddr),
defined_symbol(_,_,_,_,_,_,_,_,Symbol),
Val = as(TargetAddr,number)
),
// Negative values are discarded.
Val >= 0.

/**
Constant folding for bitwise operations (reg op reg)
*/
Expand Down Expand Up @@ -364,7 +386,7 @@ best_value_reg(EA,Reg,0,MaxMultiplier,Offset,"incomplete"):-
/**
A constant value is used in a data access.

EADef: The address where Value was orginally defined (which may not necessarily
EADef: The address where Value was originally defined (which may not necessarily
be where Reg is defined, if that value is moved to a different register later).

EARegDef: The address where Reg is defined as Value. Often EADef = EARegDef.
Expand Down
160 changes: 126 additions & 34 deletions tests/cfg_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,46 @@ def check_edges(
f"unexpected edges from {src}",
)

def check_plt_edges(
    self,
    module: gtirb.Module,
    plt_calls: List[Tuple[str, EdgeLabel, EdgeLabel, str]],
) -> None:
    """
    Verify that every call listed in `plt_calls` reaches its expected
    target by going through a PLT block.

    Each element of `plt_calls` is a tuple
    `(src, edge_label1, edge_label2, tgt)`:

    - `src`: name of the symbol whose referent is the calling block.
    - `edge_label1`: label of the single edge expected from the calling
      block to a block in the `.plt` section.
    - `edge_label2`: label of the single edge expected from that PLT
      block to the final target block.
    - `tgt`: name of a symbol that must reference the final target block.
    """

    def edges_with_label(block, label):
        # Outgoing edges of `block` whose label equals `label`.
        return [
            candidate
            for candidate in block.outgoing_edges
            if candidate.label == label
        ]

    for src, edge_label1, edge_label2, tgt in plt_calls:
        caller = next(module.symbols_named(src)).referent

        # First hop: caller -> PLT block.
        to_plt = edges_with_label(caller, edge_label1)
        self.assertEqual(
            len(to_plt),
            1,
            f"Expected one edge with label {edge_label1} from {src}",
        )
        plt_block = to_plt[0].target
        self.assertEqual(plt_block.section.name, ".plt")

        # Second hop: PLT block -> final target.
        from_plt = edges_with_label(plt_block, edge_label2)
        self.assertEqual(
            len(from_plt),
            1,
            f"Expected one edge with label {edge_label2} "
            f"from block at {plt_block.address:0x} called from {src}",
        )
        target_names = [
            symbol.name for symbol in from_plt[0].target.references
        ]
        self.assertIn(tgt, target_names)

@unittest.skipUnless(
platform.system() == "Linux", "This test is linux only."
)
Expand Down Expand Up @@ -712,6 +752,13 @@ def test_jump_and_calls_bin(self):
),
],
"call_local_reg_offset_pc": [
("fun", EdgeLabel(EdgeType.Call, False, False)),
(
"call_local_reg_load",
EdgeLabel(EdgeType.Fallthrough, False, True),
),
],
"call_local_reg_load": [
("fun", EdgeLabel(EdgeType.Call, False, False)),
(
"je_local_direct",
Expand Down Expand Up @@ -758,52 +805,27 @@ def test_jump_and_calls_bin(self):
# For PLT calls, check that we can traverse a list of edges
# (passing through the PLT block) and end up in the right block
# (with the right symbol)
plt_calls = {
"call_ext_reg": (
plt_calls = [
(
"call_ext_reg",
EdgeLabel(EdgeType.Call, False, False),
EdgeLabel(EdgeType.Branch, False, False),
"puts",
),
"call_ext_indirect": (
(
"call_ext_indirect",
EdgeLabel(EdgeType.Call, False, False),
EdgeLabel(EdgeType.Branch, False, False),
"puts",
),
"call_ext_plt": (
(
"call_ext_plt",
EdgeLabel(EdgeType.Call, False, True),
EdgeLabel(EdgeType.Branch, False, False),
"puts",
),
}

for src, path in plt_calls.items():
edge_label1, edge_label2, tgt = path
src_block = next(m.symbols_named(src)).referent
edges = [
edge
for edge in src_block.outgoing_edges
if edge.label == edge_label1
]
self.assertEqual(
len(edges),
1,
f"Expected one edge with label {edge_label1} from {src}",
)
plt_block = edges[0].target
self.assertEqual(plt_block.section.name, ".plt")
edges_plt = [
edge
for edge in plt_block.outgoing_edges
if edge.label == edge_label2
]
self.assertEqual(
len(edges_plt),
1,
f"Expected one edge with label {edge_label2} "
f"from block at {plt_block.address:0x} called from {src}",
)
tgt_block = edges_plt[0].target
self.assertIn(tgt, [s.name for s in tgt_block.references])
]
self.check_plt_edges(m, plt_calls)

@unittest.skipUnless(
platform.system() == "Linux", "This test is linux only."
Expand Down Expand Up @@ -873,6 +895,13 @@ def test_jump_and_calls_object(self):
),
],
"call_local_reg_offset_pc": [
("fun", EdgeLabel(EdgeType.Call, False, False)),
(
"call_local_reg_load",
EdgeLabel(EdgeType.Fallthrough, False, True),
),
],
"call_local_reg_load": [
("fun", EdgeLabel(EdgeType.Call, False, False)),
(
"je_local_direct",
Expand Down Expand Up @@ -966,6 +995,69 @@ def test_pe_api_call(self):
for edge in incoming_edges:
self.assertEqual(edge.label.type, gtirb.EdgeType.Call)

@unittest.skipUnless(
    platform.system() == "Linux", "This test is linux only."
)
def test_arm64_calls(self):
    """
    Check the CFG edges generated for the arm64 `ex_cfg` example, which
    contains direct, indirect and external calls.
    """
    binary = "ex"
    with cd(ex_arm64_asm_dir / "ex_cfg"):
        compiled = compile(
            "aarch64-linux-gnu-gcc", "aarch64-linux-gnu-g++", "-O0", []
        )
        self.assertTrue(compiled)
        self.assertTrue(disassemble(binary, format="--ir")[0])
        module = gtirb.IR.load_protobuf(binary + ".gtirb").modules[0]

        # Edge labels shared by the expectations below.
        direct_call = EdgeLabel(EdgeType.Call, False, True)
        indirect_call = EdgeLabel(EdgeType.Call, False, False)
        fallthrough = EdgeLabel(EdgeType.Fallthrough, False, True)

        # Expected outgoing edges of each block.
        # Source and target blocks are identified through their symbols.
        expected_cfg = {
            "call_direct": [
                ("f", direct_call),
                ("call_direct_external", fallthrough),
            ],
            "call_indirect": [
                ("f", indirect_call),
                ("call_indirect_offset", fallthrough),
            ],
            "call_indirect_offset": [
                ("g", indirect_call),
                ("call_indirect_external", fallthrough),
            ],
            "call_indirect_external": [
                ("puts", indirect_call),
                ("final", fallthrough),
            ],
        }
        self.check_edges(module, expected_cfg)

        # The direct external call must reach `puts` through the PLT.
        plt_calls = [
            (
                "call_direct_external",
                direct_call,
                EdgeLabel(EdgeType.Branch, False, False),
                "puts",
            )
        ]
        self.check_plt_edges(module, plt_calls)


if __name__ == "__main__":
unittest.main()

0 comments on commit 9edfe9f

Please sign in to comment.