Skip to content

Commit

Permalink
Merge branch 'indirect-main' into 'master'
Browse files Browse the repository at this point in the history
Adjust main inference for indirect load (master)

See merge request rewriting/ddisasm!728
  • Loading branch information
Kevin Warrick committed Jan 27, 2022
2 parents 93f45d0 + eeb5012 commit dfdf8ad
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 47 deletions.
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ else()
endif()

set(DATALOG_BASE_SOURCES
datalog/arch/arch.dl
datalog/binary/elf/elf_binaries.dl
datalog/binary/elf/exceptions.dl
datalog/binary/elf/relocations.dl
Expand All @@ -36,6 +37,7 @@ set(DATALOG_BASE_SOURCES
datalog/data_access_analysis.dl
datalog/empty_range.dl
datalog/basic_function_inference.dl
datalog/jump_tables.dl
datalog/main.dl
datalog/pointer_reatribution.dl
datalog/register_type_analysis.dl
Expand Down
18 changes: 17 additions & 1 deletion src/datalog/arch/arch.dl
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,23 @@ Arithmetic operation on two source registers
/**
The location and register where the address of main is loaded.
*/
.decl main_load_reg(EA:address,Reg:register)
.decl inferred_main_in_reg(EA:address,Reg:register)

/**
* Locate where main() is dispatched (i.e., where __libc_start_main is called).
*
* EA is the address of the call instruction selected as the dispatch site.
* The relation is declared overridable so that an architecture component can
* replace the default rule below with its own heuristic.
*/
.decl inferred_main_dispatch(EA:address) overridable

// Most runtimes dispatch main with the first "call" from the binary entry point.
// The rule only fires when do_infer_main_function() holds.
inferred_main_dispatch(CallEA):-
do_infer_main_function(),
// Pick the lowest address EA that satisfies every condition in the aggregate.
CallEA = min EA:{
start_function(Entry),
// The instruction at EA has an operation that the architecture lists as a
// call operation ...
instruction(EA,_,_,CallOp,_,_,_,_,_,_),
arch.call_operation(CallOp),
// ... EA is in the code relation ...
code(EA),
// ... and EA is not below the Entry address given by start_function.
EA >= Entry
}.

// ===========================================================================
// Registers
Expand Down
4 changes: 2 additions & 2 deletions src/datalog/arch/arm64/arch_arm64.dl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ reg_reg_arithmetic_operation(EA,Reg2,Reg1,Reg2,1,0):-
reg_reg_arithmetic_operation(EA,Reg_def,Reg2,Reg1,1,0):-
reg_reg_arithmetic_operation(EA,Reg_def,Reg1,Reg2,1,0).

main_load_reg(Main_dispatch,"X0"):-
main_function_dispatch(Main_dispatch).
// At the inferred main dispatch site, X0 is the register taken to hold the
// address of main.
// NOTE(review): presumably because X0 carries the first call argument on
// ARM64 -- confirm.
inferred_main_in_reg(EA,"X0"):-
inferred_main_dispatch(EA).

}
45 changes: 24 additions & 21 deletions src/datalog/arch/intel/arch_x86_32.dl
Original file line number Diff line number Diff line change
Expand Up @@ -23,35 +23,38 @@
#include "arch_x86.dl"

.comp X86_32 : X86 {

#include "registers_x86_32.dl"

pointer_size(4).

main_load_reg(PushEA,Reg):-
// x86 uses a unique pattern for finding candidates for
// main_function_dispatch, implemented inline here. On 32-bit x86, an
// additional function has been observed in _start, so we add an additional
// heuristic that checks for a consecutive HLT instruction.
Main_dispatch = min EA:{
// Find the PUSH instruction paired with the inferred dispatch call through
// next(Push,Call), and report each register found in its source operand as the
// register holding the address of main.
// NOTE(review): next(Push,Call) presumably means Push is the instruction
// directly before Call -- confirm against the definition of next.
inferred_main_in_reg(Push,Reg):-
inferred_main_dispatch(Call),
// Get the value pushed onto the stack as an argument (assumes cdecl).
next(Push,Call),
instruction_get_operation(Push,"PUSH"),
instruction_get_src_op(Push,_,Op),
// Accept the register whether it appears inside an indirect operand or as a
// register-direct operand.
(
op_indirect_contains_reg(Op,Reg);
op_regdirect_contains_reg(Op,Reg)
).

.override inferred_main_dispatch

// ELF X86-32 may have an additional function call in _start, so we add an
// additional heuristic that checks for a consecutive HLT instruction.
inferred_main_dispatch(CallEA):-
do_infer_main_function(),
CallEA = min EA:{
start_function(Entry),
instruction(HltEA,_,_,"HLT",_,_,_,_,_,_),
code(HltEA),
instruction(EA,_,_,CallOp,_,_,_,_,_,_),
arch.call_operation(CallOp),
code(EA),
next(EA,HltEA),
EA>=Entry
},

// Since we don't use main_function_dispatch(), we must explicitly check
// do_infer_main_function.
do_infer_main_function(),
instruction(HltEA,_,_,"HLT",_,_,_,_,_,_),
EA >= Entry
}.

// Assume cdecl calling convention.
// Get the value pushed onto the stack as an argument.
next(PushEA,Main_dispatch),
code(PushEA),
instruction(PushEA,_,_,"PUSH",PushOp,_,_,_,_,_),
op_regdirect_contains_reg(PushOp,Reg).
}

.decl get_pc_thunk(EA:address,Reg:register)
Expand Down Expand Up @@ -111,7 +114,7 @@ reg_has_got(EA_load,Reg_load):-
arch.frame_pointer(Reg_base),
reg_loaded_from_stack(EA_load,Reg_load,Reg_base,StackPos,StackFrame).

inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","DEFAULT","NONE"):-
inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","LOCAL","NONE"):-
!symbol(_,_,_,_,_,_,_,"_GLOBAL_OFFSET_TABLE_"),
got_reference_pointer(EA).

Expand Down
7 changes: 5 additions & 2 deletions src/datalog/arch/intel/arch_x86_64.dl
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@
#include "arch_x86.dl"

.comp X86_64 : X86 {

#include "registers_x86_64.dl"

pointer_size(8).

main_load_reg(Main_dispatch,"RDI"):-
main_function_dispatch(Main_dispatch).
// At the inferred main dispatch site, RDI is the register taken to hold the
// address of main.
// NOTE(review): presumably because RDI carries the first call argument on
// x86-64 -- confirm.
inferred_main_in_reg(EA,"RDI"):-
inferred_main_dispatch(EA).

}

symbolic_operand_attribute(EA,Index,"PltRef"):-
Expand Down
31 changes: 12 additions & 19 deletions src/datalog/binary/elf/elf_binaries.dl
Original file line number Diff line number Diff line change
Expand Up @@ -255,36 +255,29 @@ do_infer_main_function():-
binary_format("ELF"),
!function_symbol(_,"main").

/**
* Locate where main() is dispatched (i.e., where __libc_start_main is called.)
*/
.decl main_function_dispatch(EA:address)

main_function_dispatch(CallEA):-
do_infer_main_function(),
CallEA = min EA:{
start_function(Entry),
instruction(EA,_,_,CallOp,_,_,_,_,_,_),
arch.call_operation(CallOp),
code(EA),
EA>=Entry
}.

/**
* Infer the location of the main function.
*/
.decl inferred_main_function(Main_location:address)

inferred_main_function(Main_location):-
arch.main_load_reg(Main_load, Reg),
arch.inferred_main_in_reg(Main_load,Reg),
def_used(EA_def,Reg,Main_load,_),
value_reg(EA_def,Reg,_,"NONE",_,Offset,_),
Main_location = as(Offset, address),
code(Main_location).

// Second way to infer main, for an instruction at EA that refers to Reg through
// an indirect operand: follow the instruction's "data" symbolic operand to Dest
// and take the address that address_in_data associates with Dest.
// NOTE(review): presumably Dest is a data slot that stores the address of main
// (an indirect load) -- confirm against address_in_data's definition.
inferred_main_function(Main):-
arch.inferred_main_in_reg(EA,Reg),
// Some operand of the instruction at EA is an indirect operand containing Reg.
instruction_get_op(EA,_,Op),
op_indirect_contains_reg(Op,Reg),
symbolic_operand(EA,_,Dest,"data"),
address_in_data(Dest,Main).

// Inform the use_def analysis that the register is used here as an address.
used(Main_load,Reg,0),
used_for_address(Main_load,Reg):-
arch.main_load_reg(Main_load, Reg).
used(EA,Reg,0),
used_for_address(EA,Reg):-
arch.inferred_main_in_reg(EA,Reg).

main_function(Main_location):-
inferred_main_function(Main_location).
Expand Down
16 changes: 16 additions & 0 deletions tests/linux-elf-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ position-independent: &position-independent
flags: ["-m32", "-fpie"]
skip: false

strip: &strip
test:
strip_exe: "strip"
strip: true

tests:
# ----------------------------------------------------------------------------
# Small C and C++ examples.
Expand Down Expand Up @@ -222,3 +227,14 @@ tests:
reassemble:
compiler: "g++"
flags: ["-m32", "-lpthread", "-fpie"]

# ----------------------------------------------------------------------------
# Small C and C++ examples. (stripped)
# ----------------------------------------------------------------------------
- name: ex1
<<: *default
<<: *strip

- name: ex1
<<: *position-independent
<<: *strip
6 changes: 4 additions & 2 deletions tests/main_inference_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def get_main_address(self, module):
self.fail("No main symbol disassembled")

def check_main_inference(
self, make_dir, binary, strip_exe="strip", **compile_opts,
self, make_dir, binary, strip=False, strip_exe="strip", **compile_opts,
):
"""
Test that the main function is inferred in the same location for
Expand All @@ -35,7 +35,7 @@ def check_main_inference(
disassemble(
binary,
strip_exe,
False,
strip,
False,
format="--ir",
extension="gtirb",
Expand Down Expand Up @@ -80,6 +80,7 @@ def test_main_ex1(self):
continue # no ex1 in this .yaml.

arch = test.get("arch")
strip = test["test"].get("strip", False)
strip_exe = test["test"]["strip_exe"]
exec_wrapper = test["test"]["wrapper"]
compilers = test["build"]["c"]
Expand All @@ -95,6 +96,7 @@ def test_main_ex1(self):
self.check_main_inference(
ex_dir / "ex1",
"ex",
strip=strip,
strip_exe=strip_exe,
compiler=compiler,
cxx_compiler=cxx_compiler,
Expand Down

0 comments on commit dfdf8ad

Please sign in to comment.