diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b52f6bbe1..780a65e95 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,6 +24,7 @@ else() endif() set(DATALOG_BASE_SOURCES + datalog/arch/arch.dl datalog/binary/elf/elf_binaries.dl datalog/binary/elf/exceptions.dl datalog/binary/elf/relocations.dl @@ -36,6 +37,7 @@ set(DATALOG_BASE_SOURCES datalog/data_access_analysis.dl datalog/empty_range.dl datalog/basic_function_inference.dl + datalog/jump_tables.dl datalog/main.dl datalog/pointer_reatribution.dl datalog/register_type_analysis.dl diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index 0eebbcc17..ed5b5476f 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -180,7 +180,23 @@ Arithmetic operation on two source registers /** The location and register where the address of main is loaded. */ -.decl main_load_reg(EA:address,Reg:register) +.decl inferred_main_in_reg(EA:address,Reg:register) + +/** + * Locate where main() is dispatched (i.e., where __libc_start_main is called.) + */ +.decl inferred_main_dispatch(EA:address) overridable + +// Most runtimes dispatch main with the first "call" from the binary entry point. +inferred_main_dispatch(CallEA):- + do_infer_main_function(), + CallEA = min EA:{ + start_function(Entry), + instruction(EA,_,_,CallOp,_,_,_,_,_,_), + arch.call_operation(CallOp), + code(EA), + EA >= Entry + }. // =========================================================================== // Registers diff --git a/src/datalog/arch/arm64/arch_arm64.dl b/src/datalog/arch/arm64/arch_arm64.dl index a5b5b7b1c..e83ddda30 100644 --- a/src/datalog/arch/arm64/arch_arm64.dl +++ b/src/datalog/arch/arm64/arch_arm64.dl @@ -131,7 +131,7 @@ reg_reg_arithmetic_operation(EA,Reg2,Reg1,Reg2,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg2,Reg1,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg1,Reg2,1,0). -main_load_reg(Main_dispatch,"X0"):- - main_function_dispatch(Main_dispatch). +inferred_main_in_reg(EA,"X0"):- + inferred_main_dispatch(EA). } diff --git a/src/datalog/arch/intel/arch_x86_32.dl b/src/datalog/arch/intel/arch_x86_32.dl index a700978bc..3c7f71e3f 100644 --- a/src/datalog/arch/intel/arch_x86_32.dl +++ b/src/datalog/arch/intel/arch_x86_32.dl @@ -23,35 +23,38 @@ #include "arch_x86.dl" .comp X86_32 : X86 { + #include "registers_x86_32.dl" + pointer_size(4). -main_load_reg(PushEA,Reg):- - // x86 uses a unique pattern for finding candidates for - // main_function_dispatch, implemented inline here. On 32-bit x86, an - // additional function has been observed in _start, so we add an additional - // heuristic that checks for a consecutive HLT instruction. - Main_dispatch = min EA:{ +inferred_main_in_reg(Push,Reg):- + inferred_main_dispatch(Call), + // Get the value pushed onto the stack as an argument (assumes cdecl). + next(Push,Call), + instruction_get_operation(Push,"PUSH"), + instruction_get_src_op(Push,_,Op), + ( + op_indirect_contains_reg(Op,Reg); + op_regdirect_contains_reg(Op,Reg) + ). + +.override inferred_main_dispatch + +// ELF X86-32 may have an additional function call in _start, so we add an +// additional heuristic that checks for a consecutive HLT instruction. +inferred_main_dispatch(CallEA):- + do_infer_main_function(), + CallEA = min EA:{ start_function(Entry), - instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), - code(HltEA), instruction(EA,_,_,CallOp,_,_,_,_,_,_), arch.call_operation(CallOp), code(EA), next(EA,HltEA), - EA>=Entry - }, - - // Since we don't use main_function_dispatch(), we must explicitly check - // do_infer_main_function. - do_infer_main_function(), + instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), + EA >= Entry + }. - // Assume cdecl calling convention. - // Get the value pushed onto the stack as an argument. - next(PushEA,Main_dispatch), - code(PushEA), - instruction(PushEA,_,_,"PUSH",PushOp,_,_,_,_,_), - op_regdirect_contains_reg(PushOp,Reg). } .decl get_pc_thunk(EA:address,Reg:register) @@ -111,7 +114,7 @@ reg_has_got(EA_load,Reg_load):- arch.frame_pointer(Reg_base), reg_loaded_from_stack(EA_load,Reg_load,Reg_base,StackPos,StackFrame). -inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","DEFAULT","NONE"):- +inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","LOCAL","NONE"):- !symbol(_,_,_,_,_,_,_,"_GLOBAL_OFFSET_TABLE_"), got_reference_pointer(EA). diff --git a/src/datalog/arch/intel/arch_x86_64.dl b/src/datalog/arch/intel/arch_x86_64.dl index 2a67bd2dd..4e4d570c8 100644 --- a/src/datalog/arch/intel/arch_x86_64.dl +++ b/src/datalog/arch/intel/arch_x86_64.dl @@ -23,11 +23,14 @@ #include "arch_x86.dl" .comp X86_64 : X86 { + #include "registers_x86_64.dl" + pointer_size(8). -main_load_reg(Main_dispatch,"RDI"):- - main_function_dispatch(Main_dispatch). +inferred_main_in_reg(EA,"RDI"):- + inferred_main_dispatch(EA). + } symbolic_operand_attribute(EA,Index,"PltRef"):- diff --git a/src/datalog/binary/elf/elf_binaries.dl b/src/datalog/binary/elf/elf_binaries.dl index c33d5dd09..9493577f4 100644 --- a/src/datalog/binary/elf/elf_binaries.dl +++ b/src/datalog/binary/elf/elf_binaries.dl @@ -255,36 +255,29 @@ do_infer_main_function():- binary_format("ELF"), !function_symbol(_,"main"). -/** - * Locate where main() is dispatched (i.e., where __libc_start_main is called.) - */ -.decl main_function_dispatch(EA:address) - -main_function_dispatch(CallEA):- - do_infer_main_function(), - CallEA = min EA:{ - start_function(Entry), - instruction(EA,_,_,CallOp,_,_,_,_,_,_), - arch.call_operation(CallOp), - code(EA), - EA>=Entry - }. - /** * Infer the location of the main function. */ .decl inferred_main_function(Main_location:address) + inferred_main_function(Main_location):- - arch.main_load_reg(Main_load, Reg), + arch.inferred_main_in_reg(Main_load,Reg), def_used(EA_def,Reg,Main_load,_), value_reg(EA_def,Reg,_,"NONE",_,Offset,_), Main_location = as(Offset, address), code(Main_location). +inferred_main_function(Main):- + arch.inferred_main_in_reg(EA,Reg), + instruction_get_op(EA,_,Op), + op_indirect_contains_reg(Op,Reg), + symbolic_operand(EA,_,Dest,"data"), + address_in_data(Dest,Main). + // Inform the use_def analysis that the register is used here as an address. -used(Main_load,Reg,0), -used_for_address(Main_load,Reg):- - arch.main_load_reg(Main_load, Reg). +used(EA,Reg,0), +used_for_address(EA,Reg):- + arch.inferred_main_in_reg(EA,Reg). main_function(Main_location):- inferred_main_function(Main_location). diff --git a/tests/linux-elf-x86.yaml b/tests/linux-elf-x86.yaml index 8f631ac8b..22f0a3ba0 100644 --- a/tests/linux-elf-x86.yaml +++ b/tests/linux-elf-x86.yaml @@ -40,6 +40,11 @@ position-independent: &position-independent flags: ["-m32", "-fpie"] skip: false +strip: &strip + test: + strip_exe: "strip" + strip: true + tests: # ---------------------------------------------------------------------------- # Small C and C++ examples. @@ -222,3 +227,14 @@ tests: reassemble: compiler: "g++" flags: ["-m32", "-lpthread", "-fpie"] + + # ---------------------------------------------------------------------------- + # Small C and C++ examples. (stripped) + # ---------------------------------------------------------------------------- + - name: ex1 + <<: *default + <<: *strip + + - name: ex1 + <<: *position-independent + <<: *strip diff --git a/tests/main_inference_test.py b/tests/main_inference_test.py index 8ab94105e..13f0add82 100644 --- a/tests/main_inference_test.py +++ b/tests/main_inference_test.py @@ -21,7 +21,7 @@ def get_main_address(self, module): self.fail("No main symbol disassembled") def check_main_inference( - self, make_dir, binary, strip_exe="strip", **compile_opts, + self, make_dir, binary, strip=False, strip_exe="strip", **compile_opts, ): """ Test that the main function is inferred in the same location for @@ -35,7 +35,7 @@ def check_main_inference( disassemble( binary, strip_exe, - False, + strip, False, format="--ir", extension="gtirb", @@ -80,6 +80,7 @@ def test_main_ex1(self): continue # no ex1 in this .yaml. arch = test.get("arch") + strip = test["test"].get("strip", False) strip_exe = test["test"]["strip_exe"] exec_wrapper = test["test"]["wrapper"] compilers = test["build"]["c"] @@ -95,6 +96,7 @@ def test_main_ex1(self): self.check_main_inference( ex_dir / "ex1", "ex", + strip=strip, strip_exe=strip_exe, compiler=compiler, cxx_compiler=cxx_compiler,