From 34f96bb981d736d27ac8f4042762204a7c42df98 Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Wed, 26 Jan 2022 12:39:22 -0500 Subject: [PATCH 1/7] Adjust main inference for indirect load --- src/datalog/arch/arch.dl | 2 +- src/datalog/arch/arm64/arch_arm64.dl | 4 +-- src/datalog/arch/intel/arch_x86_32.dl | 38 ++++++++------------------ src/datalog/arch/intel/arch_x86_64.dl | 7 +++-- src/datalog/binary/elf/elf_binaries.dl | 38 ++++++++++++++++++++++---- 5 files changed, 51 insertions(+), 38 deletions(-) diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index 0eebbcc17..aa95224c1 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -180,7 +180,7 @@ Arithmetic operation on two source registers /** The location and register where the address of main is loaded. */ -.decl main_load_reg(EA:address,Reg:register) +.decl inferred_main_reg(EA:address,Reg:register) // =========================================================================== // Registers diff --git a/src/datalog/arch/arm64/arch_arm64.dl b/src/datalog/arch/arm64/arch_arm64.dl index a5b5b7b1c..c5dddf230 100644 --- a/src/datalog/arch/arm64/arch_arm64.dl +++ b/src/datalog/arch/arm64/arch_arm64.dl @@ -131,7 +131,7 @@ reg_reg_arithmetic_operation(EA,Reg2,Reg1,Reg2,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg2,Reg1,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg1,Reg2,1,0). -main_load_reg(Main_dispatch,"X0"):- - main_function_dispatch(Main_dispatch). +inferred_main_reg(EA,"X0"):- + inferred_main_dispatch(EA). } diff --git a/src/datalog/arch/intel/arch_x86_32.dl b/src/datalog/arch/intel/arch_x86_32.dl index a700978bc..6d8d0afcc 100644 --- a/src/datalog/arch/intel/arch_x86_32.dl +++ b/src/datalog/arch/intel/arch_x86_32.dl @@ -26,32 +26,16 @@ #include "registers_x86_32.dl" pointer_size(4). -main_load_reg(PushEA,Reg):- - // x86 uses a unique pattern for finding candidates for - // main_function_dispatch, implemented inline here. On 32-bit x86, an - // additional function has been observed in _start, so we add an additional - // heuristic that checks for a consecutive HLT instruction. - Main_dispatch = min EA:{ - start_function(Entry), - instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), - code(HltEA), - instruction(EA,_,_,CallOp,_,_,_,_,_,_), - arch.call_operation(CallOp), - code(EA), - next(EA,HltEA), - EA>=Entry - }, - - // Since we don't use main_function_dispatch(), we must explicitly check - // do_infer_main_function. - do_infer_main_function(), - - // Assume cdecl calling convention. - // Get the value pushed onto the stack as an argument. - next(PushEA,Main_dispatch), - code(PushEA), - instruction(PushEA,_,_,"PUSH",PushOp,_,_,_,_,_), - op_regdirect_contains_reg(PushOp,Reg). +inferred_main_reg(Push,Reg):- + inferred_main_dispatch(Call), + // Get the value pushed onto the stack as an argument (assumes cdecl). + next(Push,Call), + instruction_get_operation(Push,"PUSH"), + instruction_get_src_op(Push,_,Op), + ( + op_indirect_contains_reg(Op,Reg); + op_regdirect_contains_reg(Op,Reg) + ). } .decl get_pc_thunk(EA:address,Reg:register) @@ -111,7 +95,7 @@ reg_has_got(EA_load,Reg_load):- arch.frame_pointer(Reg_base), reg_loaded_from_stack(EA_load,Reg_load,Reg_base,StackPos,StackFrame). -inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","DEFAULT","NONE"):- +inferred_symbol_name(EA,"_GLOBAL_OFFSET_TABLE_","LOCAL","NONE"):- !symbol(_,_,_,_,_,_,_,"_GLOBAL_OFFSET_TABLE_"), got_reference_pointer(EA). diff --git a/src/datalog/arch/intel/arch_x86_64.dl b/src/datalog/arch/intel/arch_x86_64.dl index 2a67bd2dd..7f337e072 100644 --- a/src/datalog/arch/intel/arch_x86_64.dl +++ b/src/datalog/arch/intel/arch_x86_64.dl @@ -23,11 +23,14 @@ #include "arch_x86.dl" .comp X86_64 : X86 { + #include "registers_x86_64.dl" + pointer_size(8). -main_load_reg(Main_dispatch,"RDI"):- - main_function_dispatch(Main_dispatch). +inferred_main_reg(EA,"RDI"):- + inferred_main_dispatch(EA). + } symbolic_operand_attribute(EA,Index,"PltRef"):- diff --git a/src/datalog/binary/elf/elf_binaries.dl b/src/datalog/binary/elf/elf_binaries.dl index c33d5dd09..0303272fd 100644 --- a/src/datalog/binary/elf/elf_binaries.dl +++ b/src/datalog/binary/elf/elf_binaries.dl @@ -258,33 +258,59 @@ do_infer_main_function():- /** * Locate where main() is dispatched (i.e., where __libc_start_main is called.) */ -.decl main_function_dispatch(EA:address) +.decl inferred_main_dispatch(EA:address) +.output inferred_main_dispatch -main_function_dispatch(CallEA):- +// Most runtimes dispatch main with the first "call" from the binary entry point. +inferred_main_dispatch(EA):- + !binary_isa("X86"), do_infer_main_function(), - CallEA = min EA:{ + EA = min EA:{ start_function(Entry), instruction(EA,_,_,CallOp,_,_,_,_,_,_), arch.call_operation(CallOp), code(EA), - EA>=Entry + EA >= Entry }. +// ELF X86-32 may have an additional function call in _start, so we add an +// additional heuristic that checks for a consecutive HLT instruction. +inferred_main_dispatch(EA):- + binary_isa("X86"), + do_infer_main_function(), + EA = min EA:{ + start_function(Entry), + instruction(EA,_,_,CallOp,_,_,_,_,_,_), + arch.call_operation(CallOp), + code(EA), + next(EA,HltEA), + instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), + EA >= Entry + }. + /** * Infer the location of the main function. */ .decl inferred_main_function(Main_location:address) + inferred_main_function(Main_location):- - arch.main_load_reg(Main_load, Reg), + arch.inferred_main_reg(Main_load,Reg), def_used(EA_def,Reg,Main_load,_), value_reg(EA_def,Reg,_,"NONE",_,Offset,_), Main_location = as(Offset, address), code(Main_location). +inferred_main_function(Main):- + arch.inferred_main_reg(EA,Reg), + instruction_get_op(EA,_,Op), + op_indirect_contains_reg(Op,Reg), + symbolic_operand(EA,_,Dest,"data"), + address_in_data(Dest,Main). + // Inform the use_def analysis that the register is used here as an address. used(Main_load,Reg,0), used_for_address(Main_load,Reg):- - arch.main_load_reg(Main_load, Reg). + arch.inferred_main_reg(Main_load, Reg). main_function(Main_location):- inferred_main_function(Main_location). From 1be9884fa49e7ae11420654098599495245a6220 Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Wed, 26 Jan 2022 12:40:11 -0500 Subject: [PATCH 2/7] Add stripped test --- tests/linux-elf-x86.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/linux-elf-x86.yaml b/tests/linux-elf-x86.yaml index 8f631ac8b..22f0a3ba0 100644 --- a/tests/linux-elf-x86.yaml +++ b/tests/linux-elf-x86.yaml @@ -40,6 +40,11 @@ position-independent: &position-independent flags: ["-m32", "-fpie"] skip: false +strip: &strip + test: + strip_exe: "strip" + strip: true + tests: # ---------------------------------------------------------------------------- # Small C and C++ examples. @@ -222,3 +227,14 @@ tests: reassemble: compiler: "g++" flags: ["-m32", "-lpthread", "-fpie"] + + # ---------------------------------------------------------------------------- + # Small C and C++ examples. (stripped) + # ---------------------------------------------------------------------------- + - name: ex1 + <<: *default + <<: *strip + + - name: ex1 + <<: *position-independent + <<: *strip From 3f4960ed4d6cf4d5faee5ec3668b05cb1883a886 Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Wed, 26 Jan 2022 12:52:26 -0500 Subject: [PATCH 3/7] Tidy --- src/datalog/binary/elf/elf_binaries.dl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/datalog/binary/elf/elf_binaries.dl b/src/datalog/binary/elf/elf_binaries.dl index 0303272fd..471582374 100644 --- a/src/datalog/binary/elf/elf_binaries.dl +++ b/src/datalog/binary/elf/elf_binaries.dl @@ -259,13 +259,12 @@ do_infer_main_function():- * Locate where main() is dispatched (i.e., where __libc_start_main is called.) */ .decl inferred_main_dispatch(EA:address) -.output inferred_main_dispatch // Most runtimes dispatch main with the first "call" from the binary entry point. -inferred_main_dispatch(EA):- +inferred_main_dispatch(CallEA):- !binary_isa("X86"), do_infer_main_function(), - EA = min EA:{ + CallEA = min EA:{ start_function(Entry), instruction(EA,_,_,CallOp,_,_,_,_,_,_), arch.call_operation(CallOp), @@ -275,10 +274,10 @@ inferred_main_dispatch(EA):- // ELF X86-32 may have an additional function call in _start, so we add an // additional heuristic that checks for a consecutive HLT instruction. -inferred_main_dispatch(EA):- +inferred_main_dispatch(CallEA):- binary_isa("X86"), do_infer_main_function(), - EA = min EA:{ + CallEA = min EA:{ start_function(Entry), instruction(EA,_,_,CallOp,_,_,_,_,_,_), arch.call_operation(CallOp), @@ -308,9 +307,9 @@ inferred_main_function(Main):- address_in_data(Dest,Main). // Inform the use_def analysis that the register is used here as an address. -used(Main_load,Reg,0), -used_for_address(Main_load,Reg):- - arch.inferred_main_reg(Main_load, Reg). +used(EA,Reg,0), +used_for_address(EA,Reg):- + arch.inferred_main_reg(EA,Reg). main_function(Main_location):- inferred_main_function(Main_location). From a931b9666e9113fef6f5f2c83c2f6f11354ccc21 Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Wed, 26 Jan 2022 13:38:47 -0500 Subject: [PATCH 4/7] Add strip for main inference tests --- tests/main_inference_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/main_inference_test.py b/tests/main_inference_test.py index 8ab94105e..13f0add82 100644 --- a/tests/main_inference_test.py +++ b/tests/main_inference_test.py @@ -21,7 +21,7 @@ def get_main_address(self, module): self.fail("No main symbol disassembled") def check_main_inference( - self, make_dir, binary, strip_exe="strip", **compile_opts, + self, make_dir, binary, strip=False, strip_exe="strip", **compile_opts, ): """ Test that the main function is inferred in the same location for @@ -35,7 +35,7 @@ def check_main_inference( disassemble( binary, strip_exe, - False, + strip, False, format="--ir", extension="gtirb", @@ -80,6 +80,7 @@ def test_main_ex1(self): continue # no ex1 in this .yaml. arch = test.get("arch") + strip = test["test"].get("strip", False) strip_exe = test["test"]["strip_exe"] exec_wrapper = test["test"]["wrapper"] compilers = test["build"]["c"] @@ -95,6 +96,7 @@ def test_main_ex1(self): self.check_main_inference( ex_dir / "ex1", "ex", + strip=strip, strip_exe=strip_exe, compiler=compiler, cxx_compiler=cxx_compiler, From 1dcbe751a252a4d2efa7841fb880457be317e1eb Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Thu, 27 Jan 2022 11:44:40 -0500 Subject: [PATCH 5/7] Improved predicate name --- src/datalog/arch/arch.dl | 2 +- src/datalog/arch/arm64/arch_arm64.dl | 2 +- src/datalog/arch/intel/arch_x86_32.dl | 2 +- src/datalog/arch/intel/arch_x86_64.dl | 2 +- src/datalog/binary/elf/elf_binaries.dl | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index aa95224c1..e73dd4659 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -180,7 +180,7 @@ Arithmetic operation on two source registers /** The location and register where the address of main is loaded. */ -.decl inferred_main_reg(EA:address,Reg:register) +.decl inferred_main_in_reg(EA:address,Reg:register) // =========================================================================== // Registers diff --git a/src/datalog/arch/arm64/arch_arm64.dl b/src/datalog/arch/arm64/arch_arm64.dl index c5dddf230..e83ddda30 100644 --- a/src/datalog/arch/arm64/arch_arm64.dl +++ b/src/datalog/arch/arm64/arch_arm64.dl @@ -131,7 +131,7 @@ reg_reg_arithmetic_operation(EA,Reg2,Reg1,Reg2,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg2,Reg1,1,0):- reg_reg_arithmetic_operation(EA,Reg_def,Reg1,Reg2,1,0). -inferred_main_reg(EA,"X0"):- +inferred_main_in_reg(EA,"X0"):- inferred_main_dispatch(EA). } diff --git a/src/datalog/arch/intel/arch_x86_32.dl b/src/datalog/arch/intel/arch_x86_32.dl index 6d8d0afcc..72c4d71a8 100644 --- a/src/datalog/arch/intel/arch_x86_32.dl +++ b/src/datalog/arch/intel/arch_x86_32.dl @@ -26,7 +26,7 @@ #include "registers_x86_32.dl" pointer_size(4). -inferred_main_reg(Push,Reg):- +inferred_main_in_reg(Push,Reg):- inferred_main_dispatch(Call), // Get the value pushed onto the stack as an argument (assumes cdecl). next(Push,Call), diff --git a/src/datalog/arch/intel/arch_x86_64.dl b/src/datalog/arch/intel/arch_x86_64.dl index 7f337e072..4e4d570c8 100644 --- a/src/datalog/arch/intel/arch_x86_64.dl +++ b/src/datalog/arch/intel/arch_x86_64.dl @@ -28,7 +28,7 @@ pointer_size(8). -inferred_main_reg(EA,"RDI"):- +inferred_main_in_reg(EA,"RDI"):- inferred_main_dispatch(EA). } diff --git a/src/datalog/binary/elf/elf_binaries.dl b/src/datalog/binary/elf/elf_binaries.dl index 471582374..21c98acd2 100644 --- a/src/datalog/binary/elf/elf_binaries.dl +++ b/src/datalog/binary/elf/elf_binaries.dl @@ -293,14 +293,14 @@ inferred_main_dispatch(CallEA):- .decl inferred_main_function(Main_location:address) inferred_main_function(Main_location):- - arch.inferred_main_reg(Main_load,Reg), + arch.inferred_main_in_reg(Main_load,Reg), def_used(EA_def,Reg,Main_load,_), value_reg(EA_def,Reg,_,"NONE",_,Offset,_), Main_location = as(Offset, address), code(Main_location). inferred_main_function(Main):- - arch.inferred_main_reg(EA,Reg), + arch.inferred_main_in_reg(EA,Reg), instruction_get_op(EA,_,Op), op_indirect_contains_reg(Op,Reg), symbolic_operand(EA,_,Dest,"data"), @@ -309,7 +309,7 @@ inferred_main_function(Main):- // Inform the use_def analysis that the register is used here as an address. used(EA,Reg,0), used_for_address(EA,Reg):- - arch.inferred_main_reg(EA,Reg). + arch.inferred_main_in_reg(EA,Reg). main_function(Main_location):- inferred_main_function(Main_location). From 58732ca5d0415c63f82533c59283c33099d3083c Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Wed, 26 Jan 2022 21:22:29 -0500 Subject: [PATCH 6/7] Use overridable relation --- src/CMakeLists.txt | 1 + src/datalog/arch/arch.dl | 16 +++++++++++++ src/datalog/arch/intel/arch_x86_32.dl | 19 +++++++++++++++ src/datalog/binary/elf/elf_binaries.dl | 32 -------------------------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b52f6bbe1..5598503c7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,6 +24,7 @@ else() endif() set(DATALOG_BASE_SOURCES + datalog/arch/arch.dl datalog/binary/elf/elf_binaries.dl datalog/binary/elf/exceptions.dl datalog/binary/elf/relocations.dl diff --git a/src/datalog/arch/arch.dl b/src/datalog/arch/arch.dl index e73dd4659..ed5b5476f 100644 --- a/src/datalog/arch/arch.dl +++ b/src/datalog/arch/arch.dl @@ -182,6 +182,22 @@ The location and register where the address of main is loaded. */ .decl inferred_main_in_reg(EA:address,Reg:register) +/** + * Locate where main() is dispatched (i.e., where __libc_start_main is called.) + */ +.decl inferred_main_dispatch(EA:address) overridable + +// Most runtimes dispatch main with the first "call" from the binary entry point. +inferred_main_dispatch(CallEA):- + do_infer_main_function(), + CallEA = min EA:{ + start_function(Entry), + instruction(EA,_,_,CallOp,_,_,_,_,_,_), + arch.call_operation(CallOp), + code(EA), + EA >= Entry + }. + // =========================================================================== // Registers // =========================================================================== diff --git a/src/datalog/arch/intel/arch_x86_32.dl b/src/datalog/arch/intel/arch_x86_32.dl index 72c4d71a8..3c7f71e3f 100644 --- a/src/datalog/arch/intel/arch_x86_32.dl +++ b/src/datalog/arch/intel/arch_x86_32.dl @@ -23,7 +23,9 @@ #include "arch_x86.dl" .comp X86_32 : X86 { + #include "registers_x86_32.dl" + pointer_size(4). inferred_main_in_reg(Push,Reg):- @@ -36,6 +38,23 @@ inferred_main_in_reg(Push,Reg):- op_indirect_contains_reg(Op,Reg); op_regdirect_contains_reg(Op,Reg) ). + +.override inferred_main_dispatch + +// ELF X86-32 may have an additional function call in _start, so we add an +// additional heuristic that checks for a consecutive HLT instruction. +inferred_main_dispatch(CallEA):- + do_infer_main_function(), + CallEA = min EA:{ + start_function(Entry), + instruction(EA,_,_,CallOp,_,_,_,_,_,_), + arch.call_operation(CallOp), + code(EA), + next(EA,HltEA), + instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), + EA >= Entry + }. + } .decl get_pc_thunk(EA:address,Reg:register) diff --git a/src/datalog/binary/elf/elf_binaries.dl b/src/datalog/binary/elf/elf_binaries.dl index 21c98acd2..9493577f4 100644 --- a/src/datalog/binary/elf/elf_binaries.dl +++ b/src/datalog/binary/elf/elf_binaries.dl @@ -255,38 +255,6 @@ do_infer_main_function():- binary_format("ELF"), !function_symbol(_,"main"). -/** - * Locate where main() is dispatched (i.e., where __libc_start_main is called.) - */ -.decl inferred_main_dispatch(EA:address) - -// Most runtimes dispatch main with the first "call" from the binary entry point. -inferred_main_dispatch(CallEA):- - !binary_isa("X86"), - do_infer_main_function(), - CallEA = min EA:{ - start_function(Entry), - instruction(EA,_,_,CallOp,_,_,_,_,_,_), - arch.call_operation(CallOp), - code(EA), - EA >= Entry - }. - -// ELF X86-32 may have an additional function call in _start, so we add an -// additional heuristic that checks for a consecutive HLT instruction. -inferred_main_dispatch(CallEA):- - binary_isa("X86"), - do_infer_main_function(), - CallEA = min EA:{ - start_function(Entry), - instruction(EA,_,_,CallOp,_,_,_,_,_,_), - arch.call_operation(CallOp), - code(EA), - next(EA,HltEA), - instruction(HltEA,_,_,"HLT",_,_,_,_,_,_), - EA >= Entry - }. - /** * Infer the location of the main function. */ From eeb50129b4a906df57b3961da266c08721e1f642 Mon Sep 17 00:00:00 2001 From: Kevin Warrick Date: Thu, 27 Jan 2022 10:30:57 -0500 Subject: [PATCH 7/7] Add source to cmake config --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5598503c7..780a65e95 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -37,6 +37,7 @@ set(DATALOG_BASE_SOURCES datalog/data_access_analysis.dl datalog/empty_range.dl datalog/basic_function_inference.dl + datalog/jump_tables.dl datalog/main.dl datalog/pointer_reatribution.dl datalog/register_type_analysis.dl