Skip to content

Commit

Permalink
Merge branch 'improve-stack-analysis' into 'main'
Browse files Browse the repository at this point in the history
Improvements in stack def-use and value analysis

See merge request rewriting/ddisasm!1180
  • Loading branch information
aeflores committed Feb 1, 2024
2 parents 2edd8e8 + f987065 commit 0aa97b3
Show file tree
Hide file tree
Showing 15 changed files with 732 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# 1.8.1 (Unreleased)

* Improve def-use and value-reg stack analysis to consider push and pop
instructions. These changes also fix a couple of bugs in the stack variable
propagation.
* Update LIEF to 0.13.2
* No longer consider `_x86.get_pc_thunk*` functions as ABI-intrinsic; this
means `_copy` is not appended to the original symbol, and a symbol forwarding
entry is not created.


# 1.8.0

* Prefer LOCAL symbols over GLOBAL ones when selecting symbols for symbolic
Expand Down
13 changes: 13 additions & 0 deletions examples/asm_examples/ex_stack_value_reg/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Builds the example binary, records its stdout as the reference output,
# and lets `check` compare a fresh run against that reference.
.PHONY: all clean check

# Default goal: produce the reference output.
all: out.txt

# Assemble and link the example with gcc.
ex: src.s
	gcc $< -o $@

# Run the example once and capture its stdout as the reference output.
out.txt: ex
	@./$< > $@

# Remove the binary and the recorded output.
clean:
	rm -f ex out.txt

# Re-run the binary and diff its output against the recorded reference.
check:
	./ex > /tmp/res.txt
	@ diff out.txt /tmp/res.txt && echo TEST OK
106 changes: 106 additions & 0 deletions examples/asm_examples/ex_stack_value_reg/src.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@

# Patterns for moving values through the stack

# main (x86-64, AT&T syntax): entry point of the example.
# The first section, push_pop, round-trips a pointer through the stack
# with a push/pop pair before dereferencing it.
        .align  16
        .globl  main
        .type   main, @function
main:
push_pop:
        pushq   %rbp                        # save caller frame pointer
        movq    %rsp, %rbp
        leaq    .hello_ptr(%rip), %rdx      # rdx = address of .hello_ptr
        pushq   %rdx                        # spill the pointer to the stack
        xorq    %rdx, %rdx                  # clear the register copy
        popq    %rdx                        # reload the pointer from the stack
        movq    (%rdx), %rdi                # rdi = quad stored at .hello_ptr
        call    puts@PLT

# Push several immediates (and one register) to the stack
# and pop them back later, in reverse order.

nested_push_imm:
        leaq    .aaa(%rip), %rdx            # rdx = address of .aaa
        pushw   $2                          # 2-byte immediate
        pushq   $4                          # 8-byte immediate
        pushq   %rdx                        # address of .aaa
        xorq    %rdx, %rdx                  # clear the register copy

        popq    %rdi                        # rdi = address of .aaa
pop_4:
        popq    %rsi                        # rsi = 4
        movb    $0x42, (%rsi,%rdi)          # write 'B' at .aaa + 4
        call    puts@PLT

        leaq    .aaa(%rip), %rdi
        xorq    %rax, %rax                  # clear rax before the 16-bit pop
pop_2:
        popw    %ax                         # ax = 2
        movb    $0x43, (%rdi,%rax)          # write 'C' at .aaa + 2
        call    puts@PLT

# Push several immediates to the stack
# and read them back with loads, without popping them.
#
# Layout after the three pushes (18 bytes in total):
#    0(%rsp)  8 bytes  address of .aaa
#    8(%rsp)  2 bytes  immediate 4
#   10(%rsp)  8 bytes  immediate 2

push_load:

        leaq    .aaa(%rip), %rdx
        pushq   $2
        pushw   $4
        pushq   %rdx
        xorq    %rdx, %rdx                  # clear the register copy

        movq    (%rsp), %rdi                # rdi = address of .aaa
        xorq    %rsi, %rsi                  # clear rsi before the 16-bit load
read_4:
        movw    8(%rsp), %si                # si = 4
        movb    $0x44, (%rsi,%rdi)          # write 'D' at .aaa + 4
        call    puts@PLT

        leaq    .aaa(%rip), %rdi
read_2:
        movq    10(%rsp), %rsi              # rsi = 2
        movb    $0x45, (%rsi,%rdi)          # write 'E' at .aaa + 2
        call    puts@PLT

        addq    $18, %rsp                   # drop the 18 bytes pushed above

# Push some registers to the stack
# and read them back with loads.
#
# Layout after the three pushes (18 bytes in total):
#    0(%rsp)  8 bytes  address of .aaa
#    8(%rsp)  2 bytes  ax  = 3
#   10(%rsp)  8 bytes  rax = 1

push_regs:
        leaq    .aaa(%rip), %rdx
        movq    $1, %rax
        pushq   %rax
        movw    $3, %ax
        pushw   %ax
        pushq   %rdx
        xorq    %rdx, %rdx                  # clear the register copy

        movq    (%rsp), %rdi                # rdi = address of .aaa
        xorq    %rsi, %rsi                  # clear rsi before the 16-bit load
read_3:
        movw    8(%rsp), %si                # si = 3
        movb    $0x44, (%rsi,%rdi)          # write 'D' at .aaa + 3
        call    puts@PLT

        leaq    .aaa(%rip), %rdi
read_1:
        movq    10(%rsp), %rsi              # rsi = 1
        movb    $0x45, (%rsi,%rdi)          # write 'E' at .aaa + 1
        call    puts@PLT

        addq    $18, %rsp                   # drop the 18 bytes pushed above
        xorq    %rax, %rax                  # return 0
        popq    %rbp                        # restore the frame pointer saved at main
        ret



# Mutable data used by main. .aaa is patched in place by the movb stores
# in main, so it has to live in a writable section.
        .data

.hello_ptr:
        .quad   .hello                      # pointer that main loads and dereferences
.hello:
        .string "hello\n"
.aaa:
        .string "AAAAAAAAAAAAAA\n"

# Mark the stack as non-executable. Without this note the linker treats a
# hand-written assembly object as requiring an executable stack.
        .section .note.GNU-stack,"",@progbits
13 changes: 13 additions & 0 deletions examples/x86_32_asm_examples/ex_stack_value_reg/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Builds the 32-bit PIE example binary, records its stdout as the reference
# output, and lets `check` compare a fresh run against that reference.
.PHONY: all clean check

# Default goal: produce the reference output.
all: out.txt

# Assemble and link the example as a 32-bit position-independent executable.
ex: src.s
	gcc $< -pie -m32 -o $@

# Run the example once and capture its stdout as the reference output.
out.txt: ex
	@./$< > $@

# Remove the binary and the recorded output.
clean:
	rm -f ex out.txt

# Re-run the binary and diff its output against the recorded reference.
check:
	./ex > /tmp/res.txt
	@ diff out.txt /tmp/res.txt && echo TEST OK
134 changes: 134 additions & 0 deletions examples/x86_32_asm_examples/ex_stack_value_reg/src.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@

# Patterns for moving values through the stack

# main (i386, AT&T syntax, position-independent): entry point of the example.
#
# Register roles:
#   ebx  GOT base, set up via __x86.get_pc_thunk.bx; needed for @GOTOFF
#        addressing and for calls through the PLT
#   edi  address of the string passed to puts
#   esi  index of the byte patched inside .aaa
#
# ebx, esi and edi are callee-saved in the i386 System V ABI, so they are
# saved in the prologue and restored in the epilogue. Every section below
# leaves the stack pointer where it found it, so the saved registers are on
# top of the stack again when the epilogue runs.
        .align  16
        .globl  main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx                        # callee-saved: used as GOT base
        pushl   %esi                        # callee-saved: used as index
        pushl   %edi                        # callee-saved: used as string pointer

        call    __x86.get_pc_thunk.bx
        addl    $_GLOBAL_OFFSET_TABLE_, %ebx
        leal    .hello_ptr@GOTOFF(%ebx), %edx
        push    %edx                        # spill the pointer to the stack
        xor     %edx, %edx                  # clear the register copy
        pop     %edx                        # reload the pointer from the stack
        movl    (%edx), %edi                # edi = long stored at .hello_ptr

        sub     $12, %esp
        push    %edi                        # puts argument
        call    puts@PLT
        add     $16, %esp                   # drop argument and the 12 bytes above

# Push several immediates to the stack
# and pop them later

nested_push_imm:
        leal    .aaa@GOTOFF(%ebx), %edx
        pushw   $2                          # 2-byte immediate
        pushl   $4                          # 4-byte immediate
        pushl   %edx                        # address of .aaa
        xor     %edx, %edx

        popl    %edi                        # edi = address of .aaa
pop_4:
        popl    %esi                        # esi = 4
        movb    $0x42, (%esi,%edi)          # write 'B' at .aaa + 4

        push    %edi
        call    puts@PLT
        add     $4, %esp

        leal    .aaa@GOTOFF(%ebx), %edi
        xor     %eax,%eax                   # clear eax before the 16-bit pop
pop_2:
        popw    %ax                         # ax = 2
        movb    $0x43, (%edi,%eax)          # write 'C' at .aaa + 2

        push    %edi
        call    puts@PLT
        add     $4, %esp

# Push several immediates to the stack
# and read them later without popping them
#
# Layout after the three pushes (10 bytes in total):
#   0(%esp)  4 bytes  address of .aaa
#   4(%esp)  2 bytes  immediate 4
#   6(%esp)  4 bytes  immediate 2

push_load:

        leal    .aaa@GOTOFF(%ebx), %edx
        pushl   $2
        pushw   $4
        pushl   %edx
        xor     %edx, %edx

        movl    (%esp),%edi                 # edi = address of .aaa
        xor     %esi,%esi                   # clear esi before the 16-bit load
read_4:
        movw    4(%esp),%si                 # si = 4
        movb    $0x44, (%esi,%edi)          # write 'D' at .aaa + 4

        push    %edi
        call    puts@PLT
        add     $4, %esp

        leal    .aaa@GOTOFF(%ebx), %edi
read_2:
        movl    6(%esp),%esi                # esi = 2
        movb    $0x45, (%esi,%edi)          # write 'E' at .aaa + 2


        push    %edi
        call    puts@PLT
        add     $4, %esp

        add     $10, %esp                   # drop the 10 bytes pushed above

# Push some registers to the stack
# and read them later
#
# Layout after the three pushes (10 bytes in total):
#   0(%esp)  4 bytes  address of .aaa
#   4(%esp)  2 bytes  ax  = 3
#   6(%esp)  4 bytes  eax = 1

push_regs:
        leal    .aaa@GOTOFF(%ebx), %edx
        mov     $1, %eax
        pushl   %eax
        mov     $3, %ax
        pushw   %ax
        pushl   %edx
        xor     %edx, %edx

        movl    (%esp),%edi                 # edi = address of .aaa
        xor     %esi,%esi                   # clear esi before the 16-bit load
read_3:
        movw    4(%esp),%si                 # si = 3
        movb    $0x44, (%esi,%edi)          # write 'D' at .aaa + 3

        push    %edi
        call    puts@PLT
        add     $4, %esp

        leal    .aaa@GOTOFF(%ebx), %edi
read_1:
        movl    6(%esp),%esi                # esi = 1
        movb    $0x45, (%esi,%edi)          # write 'E' at .aaa + 1

        push    %edi
        call    puts@PLT
        add     $4, %esp

        add     $10, %esp                   # drop the 10 bytes pushed above
        xor     %eax,%eax                   # return 0

        popl    %edi                        # restore callee-saved registers
        popl    %esi
        popl    %ebx
        popl    %ebp
end:
        ret



# Mutable data used by main. .aaa is patched in place by the movb stores
# in main, so it has to live in a writable section.
        .data

.hello_ptr:
        .long   .hello                      # pointer that main loads and dereferences
.hello:
        .string "hello\n"
.aaa:
        .string "AAAAAAAAAAAAAA\n"

# Mark the stack as non-executable. Without this note the linker treats a
# hand-written assembly object as requiring an executable stack.
        .section .note.GNU-stack,"",@progbits
16 changes: 15 additions & 1 deletion src/datalog/arch/arch.dl
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ AccesSize: The size of the entire consecutive memory that the given
memory_access_aggregated("",0,"","",0,0,0):- false.

/**
Represents a load or store.
Represents a load or store of a register.

The address of the accessed memory is of the form:

Expand Down Expand Up @@ -316,6 +316,20 @@ See arch.memory_access().
// A store is a memory access tagged "STORE"; every other attribute is
// passed through unchanged.
store(EA,SrcIdx,DstIdx,StoredReg,Base,Index,Scale,Disp):-
    memory_access("STORE",EA,SrcIdx,DstIdx,StoredReg,Base,Index,Scale,Disp).

/**
Represents the store of an immediate to memory.
The address of the written memory is: BaseReg + IndexReg * Mult + Offset.
The immediate stored to memory is Immediate.

EA is the address of the storing instruction; SrcOp and DstOp are
operand indices.

This is not instantiated for all architectures, but it is defined here
to maintain a uniform interface. E.g., it is possible in x86 but
not in ARM.
*/
.decl store_immediate(EA:address,SrcOp:operand_index,DstOp:operand_index,
Immediate:number,BaseReg:reg_nullable,IndexReg:reg_nullable,Mult:number,Offset:number)

// The body is `false`, so this rule never derives a tuple.
// NOTE(review): presumably it exists so the relation has a rule even for
// architectures that never define one -- confirm.
store_immediate(0,0,0,0,"NONE","NONE",0,0):- false.

.decl delay_slot(BranchEA:address,EA:address)

delay_slot(0,0):-
Expand Down
21 changes: 21 additions & 0 deletions src/datalog/arch/intel/arch_x86.dl
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,27 @@ reg_arithmetic_operation(EA,Reg,Reg_origin_nn,1,Offset):-
reg_nonnull(Reg_origin,Reg_origin_nn),
op_regdirect_contains_reg(Op2,Reg).

// PUSH and POP adjust the stack pointer by the size of their operand:
// PUSH subtracts the operand size, POP adds it.
reg_arithmetic_operation(EA,SP,SP,1,Delta):-
    arch.stack_pointer(SP),
    instruction(EA,_,_,Mnemonic,Operand,0,0,0,_,_),
    (
        Mnemonic = "PUSH",
        Direction = -1
        ;
        Mnemonic = "POP",
        Direction = 1
    ),
    // The operand size comes from whichever operand kind applies.
    (
        op_immediate(Operand,_,Width)
        ;
        op_indirect(Operand,_,_,_,_,_,Width)
        ;
        op_regdirect(Operand,OperandReg),
        arch.register_size_bytes(OperandReg,Width)
    ),
    Delta = Direction * as(Width,number).

reg_reg_arithmetic_operation(EA,Reg2,Reg2,Reg1,-1,0):-
instruction(EA,_,_,"SUB",Op1,Op2,0,0,_,_),
op_regdirect_contains_reg(Op1,Reg1),
Expand Down
25 changes: 25 additions & 0 deletions src/datalog/arch/intel/memory_access.dl
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,33 @@ memory_access("LOAD",EA,1,2,DstReg,BaseReg,IndexReg,Mult,Offset):-
op_indirect_mapped(SrcOp,"NONE",BaseReg,IndexReg,Mult,Offset,_),
op_regdirect_contains_reg(DestOp,DstReg).

// POP of a register is a load from the stack into that register.
// The offset (0) is relative to the stack pointer BEFORE the pop updates it.
memory_access("LOAD",EA,0,1,PoppedReg,SP,"NONE",0,0):-
    stack_pointer(SP),
    instruction(EA,_,_,"POP",DstOperand,0,0,0,_,_),
    op_regdirect_contains_reg(DstOperand,PoppedReg).

// A move operation whose source is a register and whose destination is an
// indirect operand is a store of that register.
memory_access("STORE",EA,1,2,StoredReg,Base,Index,Scale,Disp):-
    instruction(EA,_,_,Mnemonic,SrcOperand,DstOperand,0,0,_,_),
    move_operation(Mnemonic),
    op_regdirect_contains_reg(SrcOperand,StoredReg),
    op_indirect_mapped(DstOperand,"NONE",Base,Index,Scale,Disp,_).

// PUSH of a register is a store of that register to the stack.
// The offset (0) is relative to the stack pointer AFTER the push updates it.
memory_access("STORE",EA,1,0,PushedReg,SP,"NONE",0,0):-
    stack_pointer(SP),
    instruction(EA,_,_,"PUSH",SrcOperand,0,0,0,_,_),
    op_regdirect_contains_reg(SrcOperand,PushedReg).


// A move operation whose source is an immediate and whose destination is an
// indirect operand stores that immediate to memory.
store_immediate(EA,1,2,Value,Base,Index,Scale,Disp):-
    instruction(EA,_,_,Mnemonic,SrcOperand,DstOperand,0,0,_,_),
    move_operation(Mnemonic),
    op_immediate(SrcOperand,Value,_),
    op_indirect_mapped(DstOperand,"NONE",Base,Index,Scale,Disp,_).

// PUSH of an immediate stores that immediate to the stack.
// We consider the offset w.r.t the stack pointer AFTER it has been updated.
//
// PUSH has a single operand: the source is operand 1 and there is no explicit
// destination operand, so DstOp is 0. This matches the operand indices used
// by the memory_access("STORE",...) rule for PUSH of a register.
store_immediate(EA,1,0,Immediate,StackReg,"NONE",0,0):-
    instruction(EA,_,_,"PUSH",SrcOp,0,0,0,_,_),
    stack_pointer(StackReg),
    op_immediate(SrcOp,Immediate,_).
5 changes: 4 additions & 1 deletion src/datalog/arch/intel/registers_x86_64.dl
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ This module establishes basic information about registers:
- Whether they are callee or caller saved according to the ABI

*/

// Each fact maps an 8-bit or 16-bit register name (first argument) to the
// name of the 64-bit register it is part of (second argument).
reg_map_rule("AL","RAX").
reg_map_rule("BL","RBX").
reg_map_rule("CL","RCX").
reg_map_rule("DL","RDX").
reg_map_rule("AX","RAX").
reg_map_rule("BX","RBX").
reg_map_rule("CX","RCX").
reg_map_rule("DX","RDX").
reg_map_rule("DI","RDI").
reg_map_rule("SI","RSI").
reg_map_rule("BP","RBP").
Expand Down
Loading

0 comments on commit 0aa97b3

Please sign in to comment.