Skip to content

Commit

Permalink
Merge branch 'junghee/reorg-arm-master' into 'master'
Browse files Browse the repository at this point in the history
Reorganize arm_binaries.dl

See merge request rewriting/ddisasm!722
  • Loading branch information
aeflores committed Jan 20, 2022
2 parents 46f96aa + e6dd37c commit 93f45d0
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 898 deletions.
1 change: 0 additions & 1 deletion src/datalog/arch/arm64/jump_operations.dl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ jump_operation("CBNZ").

// Mnemonics declared as unconditional jump operations.
// NOTE(review): BX is an A32/T32 mnemonic rather than an A64 one, and the hunk
// header above reports a single deletion -- presumably this commit removes the
// BX fact; confirm against the repository before relying on it.
unconditional_jump_operation("B").
unconditional_jump_operation("BR").
unconditional_jump_operation("BX").

jump_equal_operation("B.EQ").

Expand Down
279 changes: 279 additions & 0 deletions src/datalog/arm64_binaries.dl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,289 @@
// endorsement should be inferred.
//
//===----------------------------------------------------------------------===//
/**
Define a set of predicates that abstract over the ARM64-specific instruction
set and its characteristics.
*/

// Every defined symbol whose last field is "$x" contributes its address as a
// basic target. ("$x" is presumably the AArch64 ELF mapping symbol that marks
// the start of A64 code -- confirm against the symbol loader.)
basic_target(EA) :-
    defined_symbol(EA,_,_,_,_,_,_,"$x").

/**
Compute an immediate load performed across two instructions that are either
adjacent or separated by a single intervening instruction.

ea:     address of the first instruction (an ADRP in the rules of this file)
nextea: address of the instruction that completes the load
dest:   the combined address (base immediate + offset)
type:   "ADD" or "LDR/STR", depending on the completing instruction

This relation does not rely on def_used, so it can be used where def_used
would introduce a cyclic negation.
*/
.decl split_load(ea:address, nextea:address, dest:address, type:symbol)
.output split_load

/**
ADRP completed by an ADD, e.g.,
    adrp x0, L1
    add  x0, x0, :lo12:L1
*/
split_load(AdrpEA, AddEA, as(Base + Offset,address), "ADD") :-
    // First half: ADRP. Operand 1 is the immediate, operand 2 the register.
    instruction_get_operation(AdrpEA, "ADRP"),
    instruction_get_op(AdrpEA, 2, AdrpRegOp),
    op_regdirect_contains_reg(AdrpRegOp, Reg),
    instruction_get_op(AdrpEA, 1, BaseOp),
    op_immediate(BaseOp, Base),

    // def_used is unavailable here (it would create a cyclic negation), so
    // heuristically accept the ADD either right after the ADRP, or one
    // instruction later when that intermediate instruction has a destination
    // operand that does not contain the ADRP register.
    (
        next(AdrpEA, AddEA)
        ;
        next(AdrpEA, MidEA), next(MidEA, AddEA),
        instruction_get_dest_op(MidEA,_,MidDestOp),
        !op_regdirect_contains_reg(MidDestOp, Reg)
    ),

    // Second half: ADD. Operand 1 must be the ADRP register, operand 2 the
    // immediate offset, e.g. add x0, x0, <OffsetOp>
    instruction_get_operation(AddEA, "ADD"),
    instruction_get_op(AddEA, 1, AddRegOp),
    op_regdirect_contains_reg(AddRegOp, Reg),
    instruction_get_op(AddEA, 2, OffsetOp),
    op_immediate(OffsetOp, Offset).

/**
ADRP completed by a load or store, e.g.,
    adrp x0, L1
    ldr  x0, [x0,:lo12:L1]
*/
split_load(AdrpEA, MemEA, as(Base + Offset,address), "LDR/STR") :-
    // First half: ADRP. Operand 1 is the immediate, operand 2 the register.
    instruction_get_operation(AdrpEA, "ADRP"),
    instruction_get_op(AdrpEA, 2, AdrpRegOp),
    op_regdirect_contains_reg(AdrpRegOp, Reg),
    instruction_get_op(AdrpEA, 1, BaseOp),
    op_immediate(BaseOp, Base),

    // def_used is unavailable here (it would create a cyclic negation), so
    // heuristically accept the memory access either right after the ADRP, or
    // one instruction later when that intermediate instruction has a
    // destination operand that does not contain the ADRP register.
    (
        next(AdrpEA, MemEA)
        ;
        next(AdrpEA, MidEA), next(MidEA, MemEA),
        MidEA != MemEA,
        instruction_get_dest_op(MidEA,_,MidDestOp),
        !op_regdirect_contains_reg(MidDestOp, Reg)
    ),

    // Second half: any load or store whose operand 1 is a memory operand
    // with the ADRP register as base and no segment or index register,
    // e.g. ldr x0, [x0, <OffsetOp>]
    instruction_get_operation(MemEA, Operation),
    (
        arch.store_operation(Operation)
        ;
        arch.load_operation(Operation)
    ),
    instruction_get_op(MemEA, 1, MemOp),
    op_indirect(MemOp,"NONE",Reg,"NONE",_,Offset,_).

// The first instruction of every "LDR/STR" split load is recorded as a
// pc-relative jump to the address the pair refers to.
pc_relative_jump(FirstEA, Dest) :-
    split_load(FirstEA, _, Dest, "LDR/STR").

/**
This version of split_load uses def_used, so this should not be used for
the code inference step due to cyclic negation issue.
Instead, this is for the symbolization step.

ea:     address of the first instruction (an ADRP in the rules of this file)
nextea: address of the instruction that uses the register defined at ea
dest:   the combined address (base immediate + offset)
type:   "ADD" or "LDR/STR", depending on the completing instruction
*/
.decl split_load_for_symbolization(ea:address, nextea:address, dest:address, type:symbol)
.output split_load_for_symbolization

/**
ADRP completed by an ADD, linked through def_used, e.g.,
    adrp x0, L1
    add  x0, x0, :lo12:L1
*/
split_load_for_symbolization(AdrpEA, AddEA, as(Base + Offset,address), "ADD") :-
    // The ADD must use a value defined at the ADRP.
    def_used(AdrpEA,_,AddEA,_),

    // ADRP: operand 1 is the base immediate.
    instruction_get_operation(AdrpEA, "ADRP"),
    instruction_get_op(AdrpEA, 1, BaseOp),
    op_immediate(BaseOp, Base),

    // ADD: operand 2 is the immediate offset,
    // e.g. add x0, x0, <OffsetOp>
    instruction_get_operation(AddEA, "ADD"),
    instruction_get_op(AddEA, 2, OffsetOp),
    op_immediate(OffsetOp, Offset).

/**
ADRP completed by a load or store, linked through def_used, e.g.,
    adrp x0, L1
    ldr  x0, [x0,:lo12:L1]

The register defined by the ADRP must be the base register of the memory
operand. A def_used link alone is not sufficient: in
    adrp x0, L1
    str  x0, [x1,#8]
the ADRP result is the value being stored, not part of the address, so
Base + 8 must not be reported as a split load.
*/
split_load_for_symbolization(EA, NextEA, as(Base + Offset,address), "LDR/STR") :-
    // ADRP <Register> <Immediate>
    // e.g. adrp x0, BaseOp
    instruction_get_operation(EA, "ADRP"),
    instruction_get_op(EA, 1, BaseOp),
    op_immediate(BaseOp, Base),

    // Reg is the register defined at the ADRP and used at NextEA.
    def_used(EA,Reg,NextEA,_),

    // LDR/STR <Register> <IndirectOp>
    // e.g. ldr x0, [x0, <OffsetOp>]
    instruction_get_operation(NextEA, Operation),
    (
        arch.load_operation(Operation)
        ;
        arch.store_operation(Operation)
    ),
    instruction_get_op(NextEA, 1, OffsetOp),
    // Make sure the ADRP register is the base of the memory operand
    // (no segment and no index register).
    op_indirect(OffsetOp,"NONE",Reg,"NONE",_,Offset,_).

// Operand 1 of the first instruction of any split load refers to the
// combined address.
split_load_operand(FirstEA, 1, Dest) :-
    split_load_for_symbolization(FirstEA, _, Dest, _).

// Operand 1 (the memory operand) of the completing load/store refers to the
// combined address.
split_load_operand(MemEA, 1, Dest) :-
    split_load_for_symbolization(_, MemEA, Dest, "LDR/STR").

// Operand 2 (the immediate) of the completing ADD refers to the combined
// address.
split_load_operand(AddEA, 2, Dest) :-
    split_load_for_symbolization(_, AddEA, Dest, "ADD").


/**
Attributes for the operands of split loads.

ARM64: ex_false_pointer_array: optimized (e.g., -O2):

    6e4: adrp x0, .L_11018
    6e8: mov fp,sp
    6ec: stp x19,x20,[sp,#16]
    6f0: add x20,x0, :lo12:.L_11018
    6f4: mov x19,#0
    6f8: ldr x0,[x0,:lo12:.L_11018]

In an optimized binary such as the one above, a single adrp instruction may
be shared by several split loads; here the add and the ldr form the pairs
(0x6e4,0x6f0) and (0x6e4,0x6f8), respectively.
The relocation type of the object at .L_11018 is RELATIVE (global data
object).
In that case, using :got: for the adrp and :got_lo12: for the ldr causes a
runtime failure. To avoid the problem, "GotRef" is not attached to the adrp
or the ldr when the adrp is also paired with an add for the same address.
*/

// The first instruction of an LDR/STR split load gets "GotRef" when the
// target has a relocation and the same instruction is not also paired with
// an ADD for that target.
symbolic_operand_attribute(AdrpEA, 1, "GotRef") :-
    split_load_for_symbolization(AdrpEA, _, Dest, "LDR/STR"),
    relocation(Dest,_,_,_,_,_,_),
    !split_load_for_symbolization(AdrpEA, _, Dest, "ADD").

// Under the same conditions the completing load/store gets both "GotRef"
// and "Lo12".
symbolic_operand_attribute(MemEA, 1, "GotRef"),
symbolic_operand_attribute(MemEA, 1, "Lo12") :-
    split_load_for_symbolization(AdrpEA, MemEA, Dest, "LDR/STR"),
    relocation(Dest,_,_,_,_,_,_),
    !split_load_for_symbolization(AdrpEA, _, Dest, "ADD").

// The completing load/store gets only "Lo12" when the target has no
// relocation ...
symbolic_operand_attribute(MemEA, 1, "Lo12") :-
    split_load_for_symbolization(_, MemEA, Dest, "LDR/STR"),
    !relocation(Dest,_,_,_,_,_,_).

// ... or when its first instruction is shared with an ADD for the same
// target.
symbolic_operand_attribute(MemEA, 1, "Lo12") :-
    split_load_for_symbolization(AdrpEA, MemEA, Dest, "LDR/STR"),
    split_load_for_symbolization(AdrpEA, _, Dest, "ADD").

// The immediate of a completing ADD gets "Lo12" unless the target has a
// "GLOB_DAT" relocation.
symbolic_operand_attribute(AddEA, 2, "Lo12") :-
    split_load_for_symbolization(_, AddEA, Dest, "ADD"),
    !relocation(as(Dest,address),"GLOB_DAT",_,_,_,_,_).


// A load that completes a split load: its destination register takes the
// value read from the referenced address. The access size of the memory
// operand is divided by 8 before the lookup (presumably bits to bytes).
value_reg_edge(LoadEA,Reg,LoadEA,"NONE",0,Val) :-
    split_load_for_symbolization(_,LoadEA,RefAddr,_),
    instruction(LoadEA,_,_,Operation,MemOp,_,_,_,_,_),
    arch.load_operation(Operation),
    op_indirect(MemOp,_,_,_,_,_,AccessSize),
    data_word(RefAddr,AccessSize/8,Val),
    instruction_get_dest_op(LoadEA,_,DestOp),
    op_regdirect_contains_reg(DestOp,Reg).

// An ADRP whose register is tracked by def_used_for_address: the register
// takes the ADRP immediate as its value.
value_reg_edge(AdrpEA,Reg,AdrpEA,"NONE",0,Val) :-
    instruction(AdrpEA,_,_,"ADRP",ImmOp,RegOp,_,_,_,_),
    op_immediate(ImmOp,Val),
    op_regdirect_contains_reg(RegOp,Reg),
    def_used_for_address(AdrpEA,Reg).


// The immediate of an ADR is a symbolic operand candidate when it points at
// code, or falls inside a data segment (both bounds inclusive).
symbolic_operand_candidate(AdrEA,1,Dest,Type) :-
    // ADR <Register> <Immediate>
    // e.g. adr x1, OffsetOp
    instruction_get_operation(AdrEA, "ADR"),
    instruction_get_op(AdrEA, 1, OffsetOp),
    op_immediate(OffsetOp, Offset),
    Dest = as(Offset,address),
    (
        Type = "code",
        code(Dest)
        ;
        Type = "data",
        data_segment(SegBegin,SegEnd),
        SegBegin <= Dest, Dest <= SegEnd
    ).

/**
ARM64 jump-table reference idiom:

EA: ldr/ldrb w0,[x2,w0,uxtw N] --> x2=TableStart
adr x1,L
add x0,x1,w0, sxtb #M
br x0
TableStart:
(L1 - L) / 4
(L2 - L) / 4
...

This rule recognises the four-instruction sequence at EA, EA+4, EA+8 and
EA+12, resolves the table base through the value of the load's base register,
and reports the first table entry ("first") together with the code address it
designates, which is also marked as a block split point.
*/
block_needs_splitting_at(Symbol2),
symbol_minus_symbol_jump_table(TableStart,TableStart,Size,Symbol1,Symbol2,"first",Scale):-
// The table-entry load; only LDR (4-byte entries) and LDRB (1-byte entries)
// are accepted.
// NOTE(review): other entry widths (e.g. LDRH) are not matched here --
// confirm whether that is intentional.
instruction(EA,_,_,Operation,Op1,_,_,_,_,_),
arch.load_operation(Operation),
(
Operation = "LDR", Size = 4;
Operation = "LDRB", Size = 1
),
// The ADR that supplies the reference label L must directly follow the load.
instruction_get_operation(EA+4, "ADR"),
instruction_get_op(EA+4, 1, ImmOp),
op_immediate(ImmOp, Base),
// The ADD at EA+8 must write the operand that the jump at EA+12 uses as its
// target.
instruction_get_operation(EA+8, "ADD"),
instruction_get_dest_op(EA+8,_,TargetOp),
arch.jump(EA+12),
instruction_get_operation(EA+12,JumpOperation),
arch.jump_operation_op_index(JumpOperation,TargetOpIndex),
instruction_get_op(EA+12,TargetOpIndex,TargetOp),
Scale = 4, // TODO: Currently, we don't take extenders from the decoder.
// Use the extender for the add: check if M is 2.
// The base register of the load (no segment register) must have a
// "complete" best value at its definition; that value is the table start.
op_indirect(Op1,"NONE",Reg,_,_,_,_),
def_used(EA_def,Reg,EA,_),
best_value_reg(EA_def,Reg,_,_,Val,"complete"),
TableStart = as(Val,address),
// Read the first table entry with the width selected above.
(
Size != 1,
data_word(TableStart,Size,DiffUnsigned)
;
Size = 1,
data_byte(TableStart,DiffUnsigned)
),
// NOTE(review): the entry is reinterpreted with a plain cast; a narrow entry
// presumably is not sign-extended here even though the idiom's add uses a
// signed extender (sxtb) -- confirm how negative entries are handled.
Diff = as(DiffUnsigned,number),
// Target of the first entry: L + entry * Scale.
Symbol1 = as(Base,address),
Symbol2 = as(Diff*as(Scale,number)+as(Symbol1,number),address).

// Penalise (-1) symbolic operand candidates on instructions whose mnemonic
// contains "ADD", unless the instruction completes an "ADD" split load.
symbolic_operand_point(AddEA,OpIndex,-1,"add: unlikely to have symbolic operand") :-
    instruction_get_operation(AddEA,Mnemonic),
    contains("ADD",Mnemonic),
    symbolic_operand_candidate(AddEA,OpIndex,_,_),
    !split_load_for_symbolization(_,AddEA,_,"ADD").

// An instruction whose mnemonic contains "ADD" is unlikely to carry a
// symbolic immediate unless it completes an "ADD" split load. This uses
// split_load, the variant that does not depend on def_used.
unlikely_have_symbolic_immediate(AddEA) :-
    instruction_get_operation(AddEA, Mnemonic),
    contains("ADD", Mnemonic),
    !split_load(_,AddEA,_,"ADD").

// LDP has two destination operands: its first and its second operand.
instruction_get_dest_op(EA,1,FirstOp) :-
    instruction(EA,_,_,"LDP",FirstOp,_,_,_,_,_).

instruction_get_dest_op(EA,2,SecondOp) :-
    instruction(EA,_,_,"LDP",_,SecondOp,_,_,_,_).

// STP has two source operands: its first and its second operand.
instruction_get_src_op(EA,1,FirstOp) :-
    instruction(EA,_,_,"STP",FirstOp,_,_,_,_,_).

instruction_get_src_op(EA,2,SecondOp) :-
    instruction(EA,_,_,"STP",_,SecondOp,_,_,_,_).
Loading

0 comments on commit 93f45d0

Please sign in to comment.