diff options
author | comex | 2015-02-01 01:56:29 -0500 |
---|---|---|
committer | comex | 2015-02-01 01:56:42 -0500 |
commit | a23ef990492cd0384de1a924c44805587d5b5aed (patch) | |
tree | aa3a28446fc1a7ca1d799c8f3ad3acc6afdea0f2 /lib | |
parent | trivial wording tweak (diff) | |
download | substitute-a23ef990492cd0384de1a924c44805587d5b5aed.tar.gz |
fix my utter failure to handle branches/conditionals correctly (on ARM)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/arm/assemble.h | 38 | ||||
-rw-r--r-- | lib/arm/dis-arm.inc.h | 17 | ||||
-rw-r--r-- | lib/arm/dis-thumb.inc.h | 44 | ||||
-rw-r--r-- | lib/arm/dis-thumb2.inc.h | 24 | ||||
-rw-r--r-- | lib/arm/jump-patch.h | 9 | ||||
-rw-r--r-- | lib/arm/misc.h | 51 | ||||
-rw-r--r-- | lib/arm/transform-dis-arm-multi.inc.h | 85 | ||||
-rw-r--r-- | lib/arm64/jump-patch.h | 1 | ||||
-rw-r--r-- | lib/arm64/misc.h | 2 | ||||
-rw-r--r-- | lib/arm64/transform-dis-arm64.inc.h | 15 | ||||
-rw-r--r-- | lib/dis.h | 6 | ||||
-rw-r--r-- | lib/hook-functions.c | 6 | ||||
-rw-r--r-- | lib/jump-dis.c | 3 | ||||
-rw-r--r-- | lib/transform-dis.c | 55 | ||||
-rw-r--r-- | lib/transform-dis.h | 2 |
15 files changed, 276 insertions, 82 deletions
diff --git a/lib/arm/assemble.h b/lib/arm/assemble.h index 6f1e8e7..c0af020 100644 --- a/lib/arm/assemble.h +++ b/lib/arm/assemble.h @@ -4,27 +4,28 @@ struct assemble_ctx { void **codep; bool thumb; + int cond; }; static inline void PUSHone(struct assemble_ctx ctx, int Rt) { if (ctx.thumb) op32(ctx.codep, 0x0d04f84d | Rt << 28); else - op32(ctx.codep, 0xe52d0004 | Rt << 12); + op32(ctx.codep, 0x052d0004 | Rt << 12 | ctx.cond << 28); } static inline void POPone(struct assemble_ctx ctx, int Rt) { if (ctx.thumb) op32(ctx.codep, 0x0b04f85d | Rt << 28); else - op32(ctx.codep, 0xe49d0004 | Rt << 12); + op32(ctx.codep, 0x049d0004 | Rt << 12 | ctx.cond << 28); } static inline void POPmulti(struct assemble_ctx ctx, uint16_t mask) { if (ctx.thumb) op32(ctx.codep, 0x0000e8bd | mask << 16); else - op32(ctx.codep, 0xe8bd0000 | mask); + op32(ctx.codep, 0x08bd0000 | mask | ctx.cond << 28); } static inline void MOVW_MOVT(struct assemble_ctx ctx, int Rd, uint32_t val) { @@ -36,8 +37,10 @@ static inline void MOVW_MOVT(struct assemble_ctx ctx, int Rd, uint32_t val) { (hi >> 8 & 7) << 28 | (hi & 0xff) << 16); } else { - op32(ctx.codep, 0xe3000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff)); - op32(ctx.codep, 0xe3400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff)); + op32(ctx.codep, 0x03000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff) | + ctx.cond << 28); + op32(ctx.codep, 0x03400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff) | + ctx.cond << 28); } } @@ -46,7 +49,7 @@ static inline void STRri(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off) if (ctx.thumb) op32(ctx.codep, 0x0000f8c0 | Rn | Rt << 28 | off << 16); else - op32(ctx.codep, 0xe4800000 | Rn << 16 | Rt << 12 | off); + op32(ctx.codep, 0x04800000 | Rn << 16 | Rt << 12 | off | ctx.cond << 28); } static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off, @@ -76,11 +79,30 @@ static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off, op32(ctx.codep, 0xe5900000 | Rn << 16 | Rt << 
12 | off); break; type2: - op32(ctx.codep, 0xe1c00000 | Rn << 16 | Rt << 12 | subop << 4 | - (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20); + op32(ctx.codep, 0x01c00000 | Rn << 16 | Rt << 12 | subop << 4 | + (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20 | + ctx.cond << 28); break; default: __builtin_abort(); } } } + +static inline void Bccrel(struct assemble_ctx ctx, int offset) { + if (ctx.thumb) { + offset = (offset - 4) / 2; + op16(ctx.codep, 0xd000 | ctx.cond << 8 | offset); + } else { + offset = (offset - 8) / 4; + op32(ctx.codep, 0x0a000000 | offset | ctx.cond << 28); + } +} + +static inline void LDR_PC(struct assemble_ctx ctx, uint32_t dpc) { + if (ctx.thumb) + op32(ctx.codep, 0xf000f8df); + else + op32(ctx.codep, 0x051ff004 | ctx.cond << 28); + op32(ctx.codep, (uint32_t) dpc); +} diff --git a/lib/arm/dis-arm.inc.h b/lib/arm/dis-arm.inc.h index 2f06234..8f4d776 100644 --- a/lib/arm/dis-arm.inc.h +++ b/lib/arm/dis-arm.inc.h @@ -65,7 +65,7 @@ static INLINE void P(GPR_Rt_addr_offset_none_addr_am2offset_reg_offset_S_4_STRBT data(r(addr), rs(offset, 0, 4), r(Rt)); } static INLINE void P(GPR_Rt_addr_offset_none_addr_am3offset_offset_S_2_STRD_POST)(tdis_ctx ctx, struct bitslice offset, struct bitslice Rt, struct bitslice addr) { - data_flags(IS_LDRD_STRD, r(Rt), r(addr), rs(offset, 0, 4)); + data_flags(DFLAG_IS_LDRD_STRD, r(Rt), r(addr), rs(offset, 0, 4)); } static INLINE void P(GPR_Rt_addr_offset_none_addr_postidx_imm8_offset_S_1_STRHTi)(tdis_ctx ctx, UNUSED struct bitslice offset, struct bitslice Rt, struct bitslice addr) { data(r(addr), r(Rt)); @@ -73,9 +73,9 @@ static INLINE void P(GPR_Rt_addr_offset_none_addr_postidx_imm8_offset_S_1_STRHTi static INLINE void P(GPR_Rt_addrmode3_addr_S_2_STRD)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { unsigned addr_val = bs_get(addr, ctx->op); if (addr_val & 1 << 13) - data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); else - data_flags(IS_LDRD_STRD, 
r(Rt), rs(addr, 9, 4), rs(addr, 0, 4)); + data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4), rs(addr, 0, 4)); } static INLINE void P(GPR_Rt_addrmode3_pre_addr_S_2_STRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { return P(GPR_Rt_addrmode3_addr_S_2_STRD)(ctx, addr, Rt); @@ -133,9 +133,9 @@ static INLINE void P(addrmode3_addr_unk_Rt_4_LDRD)(tdis_ctx ctx, struct bitslice /* ignoring Rt2 = Rt + 1, but LDRD itself isn't supposed to load PC anyway */ unsigned addr_val = bs_get(addr, ctx->op); if (addr_val & 1 << 13) - data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); else - data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4), rs(addr, 0, 4)); + data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4), rs(addr, 0, 4)); } static INLINE void P(addrmode3_pre_addr_unk_Rt_4_LDRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { return P(addrmode3_addr_unk_Rt_4_LDRD)(ctx, addr, Rt); @@ -161,9 +161,10 @@ static INLINE void P(addrmode_imm12_pre_addr_unk_Rt_2_LDRB_PRE_IMM)(tdis_ctx ctx static INLINE void P(adrlabel_label_unk_Rd_1_ADR)(tdis_ctx ctx, struct bitslice label, struct bitslice Rd) { return P(pcrel)(ctx, ctx->pc + 8 + bs_get(label, ctx->op), bs_get(Rd, ctx->op), PLM_ADR); } -static INLINE void P(br_target_target_B_1_Bcc)(tdis_ctx ctx, struct bitslice target) { - bool cond = (ctx->op >> 28) != 0xe; - return P(branch)(ctx, ctx->pc + 8 + sext(bs_get(target, ctx->op), 24), /*cond*/ cond); +static INLINE void P(br_target_target_pred_p_B_1_Bcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) { + unsigned p_val = bs_get(p, ctx->op); + return P(branch)(ctx, ctx->pc + 8 + sext(bs_get(target, ctx->op), 24), + p_val == 0xe ? 
0 : (CC_ARMCC | p_val)); } static INLINE void P(ldst_so_reg_addr_unk_Rt_2_LDRB_PRE_REG)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { data(rout(Rt), rs(addr, 0, 4), rs(addr, 13, 4)); diff --git a/lib/arm/dis-thumb.inc.h b/lib/arm/dis-thumb.inc.h index 4e6d106..8be137e 100644 --- a/lib/arm/dis-thumb.inc.h +++ b/lib/arm/dis-thumb.inc.h @@ -49,31 +49,45 @@ static INLINE void P(t_addrmode_pc_addr_unk_Rt_1_tLDRpci)(tdis_ctx ctx, struct b static INLINE void P(t_adrlabel_addr_unk_Rd_1_tADR)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rd) { return P(pcrel)(ctx, ((ctx->pc + 4) & ~2) + bs_get(addr, ctx->op), bs_get(Rd, ctx->op), PLM_ADR); } -static INLINE void P(t_bcctarget_target_B_1_tBcc)(tdis_ctx ctx, struct bitslice target) { - return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 8), /*cond*/ true); +static INLINE void P(t_bcctarget_target_pred_p_B_1_tBcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) { + return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 8), + CC_ARMCC | bs_get(p, ctx->op)); } static INLINE void P(t_brtarget_target_B_1_tB)(tdis_ctx ctx, struct bitslice target) { - bool cond = ctx->arch.thumb_it_length > 0; - return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 11), cond); + int cc = ctx->arch.it_conds[0] != 0xe ? CC_ALREADY_IN_IT : 0; + return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 11), cc); } static INLINE void P(t_cbtarget_target_B_2_tCBNZ)(tdis_ctx ctx, struct bitslice target) { - return P(branch)(ctx, ctx->pc + 4 + 2 * bs_get(target, ctx->op), /*cond*/ true); -} -static INLINE void P(it_pred_cc_it_mask_mask_1_t2IT)(tdis_ctx ctx, struct bitslice mask, UNUSED struct bitslice cc) { + P(branch)(ctx, ctx->pc + 4 + 2 * bs_get(target, ctx->op), CC_CBXZ); + if (TDIS_CTX_MODIFY(ctx)) { + /* change target, and flip z/nz if necessary (i.e. 
always) */ + unsigned new = bs_set(target, TDIS_CTX_NEWVAL(ctx, 0), ctx->op); + if (TDIS_CTX_NEWVAL(ctx, 1)) + new ^= 1 << 11; + TDIS_CTX_SET_NEWOP(ctx, new); + } +} +static INLINE void P(it_pred_cc_it_mask_mask_1_t2IT)(tdis_ctx ctx, struct bitslice mask, struct bitslice cc) { /* why */ unsigned mask_val = bs_get(mask, ctx->op); - unsigned length = __builtin_ctz(mask_val); - if (length >= 3) + unsigned cc_val = bs_get(cc, ctx->op); + if (mask_val == 0) return P(unidentified)(ctx); /* nop */ - ctx->arch.thumb_it_length = length; - return P(unidentified)(ctx); + int length = 4 - __builtin_ctz(mask_val); + ctx->arch.it_conds[1] = cc_val; + for (int i = 0; i < length; i++) + ctx->arch.it_conds[i+2] = (cc_val & ~1) | (mask_val >> (3 - i) & 1); + return P(thumb_it)(ctx); } -static INLINE void P(dis_thumb)(tdis_ctx ctx) { +static INLINE void P(thumb_do_it)(tdis_ctx ctx) { uint16_t op = ctx->op = *(uint16_t *) ctx->ptr; - ctx->op_size = 2; - if (ctx->arch.thumb_it_length) - ctx->arch.thumb_it_length--; #include "../generated/generic-dis-thumb.inc.h" __builtin_abort(); } + +static INLINE void P(dis_thumb)(tdis_ctx ctx) { + ctx->op_size = 2; + P(thumb_do_it)(ctx); + advance_it_cond(&ctx->arch); +} diff --git a/lib/arm/dis-thumb2.inc.h b/lib/arm/dis-thumb2.inc.h index a9d7f9d..6f651c5 100644 --- a/lib/arm/dis-thumb2.inc.h +++ b/lib/arm/dis-thumb2.inc.h @@ -87,8 +87,9 @@ static INLINE void P(addrmode5_pre_addr_4_t2LDC2L_PRE)(tdis_ctx ctx, struct bits static INLINE void P(addrmode5_pre_addr_S_4_t2STC2L_PRE)(tdis_ctx ctx, struct bitslice addr) { data(rs(addr, 9, 4)); } -static INLINE void P(brtarget_target_B_1_t2Bcc)(tdis_ctx ctx, struct bitslice target) { - return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 20), /*cond*/ true); +static INLINE void P(brtarget_target_pred_p_B_1_t2Bcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) { + return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 20), + CC_ARMCC | bs_get(p, ctx->op)); } static 
INLINE void P(rGPR_Rt_t2addrmode_imm0_1020s4_addr_unk_Rd_S_1_t2STREX)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt, struct bitslice Rd) { data(rout(Rd), r(Rt), rs(addr, 8, 4)); @@ -100,10 +101,10 @@ static INLINE void P(rGPR_Rt_t2addrmode_imm8_pre_addr_S_2_t2STRB_PRE)(tdis_ctx c data(r(Rt), rs(addr, 9, 4)); } static INLINE void P(rGPR_Rt_t2addrmode_imm8s4_addr_S_1_t2STRDi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { - data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); } static INLINE void P(rGPR_Rt_t2addrmode_imm8s4_pre_addr_S_1_t2STRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { - data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4)); } static INLINE void P(rGPR_Rt_t2addrmode_negimm8_addr_S_2_t2STRBi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { data(r(Rt), rs(addr, 9, 4)); @@ -130,10 +131,10 @@ static INLINE void P(addr_offset_none_Rn_t2am_imm8_offset_offset_unk_Rt_5_t2LDRB data(rout(Rt), r(Rn)); } static INLINE void P(t2addrmode_imm8s4_addr_unk_Rt_1_t2LDRDi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { - data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); } static INLINE void P(t2addrmode_imm8s4_pre_addr_unk_Rt_1_t2LDRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { - data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); + data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4)); } static INLINE void P(t2addrmode_negimm8_addr_unk_Rt_5_t2LDRBi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) { data(rout(Rt), rs(addr, 9, 4)); @@ -151,8 +152,8 @@ static INLINE void P(t2ldrlabel_addr_unk_Rt_5_t2LDRBpci)(tdis_ctx ctx, struct bi return P(pcrel)(ctx, ((ctx->pc + 4) & ~2) + (bs_get(addr, ctx->op) & ((1 << 12) - 1)), bs_get(Rt, ctx->op), get_thumb2_load_mode(ctx->op)); } static INLINE void 
P(uncondbrtarget_target_B_1_t2B)(tdis_ctx ctx, struct bitslice target) { - bool cond = ctx->arch.thumb_it_length > 0; - return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 24), cond); + int cc = ctx->arch.it_conds[0] != 0xe ? CC_ALREADY_IN_IT : 0; + return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 24), cc); } static INLINE void P(unk_Rd_3_t2MOVTi16)(tdis_ctx ctx, struct bitslice Rd) { data(rout(Rd)); @@ -165,7 +166,7 @@ static INLINE void P(unk_Rt_13_VMOVRRD)(tdis_ctx ctx, UNUSED struct bitslice Rt) return P(unidentified)(ctx); } -static INLINE void do_it(tdis_ctx ctx) { +static INLINE void P(thumb2_do_it)(tdis_ctx ctx) { uint32_t op = ctx->op; #include "../generated/generic-dis-thumb2.inc.h" __builtin_abort(); @@ -174,12 +175,11 @@ static INLINE void do_it(tdis_ctx ctx) { static INLINE void P(dis_thumb2)(tdis_ctx ctx) { ctx->op = *(uint32_t *) ctx->ptr; ctx->op_size = 4; - if (ctx->arch.thumb_it_length) - ctx->arch.thumb_it_length--; /* LLVM likes to think about Thumb2 instructions the way the ARM manual * does - 15..0 15..0 rather than 31..0 as actually laid out in memory... 
*/ ctx->op = flip16(ctx->op); - do_it(ctx); + P(thumb2_do_it)(ctx); + advance_it_cond(&ctx->arch); TDIS_CTX_SET_NEWOP(ctx, flip16(TDIS_CTX_NEWOP(ctx))); ctx->op = flip16(ctx->op); } diff --git a/lib/arm/jump-patch.h b/lib/arm/jump-patch.h index 238d56e..b65a97d 100644 --- a/lib/arm/jump-patch.h +++ b/lib/arm/jump-patch.h @@ -1,7 +1,7 @@ #pragma once #include "dis.h" +#include "arm/assemble.h" #define MAX_JUMP_PATCH_SIZE 8 -#define MAX_REWRITTEN_SIZE (12 * 4) /* actually should be less */ static inline int jump_patch_size(UNUSED uintptr_t pc, UNUSED uintptr_t dpc, @@ -13,9 +13,6 @@ static inline int jump_patch_size(UNUSED uintptr_t pc, static inline void make_jump_patch(void **codep, UNUSED uintptr_t pc, uintptr_t dpc, struct arch_dis_ctx arch) { - if (arch.pc_low_bit) - op32(codep, 0xf000f8df); - else - op32(codep, 0xe51ff004); - op32(codep, (uint32_t) dpc); + struct assemble_ctx actx = {codep, arch.pc_low_bit, 0xe}; + LDR_PC(actx, dpc); } diff --git a/lib/arm/misc.h b/lib/arm/misc.h index f8d593e..02b06fe 100644 --- a/lib/arm/misc.h +++ b/lib/arm/misc.h @@ -4,9 +4,56 @@ #define TARGET_JUMP_PATCH_HDR "arm/jump-patch.h" #define TARGET_TRANSFORM_DIS_HEADER "arm/transform-dis-arm-multi.inc.h" #define MIN_INSN_SIZE 2 +/* each input instruction might turn into: + * - 2 bytes for Bcc, if in IT + * then ONE of: + * - 2/4 bytes for just the instruction + * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever) + * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject + * to IT, there can only reasonably be two per block, and if there are both + * then that's an unconditional exit - but we don't enforce any of this + * currently) + * - up to 7 4-byte insns for similar moves to PC that fall under 'data' + * the maximum number of possible inputs is 4, plus 4 extras if the last one + * was an IT (but in that case it can't be one of the above cases) + * while this looks huge, it's overly conservative and doesn't matter much, + * since 
only the actually used space will be taken up in the final output + */ +#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 196 */ + struct arch_dis_ctx { - unsigned thumb_it_length; + /* thumb? */ bool pc_low_bit; + /* if thumb, IT cond for the next 5 instructions + * (5 because we still advance after IT) */ + uint8_t it_conds[5]; + /* for transform_dis - did we add space for a Bccrel? */ + uint8_t bccrel_bits; + void *bccrel_p; }; -enum { IS_LDRD_STRD = 1 << 16 }; +static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { + ctx->pc_low_bit = false; + ctx->bccrel_p = NULL; + memset(ctx->it_conds, 0xe, 5); +} + +static inline void advance_it_cond(struct arch_dis_ctx *ctx) { + ctx->it_conds[0] = ctx->it_conds[1]; + ctx->it_conds[1] = ctx->it_conds[2]; + ctx->it_conds[2] = ctx->it_conds[3]; + ctx->it_conds[3] = ctx->it_conds[4]; + ctx->it_conds[4] = 0xe; +} + +#define DFLAG_IS_LDRD_STRD (1 << 16) + +/* Types of conditionals for 'branch' */ +/* a regular old branch-with-condition */ +#define CC_ARMCC (CC_CONDITIONAL | 0x200) +/* already in an IT block - in transform_dis this will be rewritten to a branch + * anyway, so it can be treated as unconditional; in jump_dis we have to know + * to keep going */ +#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x400) +/* CBZ/CBNZ is rewritten */ +#define CC_CBXZ (CC_CONDITIONAL | 0x800) diff --git a/lib/arm/transform-dis-arm-multi.inc.h b/lib/arm/transform-dis-arm-multi.inc.h index 2e6a62d..18fa4c1 100644 --- a/lib/arm/transform-dis-arm-multi.inc.h +++ b/lib/arm/transform-dis-arm-multi.inc.h @@ -1,5 +1,28 @@ #include "arm/assemble.h" +static struct assemble_ctx tdctx_to_actx(const struct transform_dis_ctx *ctx) { + int cond; + if (ctx->arch.pc_low_bit) { + cond = ctx->op >> 28; + if (cond == 0xf) + cond = 0xe; + } else { + cond = 0; + } + return (struct assemble_ctx) { + ctx->rewritten_ptr_ptr, + ctx->arch.pc_low_bit, + cond + }; + +} + +static int invert_arm_cond(int cc) { + if (cc >= 0xe) + __builtin_abort(); + return cc ^ 1; +} + 
static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, unsigned o0, unsigned o1, unsigned o2, unsigned o3, unsigned out_mask) { #ifdef TRANSFORM_DIS_VERBOSE @@ -16,7 +39,7 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, newval[3] = o3; void **codep = ctx->rewritten_ptr_ptr; - struct assemble_ctx actx = {ctx->rewritten_ptr_ptr, ctx->arch.pc_low_bit}; + struct assemble_ctx actx = tdctx_to_actx(ctx); /* A few cases: * 1. Move to PC that does not read PC. Probably fine. @@ -41,7 +64,7 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, else if (newval[i] != null_op) in_regs |= 1 << newval[i]; } - if (out_mask & IS_LDRD_STRD) + if (out_mask & DFLAG_IS_LDRD_STRD) in_regs |= 1 << (newval[0] + 1); uint32_t pc = ctx->pc + (ctx->arch.pc_low_bit ? 4 : 8); int scratch = __builtin_ctz(~(in_regs | (1 << out_reg))); @@ -64,7 +87,8 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, ctx->write_newop_here = *codep; *codep += ctx->op_size; STRri(actx, scratch, 13, 4); POPmulti(actx, 1 << scratch | 1 << 15); - transform_dis_ret(ctx); + if (actx.cond != 0xe) + transform_dis_ret(ctx); } else { if (out_reg != -1 && !(in_regs & 1 << out_reg)) { /* case 3 - ignore scratch */ @@ -98,7 +122,7 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx, (void *) dpc, reg, load_mode); #endif ctx->write_newop_here = NULL; - struct assemble_ctx actx = {ctx->rewritten_ptr_ptr, ctx->arch.pc_low_bit}; + struct assemble_ctx actx = tdctx_to_actx(ctx); if (reg == 15) { int scratch = 0; PUSHone(actx, scratch); @@ -115,3 +139,56 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx, LDRxi(actx, reg, reg, 0, load_mode); } } + +static NOINLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx, + uintptr_t dpc, int cc) { +#ifdef TRANSFORM_DIS_VERBOSE + printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc); 
+#endif + if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { + /* don't support this for now */ + /* making the simplifying assumption here that functions will not try + * to branch into the middle of an IT block, which is the case where + * pc_patch_end changes to include additional instructions (as opposed + * to include the end of a partially included instruction, which is + * common) */ + ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; + return; + } + struct assemble_ctx actx = tdctx_to_actx(ctx); + ctx->write_newop_here = NULL; + if ((cc & CC_ARMCC) == CC_ARMCC) { + actx.cond = invert_arm_cond(cc & 0xf); + Bccrel(actx, 8); + } else if ((cc & CC_CBXZ) == CC_CBXZ) { + ctx->modify = true; + ctx->newval[0] = 2+8; + ctx->newval[1] = 1; /* do invert */ + void **codep = ctx->rewritten_ptr_ptr; + ctx->write_newop_here = *codep; *codep += 2; + } + actx.cond = 0xe; + LDR_PC(actx, dpc | 1); +} + +static void transform_dis_pre_dis(struct transform_dis_ctx *ctx) { + /* for simplicity we turn IT into a series of branches for each + * instruction, so... 
*/ + if (ctx->arch.it_conds[0] != 0xe) { + ctx->arch.bccrel_bits = invert_arm_cond(ctx->arch.it_conds[0]); + ctx->arch.bccrel_p = *ctx->rewritten_ptr_ptr; + *ctx->rewritten_ptr_ptr += 2; + } else { + ctx->arch.bccrel_p = NULL; + } +} + +static void transform_dis_post_dis(struct transform_dis_ctx *ctx) { + if (ctx->arch.bccrel_p) { + struct assemble_ctx actx = {&ctx->arch.bccrel_p, + /*thumb*/ true, + ctx->arch.bccrel_bits}; + Bccrel(actx, *ctx->rewritten_ptr_ptr - ctx->arch.bccrel_p); + } + ctx->force_keep_transforming = ctx->arch.it_conds[0] != 0xe; +} diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h index aa818d3..cc94f90 100644 --- a/lib/arm64/jump-patch.h +++ b/lib/arm64/jump-patch.h @@ -1,7 +1,6 @@ #pragma once #include "arm64/assemble.h" #define MAX_JUMP_PATCH_SIZE 12 -#define MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */ static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, UNUSED struct arch_dis_ctx arch, bool force) { diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h index c7fa5c9..84bd638 100644 --- a/lib/arm64/misc.h +++ b/lib/arm64/misc.h @@ -4,4 +4,6 @@ #define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h" #define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h" #define MIN_INSN_SIZE 4 +#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */ struct arch_dis_ctx {}; +static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/transform-dis-arm64.inc.h index 682613a..af2d4c7 100644 --- a/lib/arm64/transform-dis-arm64.inc.h +++ b/lib/arm64/transform-dis-arm64.inc.h @@ -16,3 +16,18 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx, } } +static NOINLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx, + uintptr_t dpc, int cc) { +#ifdef TRANSFORM_DIS_VERBOSE + printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc); +#endif + if (dpc >= ctx->pc_patch_start && dpc < 
ctx->pc_patch_end) { + ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; + return; + } + /* TODO */ + (void) cc; +} + +static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {} +static void transform_dis_post_dis(UNUSED struct transform_dis_ctx *ctx) {} diff --git a/lib/dis.h b/lib/dis.h --- a/lib/dis.h +++ b/lib/dis.h @@ -116,3 +116,9 @@ static inline void op32(void **codep, uint32_t op) { *codep += 4; } +static inline void op16(void **codep, uint16_t op) { + *(uint16_t *) *codep = op; + *codep += 2; +} + +#define CC_CONDITIONAL 0x100 diff --git a/lib/hook-functions.c b/lib/hook-functions.c index 9d34400..986990b 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -151,7 +151,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, struct hook_internal *hi = &his[i]; void *code = hook->function; struct arch_dis_ctx arch; - memset(&arch, 0, sizeof(arch)); + arch_dis_ctx_init(&arch); #ifdef __arm__ if ((uintptr_t) code & 1) { arch.pc_low_bit = true; @@ -183,9 +183,9 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, uintptr_t pc_patch_end = pc_patch_start + patch_size; /* Generate the rewritten start of the function for the outro * trampoline (complaining if any bad instructions are found). 
*/ - uint8_t rewritten_temp[MAX_REWRITTEN_SIZE]; + uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE]; void *rp = rewritten_temp; - if ((ret = transform_dis_main(code, &rp, pc_patch_start, pc_patch_end, + if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end, arch, hi->offset_by_pcdiff))) goto end; /* Check some of the rest of the function for jumps back into the diff --git a/lib/jump-dis.c b/lib/jump-dis.c index 5e06460..7ba3608 100644 --- a/lib/jump-dis.c +++ b/lib/jump-dis.c @@ -116,6 +116,9 @@ static INLINE UNUSED void jump_dis_bad(struct jump_dis_ctx *ctx) { ctx->continue_after_this_insn = false; } +static INLINE UNUSED void jump_dis_thumb_it(UNUSED struct jump_dis_ctx *ctx) { +} + static void jump_dis_dis(struct jump_dis_ctx *ctx); bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end, diff --git a/lib/transform-dis.c b/lib/transform-dis.c index 1e66bd1..3210333 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -3,6 +3,7 @@ #include "substitute.h" #include "dis.h" +#include "transform-dis.h" #include <stdbool.h> #include <stdint.h> @@ -14,6 +15,8 @@ struct transform_dis_ctx { int err; uintptr_t pc_patch_start; + /* this is only tentative - it will be updated to include parts of + * instructions poking out, and instructions forced to be transformed by IT */ uintptr_t pc_patch_end; uintptr_t pc; int op_size; @@ -21,6 +24,9 @@ struct transform_dis_ctx { unsigned newop; unsigned newval[4]; + /* for IT - eww */ + bool force_keep_transforming; + const void *ptr; void **rewritten_ptr_ptr; void *write_newop_here; @@ -36,50 +42,46 @@ struct transform_dis_ctx { /* largely similar to jump_dis */ -static INLINE UNUSED void transform_dis_ret(struct transform_dis_ctx *ctx) { - /* ret is okay if it's at the end of the patch */ +static INLINE UNUSED +void transform_dis_ret(struct transform_dis_ctx *ctx) { + /* ret is okay if it's at the end of the required patch (past the original + * patch size is good too) */ if 
(ctx->pc + ctx->op_size < ctx->pc_patch_end) ctx->err = SUBSTITUTE_ERR_FUNC_TOO_SHORT; } -static INLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx, - uintptr_t dpc, UNUSED bool conditional) { -#ifdef TRANSFORM_DIS_VERBOSE - printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc); -#endif - if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { - /* don't support this for now */ - ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; - } - /* branch out of bounds is fine */ - /* XXX just kidding, the instruction needs to be rewritten obviously. what - * was I thinking? */ -} - -static INLINE UNUSED void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) { +static INLINE UNUSED +void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) { #ifdef TRANSFORM_DIS_VERBOSE printf("transform_dis (%p): unidentified\n", (void *) ctx->pc); #endif /* this isn't exhaustive, so unidentified is fine */ } -static INLINE UNUSED void transform_dis_bad(struct transform_dis_ctx *ctx) { +static INLINE UNUSED +void transform_dis_bad(struct transform_dis_ctx *ctx) { ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; } +static INLINE UNUSED +void transform_dis_thumb_it(UNUSED struct transform_dis_ctx *ctx) { + /* ignore, since it was turned into B */ +} static void transform_dis_dis(struct transform_dis_ctx *ctx); +static void transform_dis_pre_dis(struct transform_dis_ctx *ctx); +static void transform_dis_post_dis(struct transform_dis_ctx *ctx); int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uintptr_t pc_patch_start, - uintptr_t pc_patch_end, + uintptr_t *pc_patch_end_p, struct arch_dis_ctx initial_arch_ctx, int *offset_by_pcdiff) { struct transform_dis_ctx ctx; memset(&ctx, 0, sizeof(ctx)); ctx.pc_patch_start = pc_patch_start; - ctx.pc_patch_end = pc_patch_end; + ctx.pc_patch_end = *pc_patch_end_p; ctx.pc = pc_patch_start; ctx.arch = initial_arch_ctx; /* data is written to 
rewritten both by this function directly and, in case @@ -88,12 +90,16 @@ int transform_dis_main(const void *restrict code_ptr, void *rewritten_start = *rewritten_ptr_ptr; int written_pcdiff = 0; offset_by_pcdiff[written_pcdiff++] = 0; - while (ctx.pc < ctx.pc_patch_end) { + while (ctx.pc < ctx.pc_patch_end && !ctx.force_keep_transforming) { ctx.modify = false; ctx.err = 0; ctx.ptr = code_ptr + (ctx.pc - pc_patch_start); + + transform_dis_pre_dis(&ctx); + void *rewritten_ptr = *rewritten_ptr_ptr; ctx.write_newop_here = rewritten_ptr; + transform_dis_dis(&ctx); if (ctx.err) @@ -111,11 +117,16 @@ int transform_dis_main(const void *restrict code_ptr, *rewritten_ptr_ptr += ctx.op_size; } ctx.pc += ctx.op_size; + + transform_dis_post_dis(&ctx); + int pcdiff = ctx.pc - ctx.pc_patch_start; while (written_pcdiff < pcdiff) offset_by_pcdiff[written_pcdiff++] = -1; - offset_by_pcdiff[written_pcdiff++] = (int) (*rewritten_ptr_ptr - rewritten_start); + offset_by_pcdiff[written_pcdiff++] = + (int) (*rewritten_ptr_ptr - rewritten_start); } + *pc_patch_end_p = ctx.pc; return SUBSTITUTE_OK; } diff --git a/lib/transform-dis.h b/lib/transform-dis.h index dec38a1..3ab120b 100644 --- a/lib/transform-dis.h +++ b/lib/transform-dis.h @@ -5,6 +5,6 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uintptr_t pc_patch_start, - uintptr_t pc_patch_end, + uintptr_t *pc_patch_end, struct arch_dis_ctx initial_arch_ctx, int *offset_by_pcdiff); |