diff options
author | comex | 2015-02-03 02:52:51 -0500 |
---|---|---|
committer | comex | 2015-02-03 02:52:51 -0500 |
commit | c6c8f4abdabd58f9210e5e06c64a6fc36dbc709c (patch) | |
tree | bfa2b8577a9e9e5b99c967be0ae1722ce68d6df1 | |
parent | whoops, don't mean to always jump to thumb mode (diff) | |
download | substitute-c6c8f4abdabd58f9210e5e06c64a6fc36dbc709c.tar.gz |
fix ARM64 too, theoretically
Diffstat (limited to '')
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | generated/generic-dis-arm64.inc.h | 13 | ||||
-rw-r--r-- | lib/arm64/assemble.h | 12 | ||||
-rw-r--r-- | lib/arm64/dis-arm64.inc.h | 31 | ||||
-rw-r--r-- | lib/arm64/jump-patch.h | 4 | ||||
-rw-r--r-- | lib/arm64/misc.h | 31 | ||||
-rw-r--r-- | lib/arm64/transform-dis-arm64.inc.h | 29 | ||||
-rw-r--r-- | lib/hook-functions.c | 27 | ||||
-rw-r--r-- | lib/transform-dis.c | 5 | ||||
-rw-r--r-- | lib/transform-dis.h | 4 | ||||
-rw-r--r-- | test/test-transform-dis.c | 2 |
11 files changed, 117 insertions, 43 deletions
@@ -8,6 +8,7 @@ ARCH := -arch x86_64 XCFLAGS := -g -O3 -Wall -Wextra -Werror -Ilib $(ARCH) LIB_LDFLAGS := -lobjc -dynamiclib -fvisibility=hidden -install_name /usr/lib/libsubstitute.0.dylib -dead_strip IOS_APP_LDFLAGS := -framework UIKit -framework Foundation -dead_strip +IS_IOS := $(findstring -arch arm,$(CC) $(CFLAGS) $(XCFLAGS)) ifneq (,$(IS_IOS)) # I don't know anything in particular that would break this on older versions, # but I don't have any good way to test it and don't really care. So ensure it @@ -16,7 +17,6 @@ XCFLAGS := $(XCFLAGS) -miphoneos-version-min=7.0 endif override CC := $(CC) $(XCFLAGS) $(CFLAGS) override CXX := $(CXX) $(XCFLAGS) $(CFLAGS) -fno-exceptions -fno-asynchronous-unwind-tables -IS_IOS := $(findstring -arch arm,$(CC)) # These are only required to rebuild the generated disassemblers. IMAON2 := /Users/comex/c/imaon2 diff --git a/generated/generic-dis-arm64.inc.h b/generated/generic-dis-arm64.inc.h index dd7f8a1..26ca52c 100644 --- a/generated/generic-dis-arm64.inc.h +++ b/generated/generic-dis-arm64.inc.h @@ -12,7 +12,8 @@ /* adrplabel_label_unk_Xd_1_ADRP: ADRP */ /* am_b_target_addr_B_1_B: B */ /* am_bl_target_addr_1_BL: BL */ -/* am_brcond_target_B_5_Bcc: Bcc, CBNZW, CBNZX, CBZW, CBZX */ +/* ccode_cond_am_brcond_target_B_1_Bcc: Bcc */ +/* am_brcond_target_B_4_CBNZW: CBNZW, CBNZX, CBZW, CBZX */ /* am_ldrlit_label_unk_Rt_6_LDRDl: LDRDl, LDRQl, LDRSWl, LDRSl, LDRWl, LDRXl */ /* GPR64_Rn_1_RET: RET */ /* am_tbrcond_target_B_4_TBNZW: TBNZW, TBNZX, TBZW, TBZX */ @@ -101,9 +102,8 @@ case 13: { switch ((op >> 25) & 0x1) { case 0: { - insn_am_brcond_target_B_5_Bcc:; struct bitslice target = {.nruns = 1, .runs = (struct bitslice_run[]) {{5,0,19}}}; - return P(am_brcond_target_B_5_Bcc)(ctx, target); /* 0x34000000 | 0x81ffffff */ + return P(am_brcond_target_B_4_CBNZW)(ctx, target); /* 0x34000000 | 0x81ffffff */ } case 1: { struct bitslice target = {.nruns = 1, .runs = (struct bitslice_run[]) {{5,0,14}}}; @@ -115,7 +115,9 @@ switch ((op >> 25) & 
0x1) { case 0: { if ((op & 0xff000010) == 0x54000000) { - goto insn_am_brcond_target_B_5_Bcc; /* 0x54000000 | 0x00ffffef */ + struct bitslice cond = {.nruns = 1, .runs = (struct bitslice_run[]) {{0,0,4}}}; + struct bitslice target = {.nruns = 1, .runs = (struct bitslice_run[]) {{5,0,19}}}; + return P(ccode_cond_am_brcond_target_B_1_Bcc)(ctx, cond, target); /* 0x54000000 | 0x00ffffef */ } else { return P(unidentified)(ctx); } @@ -145,8 +147,9 @@ static INLINE tdis_ret P(adrlabel_label_unk_Xd_1_ADR)(struct bitslice ctx, struc static INLINE tdis_ret P(adrplabel_label_unk_Xd_1_ADRP)(struct bitslice ctx, struct bitslice Xd, struct bitslice label) {} static INLINE tdis_ret P(am_b_target_addr_B_1_B)(struct bitslice ctx, struct bitslice addr) {} static INLINE tdis_ret P(am_bl_target_addr_1_BL)(struct bitslice ctx, struct bitslice addr) {} -static INLINE tdis_ret P(am_brcond_target_B_5_Bcc)(struct bitslice ctx, struct bitslice target) {} +static INLINE tdis_ret P(am_brcond_target_B_4_CBNZW)(struct bitslice ctx, struct bitslice target) {} static INLINE tdis_ret P(am_ldrlit_label_unk_Rt_6_LDRDl)(struct bitslice ctx, struct bitslice Rt, struct bitslice label) {} static INLINE tdis_ret P(am_tbrcond_target_B_4_TBNZW)(struct bitslice ctx, struct bitslice target) {} +static INLINE tdis_ret P(ccode_cond_am_brcond_target_B_1_Bcc)(struct bitslice ctx, struct bitslice cond, struct bitslice target) {} */ diff --git a/lib/arm64/assemble.h b/lib/arm64/assemble.h index 8a98b7b..1dca7eb 100644 --- a/lib/arm64/assemble.h +++ b/lib/arm64/assemble.h @@ -1,9 +1,15 @@ #pragma once #include "dis.h" + +static inline int size_of_MOVi64(uint64_t val) { + int num_nybbles = val == 0 ? 1 : ((64 - __builtin_clzll(val) + 15) / 16); + return 4 * num_nybbles; +} + static inline void MOVi64(void **codep, int Rd, uint64_t val) { int shift_nybbles = 0; do { - int k = shift_nybbles != 0 ? 
1 : 0; + int k = shift_nybbles != 0; op32(codep, 0xd2800000 | k << 29 | Rd | (val & 0xffff) << 5 | shift_nybbles << 21); shift_nybbles++; @@ -54,3 +60,7 @@ static inline void BR(void **codep, int reg) { op32(codep, 0xd61f0000 | reg << 5); } +static inline void Bccrel(void **codep, int cc, int offset) { + op32(codep, 0x54000000 | (offset / 4) << 5 | cc); +} + diff --git a/lib/arm64/dis-arm64.inc.h b/lib/arm64/dis-arm64.inc.h index 14a3a92..04349f2 100644 --- a/lib/arm64/dis-arm64.inc.h +++ b/lib/arm64/dis-arm64.inc.h @@ -8,15 +8,32 @@ static INLINE void P(adrplabel_label_unk_Xd_1_ADRP)(tdis_ctx ctx, struct bitslic } static INLINE void P(am_b_target_addr_B_1_B)(tdis_ctx ctx, struct bitslice addr) { return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4, - /*cond*/ false); + /*cc*/ 0); } static INLINE void P(am_bl_target_addr_1_BL)(tdis_ctx ctx, struct bitslice addr) { return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4, - /*cond*/ false); + /*cc*/ 0); } -static INLINE void P(am_brcond_target_B_5_Bcc)(tdis_ctx ctx, struct bitslice target) { - return P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 19) * 4, - /*cond*/ true); +static INLINE void P(ccode_cond_am_brcond_target_B_1_Bcc)(tdis_ctx ctx, struct bitslice cond, struct bitslice target) { + int bits = bs_get(cond, ctx->op); + /* Bcc with AL/NV (which is actually just another AL) is useless but possible. */ + int cc = bits >= 0xe ? 
0 : (CC_ARMCC | bits); + return P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 19) * 4, cc); +} +static INLINE void P(am_tbrcond_target_B_4_TBNZW)(tdis_ctx ctx, struct bitslice target) { + P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 14) * 4, CC_XBXZ); + if (TDIS_CTX_MODIFY(ctx)) { + /* ditto CBNZ on ARM */ + int new_target = (TDIS_CTX_NEWVAL(ctx, 0) - ctx->pc) / 4; + unsigned new = bs_set(target, new_target, ctx->op); + if (TDIS_CTX_NEWVAL(ctx, 1)) + new ^= 1 << 24; + TDIS_CTX_SET_NEWOP(ctx, new); + } +} +static INLINE void P(am_brcond_target_B_4_CBNZW)(tdis_ctx ctx, struct bitslice target) { + /* both have the same bit to control Z/NZ */ + return P(am_tbrcond_target_B_4_TBNZW)(ctx, target); } static INLINE void P(am_ldrlit_label_unk_Rt_6_LDRDl)(tdis_ctx ctx, struct bitslice Rt, struct bitslice label) { enum pcrel_load_mode mode; @@ -38,10 +55,6 @@ static INLINE void P(am_ldrlit_label_unk_Rt_6_LDRDl)(tdis_ctx ctx, struct bitsli return P(pcrel)(ctx, ctx->pc + sext(bs_get(label, ctx->op), 19) * 4, bs_get(Rt, ctx->op), mode); } -static INLINE void P(am_tbrcond_target_B_4_TBNZW)(tdis_ctx ctx, struct bitslice target) { - return P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 14) * 4, - /*cond*/ true); -} static INLINE void P(GPR64_Rn_1_RET)(tdis_ctx ctx, UNUSED struct bitslice Rn) { return P(ret)(ctx); } diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h index cc94f90..3d3d653 100644 --- a/lib/arm64/jump-patch.h +++ b/lib/arm64/jump-patch.h @@ -14,8 +14,8 @@ static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, } static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, - UNUSED struct arch_dis_ctx arch) { - int reg = 15; + struct arch_dis_ctx arch) { + int reg = arm64_get_unwritten_temp_reg(&arch); intptr_t diff = (dpc & ~0xfff) - (pc & ~0xfff); if (!(diff >= -0x100000000 && diff < 0x100000000)) MOVi64(codep, reg, dpc); diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h index 84bd638..c21bc0b 100644 --- 
a/lib/arm64/misc.h +++ b/lib/arm64/misc.h @@ -5,5 +5,32 @@ #define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h" #define MIN_INSN_SIZE 4 #define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */ -struct arch_dis_ctx {}; -static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} + +struct arch_dis_ctx { + /* For transform_dis only - used to get temporary registers. We assume + * that we can use any caller-saved or IP register which was not written, + * so r9-r18. + * This is a massive overestimate: we just OR in each instruction's bits + * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair + * instructions), and 20:16 (Rs for store-exclusive instructions). It + * would be easy to restrict the latter two to the few instructions that + * actually use them, but with 10 available registers, and a patch of at + * most 3 instructions (and none of the instructions that require a temp + * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't + * run out even with the dumbest possible thing. 
*/ + uint32_t regs_possibly_written; +}; + +static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { + ctx->regs_possibly_written = 0; +} + +static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) { + uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9)); + if (!avail) + __builtin_abort(); + return 31 - __builtin_clz(avail); +} + +#define CC_ARMCC (CC_CONDITIONAL | 0x200) +#define CC_XBXZ (CC_CONDITIONAL | 0x400) diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/transform-dis-arm64.inc.h index 97a4aca..a98932d 100644 --- a/lib/arm64/transform-dis-arm64.inc.h +++ b/lib/arm64/transform-dis-arm64.inc.h @@ -6,11 +6,9 @@ void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned ctx->write_newop_here = NULL; void **codep = ctx->rewritten_ptr_ptr; if (load_mode >= PLM_U32_SIMD) { - /* use x0 as scratch */ - op32(codep, 0xf81f0fe0); /* str x0, [sp, #-0x10]! */ + int reg = arm64_get_unwritten_temp_reg(&ctx->arch); MOVi64(codep, 0, dpc); LDRxi(codep, reg, 0, 0, true, load_mode); - op32(codep, 0xf84107e0); /* ldr x0, [sp], #0x10 */ } else { MOVi64(codep, reg, dpc); LDRxi(codep, reg, reg, 0, true, load_mode); @@ -26,9 +24,28 @@ void transform_dis_branch(struct transform_dis_ctx *ctx, uintptr_t dpc, int cc) ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; return; } - /* TODO */ - (void) cc; + ctx->write_newop_here = NULL; + int mov_br_size = size_of_MOVi64(dpc) + 4; + + void **codep = ctx->rewritten_ptr_ptr; + if ((cc & CC_ARMCC) == CC_ARMCC) { + int icc = (cc & 0xf) ^ 1; + Bccrel(codep, icc, 4 + mov_br_size); + } else if ((cc & CC_XBXZ) == CC_XBXZ) { + ctx->modify = true; + ctx->newval[0] = ctx->pc + 4 + mov_br_size; + ctx->newval[1] = 1; /* do invert */ + ctx->write_newop_here = *codep; *codep += 4; + } + int reg = arm64_get_unwritten_temp_reg(&ctx->arch); + MOVi64(codep, reg, dpc); + BR(codep, reg); } static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {} -static void 
transform_dis_post_dis(UNUSED struct transform_dis_ctx *ctx) {} +static void transform_dis_post_dis(struct transform_dis_ctx *ctx) { + uint32_t op = ctx->op; + ctx->arch.regs_possibly_written |= op & 31; + ctx->arch.regs_possibly_written |= op >> 10 & 31; + ctx->arch.regs_possibly_written |= op >> 16 & 31; +} diff --git a/lib/hook-functions.c b/lib/hook-functions.c index 986990b..e0516cb 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -168,6 +168,21 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, &hi->trampoline_page, arch))) goto end; + uintptr_t pc_patch_end = pc_patch_start + patch_size; + /* Generate the rewritten start of the function for the outro + * trampoline (complaining if any bad instructions are found) + * (on arm64, this modifies regs_possibly_written, which is used by the + * two make_jump_patch calls) */ + uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE]; + void *rp = rewritten_temp; + if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end, + &arch, hi->offset_by_pcdiff))) + goto end; + /* Check some of the rest of the function for jumps back into the + * patched region. */ + if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) + goto end; + uintptr_t initial_target; if (need_intro_trampoline) { initial_target = (uintptr_t) trampoline_ptr; @@ -180,18 +195,6 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, make_jump_patch(&jp, pc_patch_start, initial_target, arch); hi->jump_patch_size = (uint8_t *) jp - hi->jump_patch; - uintptr_t pc_patch_end = pc_patch_start + patch_size; - /* Generate the rewritten start of the function for the outro - * trampoline (complaining if any bad instructions are found). 
*/ - uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE]; - void *rp = rewritten_temp; - if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end, - arch, hi->offset_by_pcdiff))) - goto end; - /* Check some of the rest of the function for jumps back into the - * patched region. */ - if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) - goto end; size_t rewritten_size = (uint8_t *) rp - rewritten_temp; size_t jumpback_size = diff --git a/lib/transform-dis.c b/lib/transform-dis.c index 3210333..867a981 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -76,14 +76,14 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uintptr_t pc_patch_start, uintptr_t *pc_patch_end_p, - struct arch_dis_ctx initial_arch_ctx, + struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff) { struct transform_dis_ctx ctx; memset(&ctx, 0, sizeof(ctx)); ctx.pc_patch_start = pc_patch_start; ctx.pc_patch_end = *pc_patch_end_p; ctx.pc = pc_patch_start; - ctx.arch = initial_arch_ctx; + ctx.arch = *arch_ctx_p; /* data is written to rewritten both by this function directly and, in case * additional scaffolding is needed, by arch-specific transform_dis_* */ ctx.rewritten_ptr_ptr = rewritten_ptr_ptr; @@ -127,6 +127,7 @@ int transform_dis_main(const void *restrict code_ptr, (int) (*rewritten_ptr_ptr - rewritten_start); } *pc_patch_end_p = ctx.pc; + *arch_ctx_p = ctx.arch; return SUBSTITUTE_OK; } diff --git a/lib/transform-dis.h b/lib/transform-dis.h index 3ab120b..70fe57a 100644 --- a/lib/transform-dis.h +++ b/lib/transform-dis.h @@ -5,6 +5,6 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uintptr_t pc_patch_start, - uintptr_t *pc_patch_end, - struct arch_dis_ctx initial_arch_ctx, + uintptr_t *pc_patch_end_p, + struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff); diff --git a/test/test-transform-dis.c b/test/test-transform-dis.c index ab52742..1f7dac0 100644 --- 
a/test/test-transform-dis.c +++ b/test/test-transform-dis.c @@ -27,7 +27,7 @@ int main(UNUSED int argc, char **argv) { &rewritten_ptr, pc_patch_start, &pc_patch_end, - arch, + &arch, offsets); printf("=> %d\n", ret); printf("#endif\n"); |