From eb93cee2a22cde812ccd6b9bd418d36185c058f5 Mon Sep 17 00:00:00 2001
From: comex
Date: Sun, 8 Feb 2015 23:45:24 -0500
Subject: Refactor disassembly so x86 works, and add x86 transform-dis.

This patch is a monolithic mess, because I was too lazy to do the
refactor first (that would require some stash fun, since I wasn't
actually sure before doing x86 transform-dis what would be needed).

Anyway, the resulting code should be cleaner - less duplication. This
breaks ARM/ARM64.
---
Review notes (changes relative to the commit as originally published):
 * transform_dis_post_dis now ORs 1u << regno into regs_possibly_written;
   arm64_get_unwritten_temp_reg() reads that field as a bitmask over bits
   9..18, which the raw 5-bit register numbers could never set.
 * transform_dis_pcrel's SIMD path now materialises the address in the
   temporary register and loads into the caller-supplied `reg`, instead of
   hard-coding register 0 and shadowing `reg`.
 * Fixed the "insturctions" typo in the arch-dis.h comment.

 lib/arm64/arch-dis.h                | 37 ++++++++++++++++++++
 lib/arm64/arch-transform-dis.inc.h  | 52 ++++++++++++++++++++++++++++
 lib/arm64/dis-arm64.inc.h           | 69 -------------------------------------
 lib/arm64/dis-main.inc.h            | 69 +++++++++++++++++++++++++++++++++++++
 lib/arm64/misc.h                    | 35 +------------------
 lib/arm64/transform-dis-arm64.inc.h | 52 ----------------------------
 6 files changed, 159 insertions(+), 155 deletions(-)
 create mode 100644 lib/arm64/arch-dis.h
 create mode 100644 lib/arm64/arch-transform-dis.inc.h
 delete mode 100644 lib/arm64/dis-arm64.inc.h
 create mode 100644 lib/arm64/dis-main.inc.h
 delete mode 100644 lib/arm64/transform-dis-arm64.inc.h

(limited to 'lib/arm64')

diff --git a/lib/arm64/arch-dis.h b/lib/arm64/arch-dis.h
new file mode 100644
index 0000000..f91328b
--- /dev/null
+++ b/lib/arm64/arch-dis.h
@@ -0,0 +1,37 @@
+#pragma once
+#define MIN_INSN_SIZE 4
+#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
+
+struct arch_pcrel_info {
+    unsigned reg;
+    enum pcrel_load_mode lm;
+};
+
+struct arch_dis_ctx {
+    /* For transform_dis only - used to get temporary registers. We assume
+     * that we can use any caller-saved or IP register which was not written,
+     * so r9-r18.
+     * This is a massive overestimate: we just OR in each instruction's bits
+     * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair
+     * instructions), and 20:16 (Rs for store-exclusive instructions). It
+     * would be easy to restrict the latter two to the few instructions that
+     * actually use them, but with 10 available registers, and a patch of at
+     * most 3 instructions (and none of the instructions that require a temp
+     * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't
+     * run out even with the dumbest possible thing. */
+    uint32_t regs_possibly_written;
+};
+
+static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
+    ctx->regs_possibly_written = 0;
+}
+
+static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) {
+    uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9));
+    if (!avail)
+        __builtin_abort();
+    return 31 - __builtin_clz(avail);
+}
+
+#define CC_ARMCC (CC_CONDITIONAL | 0x400)
+#define CC_XBXZ (CC_CONDITIONAL | 0x800)
diff --git a/lib/arm64/arch-transform-dis.inc.h b/lib/arm64/arch-transform-dis.inc.h
new file mode 100644
index 0000000..d8f831d
--- /dev/null
+++ b/lib/arm64/arch-transform-dis.inc.h
@@ -0,0 +1,52 @@
+#include "arm64/assemble.h"
+
+static NOINLINE UNUSED
+void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint_tptr dpc, unsigned reg,
+                         enum pcrel_load_mode load_mode) {
+    ctx->write_newop_here = NULL;
+    void **codep = ctx->rewritten_ptr_ptr;
+    if (load_mode >= PLM_U32_SIMD) {
+        int tmp = arm64_get_unwritten_temp_reg(&ctx->arch); /* GPR for the address */
+        MOVi64(codep, tmp, dpc);
+        LDRxi(codep, reg, tmp, 0, true, load_mode);
+    } else {
+        MOVi64(codep, reg, dpc);
+        LDRxi(codep, reg, reg, 0, true, load_mode);
+    }
+}
+
+static NOINLINE UNUSED
+void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, int cc) {
+    /* TODO fix BL */
+#ifdef TRANSFORM_DIS_VERBOSE
+    printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
+#endif
+    if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
+        ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
+        return;
+    }
+    ctx->write_newop_here = NULL;
+    int mov_br_size = size_of_MOVi64(dpc) + 4;
+
+    void **codep = ctx->rewritten_ptr_ptr;
+    if ((cc & CC_ARMCC) == CC_ARMCC) {
+        int icc = (cc & 0xf) ^ 1;
+        Bccrel(codep, icc, 4 + mov_br_size);
+    } else if ((cc & CC_XBXZ) == CC_XBXZ) {
+        ctx->modify = true;
+        ctx->newval[0] = ctx->pc + 4 + mov_br_size;
+        ctx->newval[1] = 1; /* do invert */
+        ctx->write_newop_here = *codep; *codep += 4;
+    }
+    int reg = arm64_get_unwritten_temp_reg(&ctx->arch);
+    MOVi64(codep, reg, dpc);
+    BR(codep, reg);
+}
+
+static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {}
+static void transform_dis_post_dis(struct transform_dis_ctx *ctx) {
+    uint32_t op = ctx->op; /* mark Rd/Rt, Rt2 and Rs in the per-register bitmask */
+    ctx->arch.regs_possibly_written |= 1u << (op & 31);
+    ctx->arch.regs_possibly_written |= 1u << (op >> 10 & 31);
+    ctx->arch.regs_possibly_written |= 1u << (op >> 16 & 31);
+}
diff --git a/lib/arm64/dis-arm64.inc.h b/lib/arm64/dis-arm64.inc.h
deleted file mode 100644
index 04349f2..0000000
--- a/lib/arm64/dis-arm64.inc.h
+++ /dev/null
@@ -1,69 +0,0 @@
-static INLINE void P(adrlabel_label_unk_Xd_1_ADR)(tdis_ctx ctx, struct bitslice Xd, struct bitslice label) {
-    return P(pcrel)(ctx, ctx->pc + sext(bs_get(label, ctx->op), 22),
-                    bs_get(Xd, ctx->op), PLM_ADR);
-}
-static INLINE void P(adrplabel_label_unk_Xd_1_ADRP)(tdis_ctx ctx, struct bitslice Xd, struct bitslice label) {
-    return P(pcrel)(ctx, ctx->pc + (sext(bs_get(label, ctx->op), 22) << 12),
-                    bs_get(Xd, ctx->op), PLM_ADR);
-}
-static INLINE void P(am_b_target_addr_B_1_B)(tdis_ctx ctx, struct bitslice addr) {
-    return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4,
-                     /*cc*/ 0);
-}
-static INLINE void P(am_bl_target_addr_1_BL)(tdis_ctx ctx, struct bitslice addr) {
-    return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4,
-                     /*cc*/ 0);
-}
-static INLINE void P(ccode_cond_am_brcond_target_B_1_Bcc)(tdis_ctx ctx, struct bitslice cond, struct bitslice target) {
-    int bits = bs_get(cond, ctx->op);
-    /* Bcc with AL/NV (which is actually just another AL) is useless but possible. */
-    int cc = bits >= 0xe ? 0 : (CC_ARMCC | bits);
-    return P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 19) * 4, cc);
-}
-static INLINE void P(am_tbrcond_target_B_4_TBNZW)(tdis_ctx ctx, struct bitslice target) {
-    P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 14) * 4, CC_XBXZ);
-    if (TDIS_CTX_MODIFY(ctx)) {
-        /* ditto CBNZ on ARM */
-        int new_target = (TDIS_CTX_NEWVAL(ctx, 0) - ctx->pc) / 4;
-        unsigned new = bs_set(target, new_target, ctx->op);
-        if (TDIS_CTX_NEWVAL(ctx, 1))
-            new ^= 1 << 24;
-        TDIS_CTX_SET_NEWOP(ctx, new);
-    }
-}
-static INLINE void P(am_brcond_target_B_4_CBNZW)(tdis_ctx ctx, struct bitslice target) {
-    /* both have the same bit to control Z/NZ */
-    return P(am_tbrcond_target_B_4_TBNZW)(ctx, target);
-}
-static INLINE void P(am_ldrlit_label_unk_Rt_6_LDRDl)(tdis_ctx ctx, struct bitslice Rt, struct bitslice label) {
-    enum pcrel_load_mode mode;
-    if ((ctx->op >> 26) & 1) {
-        switch (ctx->op >> 30) {
-            case 0: mode = PLM_U32_SIMD; break;
-            case 1: mode = PLM_U64_SIMD; break;
-            case 2: mode = PLM_U128_SIMD; break;
-            default: __builtin_abort();
-        }
-    } else {
-        switch (ctx->op >> 30) {
-            case 0: mode = PLM_U32; break;
-            case 1: mode = PLM_U64; break;
-            case 2: mode = PLM_S32; break;
-            default: __builtin_abort();
-        }
-    }
-    return P(pcrel)(ctx, ctx->pc + sext(bs_get(label, ctx->op), 19) * 4,
-                    bs_get(Rt, ctx->op), mode);
-}
-static INLINE void P(GPR64_Rn_1_RET)(tdis_ctx ctx, UNUSED struct bitslice Rn) {
-    return P(ret)(ctx);
-}
-
-static INLINE void P(dis)(tdis_ctx ctx) {
-    uint32_t op = ctx->op = *(uint32_t *) ctx->ptr;
-    ctx->op_size = 4;
-    /* clang doesn't realize that this is unreachable and generates code like
-     * "and ecx, 0x1f; cmp ecx, 0x1f; ja abort". Yeah, nice job there. */
-    #include "../generated/generic-dis-arm64.inc.h"
-    __builtin_abort();
-}
diff --git a/lib/arm64/dis-main.inc.h b/lib/arm64/dis-main.inc.h
new file mode 100644
index 0000000..04349f2
--- /dev/null
+++ b/lib/arm64/dis-main.inc.h
@@ -0,0 +1,69 @@
+static INLINE void P(adrlabel_label_unk_Xd_1_ADR)(tdis_ctx ctx, struct bitslice Xd, struct bitslice label) {
+    return P(pcrel)(ctx, ctx->pc + sext(bs_get(label, ctx->op), 22),
+                    bs_get(Xd, ctx->op), PLM_ADR);
+}
+static INLINE void P(adrplabel_label_unk_Xd_1_ADRP)(tdis_ctx ctx, struct bitslice Xd, struct bitslice label) {
+    return P(pcrel)(ctx, ctx->pc + (sext(bs_get(label, ctx->op), 22) << 12),
+                    bs_get(Xd, ctx->op), PLM_ADR);
+}
+static INLINE void P(am_b_target_addr_B_1_B)(tdis_ctx ctx, struct bitslice addr) {
+    return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4,
+                     /*cc*/ 0);
+}
+static INLINE void P(am_bl_target_addr_1_BL)(tdis_ctx ctx, struct bitslice addr) {
+    return P(branch)(ctx, ctx->pc + sext(bs_get(addr, ctx->op), 26) * 4,
+                     /*cc*/ 0);
+}
+static INLINE void P(ccode_cond_am_brcond_target_B_1_Bcc)(tdis_ctx ctx, struct bitslice cond, struct bitslice target) {
+    int bits = bs_get(cond, ctx->op);
+    /* Bcc with AL/NV (which is actually just another AL) is useless but possible. */
+    int cc = bits >= 0xe ? 0 : (CC_ARMCC | bits);
+    return P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 19) * 4, cc);
+}
+static INLINE void P(am_tbrcond_target_B_4_TBNZW)(tdis_ctx ctx, struct bitslice target) {
+    P(branch)(ctx, ctx->pc + sext(bs_get(target, ctx->op), 14) * 4, CC_XBXZ);
+    if (TDIS_CTX_MODIFY(ctx)) {
+        /* ditto CBNZ on ARM */
+        int new_target = (TDIS_CTX_NEWVAL(ctx, 0) - ctx->pc) / 4;
+        unsigned new = bs_set(target, new_target, ctx->op);
+        if (TDIS_CTX_NEWVAL(ctx, 1))
+            new ^= 1 << 24;
+        TDIS_CTX_SET_NEWOP(ctx, new);
+    }
+}
+static INLINE void P(am_brcond_target_B_4_CBNZW)(tdis_ctx ctx, struct bitslice target) {
+    /* both have the same bit to control Z/NZ */
+    return P(am_tbrcond_target_B_4_TBNZW)(ctx, target);
+}
+static INLINE void P(am_ldrlit_label_unk_Rt_6_LDRDl)(tdis_ctx ctx, struct bitslice Rt, struct bitslice label) {
+    enum pcrel_load_mode mode;
+    if ((ctx->op >> 26) & 1) {
+        switch (ctx->op >> 30) {
+            case 0: mode = PLM_U32_SIMD; break;
+            case 1: mode = PLM_U64_SIMD; break;
+            case 2: mode = PLM_U128_SIMD; break;
+            default: __builtin_abort();
+        }
+    } else {
+        switch (ctx->op >> 30) {
+            case 0: mode = PLM_U32; break;
+            case 1: mode = PLM_U64; break;
+            case 2: mode = PLM_S32; break;
+            default: __builtin_abort();
+        }
+    }
+    return P(pcrel)(ctx, ctx->pc + sext(bs_get(label, ctx->op), 19) * 4,
+                    bs_get(Rt, ctx->op), mode);
+}
+static INLINE void P(GPR64_Rn_1_RET)(tdis_ctx ctx, UNUSED struct bitslice Rn) {
+    return P(ret)(ctx);
+}
+
+static INLINE void P(dis)(tdis_ctx ctx) {
+    uint32_t op = ctx->op = *(uint32_t *) ctx->ptr;
+    ctx->op_size = 4;
+    /* clang doesn't realize that this is unreachable and generates code like
+     * "and ecx, 0x1f; cmp ecx, 0x1f; ja abort". Yeah, nice job there. */
+    #include "../generated/generic-dis-arm64.inc.h"
+    __builtin_abort();
+}
diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h
index f5a6154..066e9d5 100644
--- a/lib/arm64/misc.h
+++ b/lib/arm64/misc.h
@@ -1,36 +1,3 @@
 #pragma once
+#define TARGET_POINTER_SIZE 8
 #define TARGET_DIS_SUPPORTED
-#define TARGET_DIS_HEADER "arm64/dis-arm64.inc.h"
-#define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h"
-#define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h"
-#define MIN_INSN_SIZE 4
-#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
-
-struct arch_dis_ctx {
-    /* For transform_dis only - used to get temporary registers. We assume
-     * that we can use any caller-saved or IP register which was not written,
-     * so r9-r18.
-     * This is a massive overestimate: we just OR in each instruction's bits
-     * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair
-     * instructions), and 20:16 (Rs for store-exclusive insturctions). It
-     * would be easy to restrict the latter two to the few instructions that
-     * actually use them, but with 10 available registers, and a patch of at
-     * most 3 instructions (and none of the instructions that require a temp
-     * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't
-     * run out even with the dumbest possible thing. */
-    uint32_t regs_possibly_written;
-};
-
-static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
-    ctx->regs_possibly_written = 0;
-}
-
-static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) {
-    uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9));
-    if (!avail)
-        __builtin_abort();
-    return 31 - __builtin_clz(avail);
-}
-
-#define CC_ARMCC (CC_CONDITIONAL | 0x400)
-#define CC_XBXZ (CC_CONDITIONAL | 0x800)
diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/transform-dis-arm64.inc.h
deleted file mode 100644
index 792b835..0000000
--- a/lib/arm64/transform-dis-arm64.inc.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#include "arm64/assemble.h"
-
-static NOINLINE UNUSED
-void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned reg,
-                         enum pcrel_load_mode load_mode) {
-    ctx->write_newop_here = NULL;
-    void **codep = ctx->rewritten_ptr_ptr;
-    if (load_mode >= PLM_U32_SIMD) {
-        int reg = arm64_get_unwritten_temp_reg(&ctx->arch);
-        MOVi64(codep, 0, dpc);
-        LDRxi(codep, reg, 0, 0, true, load_mode);
-    } else {
-        MOVi64(codep, reg, dpc);
-        LDRxi(codep, reg, reg, 0, true, load_mode);
-    }
-}
-
-static NOINLINE UNUSED
-void transform_dis_branch(struct transform_dis_ctx *ctx, uintptr_t dpc, int cc) {
-    /* TODO fix BL */
-#ifdef TRANSFORM_DIS_VERBOSE
-    printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
-#endif
-    if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
-        ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
-        return;
-    }
-    ctx->write_newop_here = NULL;
-    int mov_br_size = size_of_MOVi64(dpc) + 4;
-
-    void **codep = ctx->rewritten_ptr_ptr;
-    if ((cc & CC_ARMCC) == CC_ARMCC) {
-        int icc = (cc & 0xf) ^ 1;
-        Bccrel(codep, icc, 4 + mov_br_size);
-    } else if ((cc & CC_XBXZ) == CC_XBXZ) {
-        ctx->modify = true;
-        ctx->newval[0] = ctx->pc + 4 + mov_br_size;
-        ctx->newval[1] = 1; /* do invert */
-        ctx->write_newop_here = *codep; *codep += 4;
-    }
-    int reg = arm64_get_unwritten_temp_reg(&ctx->arch);
-    MOVi64(codep, reg, dpc);
-    BR(codep, reg);
-}
-
-static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {}
-static void transform_dis_post_dis(struct transform_dis_ctx *ctx) {
-    uint32_t op = ctx->op;
-    ctx->arch.regs_possibly_written |= op & 31;
-    ctx->arch.regs_possibly_written |= op >> 10 & 31;
-    ctx->arch.regs_possibly_written |= op >> 16 & 31;
-}
-- 
cgit v1.2.3