diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/arm/arch-dis.h | 60 | ||||
-rw-r--r-- | lib/arm/arch-transform-dis.inc.h (renamed from lib/arm/transform-dis-arm-multi.inc.h) | 0 | ||||
-rw-r--r-- | lib/arm/dis-main.inc.h (renamed from lib/arm/dis-arm-multi.inc.h) | 0 | ||||
-rw-r--r-- | lib/arm/misc.h | 58 | ||||
-rw-r--r-- | lib/arm64/arch-dis.h | 37 | ||||
-rw-r--r-- | lib/arm64/arch-transform-dis.inc.h (renamed from lib/arm64/transform-dis-arm64.inc.h) | 4 | ||||
-rw-r--r-- | lib/arm64/dis-main.inc.h (renamed from lib/arm64/dis-arm64.inc.h) | 0 | ||||
-rw-r--r-- | lib/arm64/misc.h | 35 | ||||
-rw-r--r-- | lib/dis.h | 26 | ||||
-rw-r--r-- | lib/hook-functions.c | 4 | ||||
-rw-r--r-- | lib/jump-dis.c | 46 | ||||
-rw-r--r-- | lib/jump-dis.h | 1 | ||||
-rw-r--r-- | lib/substitute-internal.h | 16 | ||||
-rw-r--r-- | lib/transform-dis.c | 55 | ||||
-rw-r--r-- | lib/transform-dis.h | 5 | ||||
-rw-r--r-- | lib/x86/arch-dis.h | 10 | ||||
-rw-r--r-- | lib/x86/arch-transform-dis.inc.h | 58 | ||||
-rw-r--r-- | lib/x86/dis-main.inc.h (renamed from lib/x86/dis-x86.inc.h) | 47 | ||||
-rw-r--r-- | lib/x86/jump-patch.h | 21 | ||||
-rw-r--r-- | lib/x86/misc.h | 12 |
20 files changed, 298 insertions, 197 deletions
diff --git a/lib/arm/arch-dis.h b/lib/arm/arch-dis.h new file mode 100644 index 0000000..c64ff2e --- /dev/null +++ b/lib/arm/arch-dis.h @@ -0,0 +1,60 @@ +#pragma once +#define MIN_INSN_SIZE 2 +/* each input instruction might turn into: + * - 2 bytes for Bcc, if in IT + * then ONE of: + * - 2/4 bytes for just the instruction + * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever) + * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject + * to IT, there can only reasonably be two per block, and if there are both + * then that's an unconditional exit - but we don't enforce any of this + * currently) + * - up to 7 4-byte insns for similar moves to PC that fall under 'data' + * the maximum number of possible inputs is 4, plus 4 extras if the last one + * was an IT (but in that case it can't be one of the above cases) + * while this looks huge, it's overly conservative and doesn't matter much, + * since only the actually used space will be taken up in the final output + */ +#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 196 */ + +struct arch_pcrel_info { + unsigned reg; + enum pcrel_load_mode lm; +}; + +struct arch_dis_ctx { + /* thumb? */ + bool pc_low_bit; + /* if thumb, IT cond for the next 5 instructions + * (5 because we still advance after IT) */ + uint8_t it_conds[5]; + /* for transform_dis - did we add space for a Bccrel? 
*/ + uint8_t bccrel_bits; + void *bccrel_p; +}; + +static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { + ctx->pc_low_bit = false; + ctx->bccrel_p = NULL; + memset(ctx->it_conds, 0xe, 5); +} + +static inline void advance_it_cond(struct arch_dis_ctx *ctx) { + ctx->it_conds[0] = ctx->it_conds[1]; + ctx->it_conds[1] = ctx->it_conds[2]; + ctx->it_conds[2] = ctx->it_conds[3]; + ctx->it_conds[3] = ctx->it_conds[4]; + ctx->it_conds[4] = 0xe; +} + +#define DFLAG_IS_LDRD_STRD (1 << 16) + +/* Types of conditionals for 'branch' */ +/* a regular old branch-with-condition */ +#define CC_ARMCC (CC_CONDITIONAL | 0x400) +/* already in an IT block - in transform_dis this will be rewritten to a branch + * anyway, so it can be treated as unconditional; in jump_dis we have to know + * to keep going */ +#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x800) +/* CBZ/CBNZ is rewritten */ +#define CC_CBXZ (CC_CONDITIONAL | 0xc00) diff --git a/lib/arm/transform-dis-arm-multi.inc.h b/lib/arm/arch-transform-dis.inc.h index 6e91ff5..6e91ff5 100644 --- a/lib/arm/transform-dis-arm-multi.inc.h +++ b/lib/arm/arch-transform-dis.inc.h diff --git a/lib/arm/dis-arm-multi.inc.h b/lib/arm/dis-main.inc.h index bf2767e..bf2767e 100644 --- a/lib/arm/dis-arm-multi.inc.h +++ b/lib/arm/dis-main.inc.h diff --git a/lib/arm/misc.h b/lib/arm/misc.h index ef11a05..c18367d 100644 --- a/lib/arm/misc.h +++ b/lib/arm/misc.h @@ -1,59 +1,3 @@ #pragma once +#define TARGET_POINTER_SIZE 4 #define TARGET_DIS_SUPPORTED -#define TARGET_DIS_HEADER "arm/dis-arm-multi.inc.h" -#define TARGET_JUMP_PATCH_HDR "arm/jump-patch.h" -#define TARGET_TRANSFORM_DIS_HEADER "arm/transform-dis-arm-multi.inc.h" -#define MIN_INSN_SIZE 2 -/* each input instruction might turn into: - * - 2 bytes for Bcc, if in IT - * then ONE of: - * - 2/4 bytes for just the instruction - * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever) - * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject - * to IT, 
there can only reasonably be two per block, and if there are both - * then that's an unconditional exit - but we don't enforce any of this - * currently) - * - up to 7 4-byte insns for similar moves to PC that fall under 'data' - * the maximum number of possible inputs is 4, plus 4 extras if the last one - * was an IT (but in that case it can't be one of the above cases) - * while this looks huge, it's overly conservative and doesn't matter much, - * since only the actually used space will be taken up in the final output - */ -#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 196 */ - -struct arch_dis_ctx { - /* thumb? */ - bool pc_low_bit; - /* if thumb, IT cond for the next 5 instructions - * (5 because we still advance after IT) */ - uint8_t it_conds[5]; - /* for transform_dis - did we add space for a Bccrel? */ - uint8_t bccrel_bits; - void *bccrel_p; -}; - -static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { - ctx->pc_low_bit = false; - ctx->bccrel_p = NULL; - memset(ctx->it_conds, 0xe, 5); -} - -static inline void advance_it_cond(struct arch_dis_ctx *ctx) { - ctx->it_conds[0] = ctx->it_conds[1]; - ctx->it_conds[1] = ctx->it_conds[2]; - ctx->it_conds[2] = ctx->it_conds[3]; - ctx->it_conds[3] = ctx->it_conds[4]; - ctx->it_conds[4] = 0xe; -} - -#define DFLAG_IS_LDRD_STRD (1 << 16) - -/* Types of conditionals for 'branch' */ -/* a regular old branch-with-condition */ -#define CC_ARMCC (CC_CONDITIONAL | 0x400) -/* already in an IT block - in transform_dis this will be rewritten to a branch - * anyway, so it can be treated as unconditional; in jump_dis we have to know - * to keep going */ -#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x800) -/* CBZ/CBNZ is rewritten */ -#define CC_CBXZ (CC_CONDITIONAL | 0xc00) diff --git a/lib/arm64/arch-dis.h b/lib/arm64/arch-dis.h new file mode 100644 index 0000000..f91328b --- /dev/null +++ b/lib/arm64/arch-dis.h @@ -0,0 +1,37 @@ +#pragma once +#define MIN_INSN_SIZE 4 +#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also 
conservative */ + +struct arch_pcrel_info { + unsigned reg; + enum pcrel_load_mode lm; +}; + +struct arch_dis_ctx { + /* For transform_dis only - used to get temporary registers. We assume + * that we can use any caller-saved or IP register which was not written, + * so r9-r18. + * This is a massive overestimate: we just OR in each instruction's bits + * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair + * instructions), and 20:16 (Rs for store-exclusive instructions). It + * would be easy to restrict the latter two to the few instructions that + * actually use them, but with 10 available registers, and a patch of at + * most 3 instructions (and none of the instructions that require a temp + * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't + * run out even with the dumbest possible thing. */ + uint32_t regs_possibly_written; +}; + +static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { + ctx->regs_possibly_written = 0; +} + +static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) { + uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9)); + if (!avail) + __builtin_abort(); + return 31 - __builtin_clz(avail); +} + +#define CC_ARMCC (CC_CONDITIONAL | 0x400) +#define CC_XBXZ (CC_CONDITIONAL | 0x800) diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/arch-transform-dis.inc.h index 792b835..d8f831d 100644 --- a/lib/arm64/transform-dis-arm64.inc.h +++ b/lib/arm64/arch-transform-dis.inc.h @@ -1,7 +1,7 @@ #include "arm64/assemble.h" static NOINLINE UNUSED -void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned reg, +void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint_tptr dpc, unsigned reg, enum pcrel_load_mode load_mode) { ctx->write_newop_here = NULL; void **codep = ctx->rewritten_ptr_ptr; @@ -16,7 +16,7 @@ void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned } static NOINLINE UNUSED -void 
transform_dis_branch(struct transform_dis_ctx *ctx, uintptr_t dpc, int cc) { +void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, int cc) { /* TODO fix BL */ #ifdef TRANSFORM_DIS_VERBOSE printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc); diff --git a/lib/arm64/dis-arm64.inc.h b/lib/arm64/dis-main.inc.h index 04349f2..04349f2 100644 --- a/lib/arm64/dis-arm64.inc.h +++ b/lib/arm64/dis-main.inc.h diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h index f5a6154..066e9d5 100644 --- a/lib/arm64/misc.h +++ b/lib/arm64/misc.h @@ -1,36 +1,3 @@ #pragma once +#define TARGET_POINTER_SIZE 8 #define TARGET_DIS_SUPPORTED -#define TARGET_DIS_HEADER "arm64/dis-arm64.inc.h" -#define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h" -#define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h" -#define MIN_INSN_SIZE 4 -#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */ - -struct arch_dis_ctx { - /* For transform_dis only - used to get temporary registers. We assume - * that we can use any caller-saved or IP register which was not written, - * so r9-r18. - * This is a massive overestimate: we just OR in each instruction's bits - * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair - * instructions), and 20:16 (Rs for store-exclusive insturctions). It - * would be easy to restrict the latter two to the few instructions that - * actually use them, but with 10 available registers, and a patch of at - * most 3 instructions (and none of the instructions that require a temp - * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't - * run out even with the dumbest possible thing. 
*/ - uint32_t regs_possibly_written; -}; - -static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { - ctx->regs_possibly_written = 0; -} - -static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) { - uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9)); - if (!avail) - __builtin_abort(); - return 31 - __builtin_clz(avail); -} - -#define CC_ARMCC (CC_CONDITIONAL | 0x400) -#define CC_XBXZ (CC_CONDITIONAL | 0x800) @@ -111,6 +111,11 @@ static const unsigned null_op = -0x100; #error "no disassembler for the target architecture yet" #endif +static inline void op64(void **codep, uint64_t op) { + *(uint64_t *) *codep = op; + *codep += 8; +} + static inline void op32(void **codep, uint32_t op) { *(uint32_t *) *codep = op; *codep += 4; @@ -121,5 +126,26 @@ static inline void op16(void **codep, uint16_t op) { *codep += 2; } +static inline void op8(void **codep, uint8_t op) { + *(uint8_t *) *codep = op; + (*codep)++; +} + #define CC_CONDITIONAL 0x100 #define CC_CALL 0x200 + +struct dis_ctx_base { + uint_tptr pc; + const void *ptr; +#if defined(TARGET_x86_64) || defined(TARGET_i386) + uint8_t newop[32]; +#else + uint8_t newop[4]; + uint32_t op; +#endif + uint32_t newval[4]; + bool modify; + int op_size, newop_size; +}; + +#include stringify(TARGET_DIR/arch-dis.h) diff --git a/lib/hook-functions.c b/lib/hook-functions.c index e0516cb..953683b 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -5,7 +5,7 @@ #include "transform-dis.h" #include "execmem.h" #include "stop-other-threads.h" -#include TARGET_JUMP_PATCH_HDR +#include stringify(TARGET_DIR/jump-patch.h) struct hook_internal { int offset_by_pcdiff[MAX_JUMP_PATCH_SIZE + 1]; @@ -168,7 +168,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, &hi->trampoline_page, arch))) goto end; - uintptr_t pc_patch_end = pc_patch_start + patch_size; + uint_tptr pc_patch_end = pc_patch_start + patch_size; /* Generate the rewritten start of the function for 
the outro * trampoline (complaining if any bad instructions are found) * (on arm64, this modifies regs_possibly_written, which is used by the diff --git a/lib/jump-dis.c b/lib/jump-dis.c index 3e29bf7..528cfc2 100644 --- a/lib/jump-dis.c +++ b/lib/jump-dis.c @@ -1,5 +1,6 @@ #include "substitute-internal.h" #ifdef TARGET_DIS_SUPPORTED +#define DIS_MAY_MODIFY 0 #include "dis.h" #include <stdint.h> #include <stdbool.h> @@ -22,15 +23,14 @@ struct jump_dis_ctx { bool bad_insn; bool continue_after_this_insn; - uintptr_t pc; - uintptr_t pc_patch_start; - uintptr_t pc_patch_end; - unsigned op; - const void *ptr; - int op_size; + struct dis_ctx_base base; + + uint_tptr pc_patch_start; + uint_tptr pc_patch_end; + uint8_t seen_mask[JUMP_ANALYSIS_MAX_INSNS / 8]; /* queue of instructions to visit */ - uintptr_t *queue; + uint_tptr *queue; size_t queue_write_off; size_t queue_read_off; size_t queue_size; @@ -43,12 +43,8 @@ struct jump_dis_ctx { #define P(x) jump_dis_##x #define tdis_ctx struct jump_dis_ctx * -#define TDIS_CTX_MODIFY(ctx) 0 -#define TDIS_CTX_NEWVAL(ctx, n) 0 -#define TDIS_CTX_NEWOP(ctx) 0 -#define TDIS_CTX_SET_NEWOP(ctx, new) ((void) 0) -static void jump_dis_add_to_queue(struct jump_dis_ctx *ctx, uintptr_t pc) { +static void jump_dis_add_to_queue(struct jump_dis_ctx *ctx, uint_tptr pc) { size_t diff = (pc - ctx->pc_patch_start) / MIN_INSN_SIZE; if (diff >= JUMP_ANALYSIS_MAX_INSNS) { #ifdef JUMP_DIS_VERBOSE @@ -89,8 +85,8 @@ void jump_dis_data(UNUSED struct jump_dis_ctx *ctx, } static INLINE UNUSED -void jump_dis_pcrel(struct jump_dis_ctx *ctx, uintptr_t dpc, - UNUSED unsigned reg, UNUSED bool is_load) { +void jump_dis_pcrel(struct jump_dis_ctx *ctx, uint_tptr dpc, + UNUSED struct arch_pcrel_info info) { ctx->bad_insn = dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end; } @@ -100,7 +96,7 @@ void jump_dis_ret(struct jump_dis_ctx *ctx) { } static NOINLINE UNUSED -void jump_dis_branch(struct jump_dis_ctx *ctx, uintptr_t dpc, bool conditional) { +void 
jump_dis_branch(struct jump_dis_ctx *ctx, uint_tptr dpc, int cc) { if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { ctx->bad_insn = true; return; @@ -109,7 +105,7 @@ void jump_dis_branch(struct jump_dis_ctx *ctx, uintptr_t dpc, bool conditional) printf("jump-dis: enqueueing %llx\n", (unsigned long long) dpc); #endif jump_dis_add_to_queue(ctx, dpc); - ctx->continue_after_this_insn = conditional; + ctx->continue_after_this_insn = cc & (CC_CONDITIONAL | CC_CALL); } static INLINE UNUSED @@ -127,25 +123,25 @@ void jump_dis_thumb_it(UNUSED struct jump_dis_ctx *ctx) { static void jump_dis_dis(struct jump_dis_ctx *ctx); -bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end, +bool jump_dis_main(void *code_ptr, uint_tptr pc_patch_start, uint_tptr pc_patch_end, struct arch_dis_ctx initial_dis_ctx) { bool ret; struct jump_dis_ctx ctx; memset(&ctx, 0, sizeof(ctx)); ctx.pc_patch_start = pc_patch_start; ctx.pc_patch_end = pc_patch_end; - ctx.pc = pc_patch_end; + ctx.base.pc = pc_patch_end; ctx.arch = initial_dis_ctx; while (1) { ctx.bad_insn = false; ctx.continue_after_this_insn = true; - ctx.ptr = code_ptr + (ctx.pc - pc_patch_start); + ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start); jump_dis_dis(&ctx); #ifdef JUMP_DIS_VERBOSE printf("jump-dis: pc=%llx op=%08x size=%x bad=%d continue_after=%d\n", - (unsigned long long) ctx.pc, - ctx.op, - ctx.op_size, + (unsigned long long) ctx.base.pc, + ctx.base.op, + ctx.base.op_size, ctx.bad_insn, ctx.continue_after_this_insn); #endif @@ -154,12 +150,12 @@ bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_ goto fail; } if (ctx.continue_after_this_insn) - jump_dis_add_to_queue(&ctx, ctx.pc + ctx.op_size); + jump_dis_add_to_queue(&ctx, ctx.base.pc + ctx.base.op_size); /* get next address */ if (ctx.queue_read_off == ctx.queue_write_off) break; - ctx.pc = ctx.queue[ctx.queue_read_off]; + ctx.base.pc = ctx.queue[ctx.queue_read_off]; ctx.queue_read_off = 
(ctx.queue_read_off + 1) % ctx.queue_size; ctx.queue_count--; } @@ -170,5 +166,5 @@ fail: return ret; } -#include TARGET_DIS_HEADER +#include stringify(TARGET_DIR/dis-main.inc.h) #endif /* TARGET_DIS_SUPPORTED */ diff --git a/lib/jump-dis.h b/lib/jump-dis.h index 575a84d..fccd1a6 100644 --- a/lib/jump-dis.h +++ b/lib/jump-dis.h @@ -1,6 +1,7 @@ #pragma once #include <stdint.h> #include <stdbool.h> +#include stringify(TARGET_DIR/arch-dis.h) bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end, struct arch_dis_ctx initial_dis_ctx); diff --git a/lib/substitute-internal.h b/lib/substitute-internal.h index 17ad6ec..9a91516 100644 --- a/lib/substitute-internal.h +++ b/lib/substitute-internal.h @@ -49,12 +49,22 @@ typedef struct section section_x; #endif #if defined(TARGET_arm) - #include "arm/misc.h" + #define TARGET_DIR arm #elif defined(TARGET_arm64) - #include "arm64/misc.h" + #define TARGET_DIR arm64 #elif defined(TARGET_x86_64) || defined(TARGET_i386) - #include "x86/misc.h" + #define TARGET_DIR x86 #endif +#define stringify_(x) #x +#define stringify(x) stringify_(x) +#include stringify(TARGET_DIR/misc.h) + +#if TARGET_POINTER_SIZE == 8 + typedef uint64_t uint_tptr; +#elif TARGET_POINTER_SIZE == 4 + typedef uint32_t uint_tptr; +#endif + #ifdef __APPLE__ /* This could graduate to a public API but is not yet. 
Needs more diff --git a/lib/transform-dis.c b/lib/transform-dis.c index 867a981..8f89fb3 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -1,5 +1,6 @@ #include "substitute-internal.h" #ifdef TARGET_DIS_SUPPORTED +#define DIS_MAY_MODIFY 1 #include "substitute.h" #include "dis.h" @@ -13,21 +14,15 @@ struct transform_dis_ctx { /* outputs */ bool modify; int err; + struct dis_ctx_base base; - uintptr_t pc_patch_start; + uint_tptr pc_patch_start; /* this is only tentative - it will be updated to include parts of * instructions poking out, and instructions forced to be transformed by IT */ - uintptr_t pc_patch_end; - uintptr_t pc; - int op_size; - unsigned op; - unsigned newop; - unsigned newval[4]; - + uint_tptr pc_patch_end; /* for IT - eww */ bool force_keep_transforming; - const void *ptr; void **rewritten_ptr_ptr; void *write_newop_here; @@ -35,10 +30,6 @@ struct transform_dis_ctx { }; #define tdis_ctx struct transform_dis_ctx * -#define TDIS_CTX_MODIFY(ctx) ((ctx)->modify) -#define TDIS_CTX_NEWVAL(ctx, n) ((ctx)->newval[n]) -#define TDIS_CTX_NEWOP(ctx) ((ctx)->newop) -#define TDIS_CTX_SET_NEWOP(ctx, new) ((ctx)->newop = (new)) /* largely similar to jump_dis */ @@ -46,14 +37,14 @@ static INLINE UNUSED void transform_dis_ret(struct transform_dis_ctx *ctx) { /* ret is okay if it's at the end of the required patch (past the original * patch size is good too) */ - if (ctx->pc + ctx->op_size < ctx->pc_patch_end) + if (ctx->base.pc + ctx->base.op_size < ctx->pc_patch_end) ctx->err = SUBSTITUTE_ERR_FUNC_TOO_SHORT; } static INLINE UNUSED void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) { #ifdef TRANSFORM_DIS_VERBOSE - printf("transform_dis (%p): unidentified\n", (void *) ctx->pc); + printf("transform_dis (%p): unidentified\n", (void *) ctx->base.pc); #endif /* this isn't exhaustive, so unidentified is fine */ } @@ -74,15 +65,15 @@ static void transform_dis_post_dis(struct transform_dis_ctx *ctx); int transform_dis_main(const void *restrict 
code_ptr, void **restrict rewritten_ptr_ptr, - uintptr_t pc_patch_start, - uintptr_t *pc_patch_end_p, + uint_tptr pc_patch_start, + uint_tptr *pc_patch_end_p, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff) { struct transform_dis_ctx ctx; memset(&ctx, 0, sizeof(ctx)); ctx.pc_patch_start = pc_patch_start; ctx.pc_patch_end = *pc_patch_end_p; - ctx.pc = pc_patch_start; + ctx.base.pc = pc_patch_start; ctx.arch = *arch_ctx_p; /* data is written to rewritten both by this function directly and, in case * additional scaffolding is needed, by arch-specific transform_dis_* */ @@ -90,10 +81,10 @@ int transform_dis_main(const void *restrict code_ptr, void *rewritten_start = *rewritten_ptr_ptr; int written_pcdiff = 0; offset_by_pcdiff[written_pcdiff++] = 0; - while (ctx.pc < ctx.pc_patch_end && !ctx.force_keep_transforming) { - ctx.modify = false; + while (ctx.base.pc < ctx.pc_patch_end && !ctx.force_keep_transforming) { + ctx.base.modify = false; ctx.err = 0; - ctx.ptr = code_ptr + (ctx.pc - pc_patch_start); + ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start); transform_dis_pre_dis(&ctx); @@ -105,33 +96,29 @@ int transform_dis_main(const void *restrict code_ptr, if (ctx.err) return ctx.err; if (ctx.write_newop_here != NULL) { - if (!ctx.modify) - ctx.newop = ctx.op; - if (ctx.op_size == 4) - *(uint32_t *) ctx.write_newop_here = ctx.newop; - else if (ctx.op_size == 2) - *(uint16_t *) ctx.write_newop_here = ctx.newop; + if (ctx.base.modify) + memcpy(ctx.write_newop_here, ctx.base.newop, ctx.base.newop_size); else - __builtin_abort(); + memcpy(ctx.write_newop_here, ctx.base.ptr, ctx.base.op_size); if (*rewritten_ptr_ptr == rewritten_ptr) - *rewritten_ptr_ptr += ctx.op_size; + *rewritten_ptr_ptr += ctx.base.op_size; } - ctx.pc += ctx.op_size; + ctx.base.pc += ctx.base.op_size; transform_dis_post_dis(&ctx); - int pcdiff = ctx.pc - ctx.pc_patch_start; + int pcdiff = ctx.base.pc - ctx.pc_patch_start; while (written_pcdiff < pcdiff) offset_by_pcdiff[written_pcdiff++] = 
-1; offset_by_pcdiff[written_pcdiff++] = (int) (*rewritten_ptr_ptr - rewritten_start); } - *pc_patch_end_p = ctx.pc; + *pc_patch_end_p = ctx.base.pc; *arch_ctx_p = ctx.arch; return SUBSTITUTE_OK; } -#include TARGET_TRANSFORM_DIS_HEADER -#include TARGET_DIS_HEADER +#include stringify(TARGET_DIR/arch-transform-dis.inc.h) +#include stringify(TARGET_DIR/dis-main.inc.h) #endif /* TARGET_DIS_SUPPORTED */ diff --git a/lib/transform-dis.h b/lib/transform-dis.h index 70fe57a..c1de937 100644 --- a/lib/transform-dis.h +++ b/lib/transform-dis.h @@ -1,10 +1,11 @@ #pragma once #include <stdint.h> #include <stdbool.h> +#include stringify(TARGET_DIR/arch-dis.h) int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, - uintptr_t pc_patch_start, - uintptr_t *pc_patch_end_p, + uint_tptr pc_patch_start, + uint_tptr *pc_patch_end_p, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff); diff --git a/lib/x86/arch-dis.h b/lib/x86/arch-dis.h new file mode 100644 index 0000000..6447f38 --- /dev/null +++ b/lib/x86/arch-dis.h @@ -0,0 +1,10 @@ +#pragma once +#define MIN_INSN_SIZE 1 +#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */ + +struct arch_pcrel_info { + int reg; +}; + +struct arch_dis_ctx {}; +static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} diff --git a/lib/x86/arch-transform-dis.inc.h b/lib/x86/arch-transform-dis.inc.h new file mode 100644 index 0000000..bb86cf9 --- /dev/null +++ b/lib/x86/arch-transform-dis.inc.h @@ -0,0 +1,58 @@ +/* Pretty trivial, but in its own file to match the other architectures. */ +#include "x86/jump-patch.h" + +static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc, + struct arch_pcrel_info info) { + /* push %reg; mov $dpc, %reg; <orig but with reg instead>; pop %reg */ + /* reg is rcx, or rax if the instruction might be using rcx. */ + int rax = info.reg == 1; + void *code = *ctx->rewritten_ptr_ptr; + /* push */ + op8(&code, rax ? 
0x50 : 0x51); + /* mov */ +#ifdef TARGET_x86_64 + op8(&code, 0x48); + op8(&code, rax ? 0xb8 : 0xb9); + op64(&code, dpc); +#else + op8(&code, rax ? 0xb8 : 0xb9); + op32(&code, dpc); +#endif + ctx->write_newop_here = code; + code += ctx->base.op_size; + /* pop */ + op8(&code, rax ? 0x58 : 0x59); + *ctx->rewritten_ptr_ptr = code; + ctx->base.newop[0] = rax ? 0 : 1; + ctx->base.modify = true; +} + +static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, + int cc) { + if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { + ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START; + return; + } + void *code = *ctx->rewritten_ptr_ptr; + + ctx->write_newop_here = code; + code += ctx->base.op_size; + + struct arch_dis_ctx arch; + uintptr_t source = (uintptr_t) code + 2; + int size = jump_patch_size(source, dpc, arch, true); + /* if not taken, jmp past the big jump - this is a bit suboptimal but not that bad */ + op8(&code, 0xeb); + op8(&code, size); + make_jump_patch(&code, source, dpc, arch); + + *ctx->rewritten_ptr_ptr = code; + ctx->base.newop[0] = 2; + ctx->base.modify = true; + + if (!cc) + transform_dis_ret(ctx); +} + +static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {} +static void transform_dis_post_dis(UNUSED struct transform_dis_ctx *ctx) {} diff --git a/lib/x86/dis-x86.inc.h b/lib/x86/dis-main.inc.h index e0259ea..45a0947 100644 --- a/lib/x86/dis-x86.inc.h +++ b/lib/x86/dis-main.inc.h @@ -41,7 +41,8 @@ VEX last byte 1:0: {none, 66, f3, f2} #define I_JMP 0x40 /* execution does not continue after this */ #define I_SPEC 0x60 /* special case */ #define I_TYPE_MASK 0x60 -#define I_JIMM (0x80|I_JMP) /* imm is jump offset */ +#define I_JIMM_ONLY 0x80 /* imm is jump offset */ +#define I_JIMM (0x80|I_JMP) #define I_BAD 0x80 #ifdef TARGET_x86_64 #define if64(_64, _32) _64 @@ -72,7 +73,7 @@ static const uint8_t onebyte_bits[] = { /*D0*/ REP4(I_MODA), i64(I_8), i64(I_8), I_BAD, 0, REP8(I_SPEC), /* don't treat ljmp as a jump for 
now */ /*E0*/ REP4(I_8|I_JIMM), REP4(I_8), - /*E8*/ (I_z|I_JIMM)&~I_JMP, I_z|I_JIMM, i64(I_p), I_8|I_JIMM, 0, 0, 0, 0, + /*E8*/ I_z|I_JIMM_ONLY, I_z|I_JIMM, i64(I_p), I_8|I_JIMM, 0, 0, 0, 0, /*F0*/ I_PFX, I_BAD, I_PFX, I_PFX, 0, 0, I_MODA, I_MODA, /*F8*/ 0, 0, 0, 0, 0, 0, I_MODA, I_SPEC, }; @@ -111,8 +112,8 @@ static const uint8_t _0f_bits[] = { _Static_assert(sizeof(_0f_bits) == 256, "_0f_bits"); static void P(dis)(tdis_ctx ctx) { - const uint8_t *orig = ctx->ptr; - const uint8_t *ptr = ctx->ptr; + const uint8_t *orig = ctx->base.ptr; + const uint8_t *ptr = ctx->base.ptr; int opnd_size = 4; int mod, rm = 0; @@ -212,9 +213,10 @@ got_bits: UNUSED } } UNUSED int modrm_off = ptr - orig; + UNUSED uint8_t modrm; if (bits & I_MOD) { modrm: UNUSED; - uint8_t modrm = *ptr++; + modrm = *ptr++; mod = modrm >> 6; rm |= modrm & 7; if (rm == 4) { @@ -249,11 +251,11 @@ got_bits: UNUSED __builtin_abort(); ptr += imm_size; - ctx->ptr = ptr; - ctx->op_size = ptr - orig; + ctx->base.ptr = ptr; + ctx->base.newop_size = ctx->base.op_size = ptr - orig; /* printf("bits=%x\n", bits); */ - if ((bits & I_JIMM) == I_JIMM) { + if (bits & I_JIMM_ONLY) { int32_t imm; const void *imm_ptr = orig + imm_off; switch (imm_size) { @@ -265,13 +267,13 @@ got_bits: UNUSED bool cond = (byte1 & 0xf0) != 0xe0; bool call = !(bits & I_JMP); - P(branch)(ctx, ctx->pc + ctx->op_size + imm, + P(branch)(ctx, ctx->base.pc + ctx->base.op_size + imm, cond * CC_CONDITIONAL | call * CC_CALL); - if (TDIS_CTX_MODIFY(ctx)) { + if (DIS_MAY_MODIFY && ctx->base.modify) { /* newval[0] should be the new immediate */ - int32_t new_imm = TDIS_CTX_NEWVAL(ctx, 0); - uint8_t *new_op = TDIS_CTX_NEWOP(ctx); - memcpy(new_op, orig, ctx->op_size); + int32_t new_imm = ctx->base.newval[0]; + uint8_t *new_op = ctx->base.newop; + memcpy(new_op, orig, ctx->base.op_size); uint8_t *new_imm_ptr = new_op + imm_off; switch (imm_size) { case 1: *(int8_t *) new_imm_ptr = new_imm; break; @@ -284,17 +286,22 @@ got_bits: UNUSED int32_t disp = 
*(int32_t *) (orig + modrm_off + 1); /* unlike ARM, we can always switch to non-pcrel without making the * instruction from scratch, so we don't have 'reg' and 'lm' */ - P(pcrel)(ctx, ctx->pc + ctx->op_size + disp); - if (TDIS_CTX_MODIFY(ctx)) { - uint8_t *new_op = TDIS_CTX_NEWOP(ctx); - memcpy(new_op, orig, ctx->op_size); + struct arch_pcrel_info info = {modrm >> 3 & 7}; + P(pcrel)(ctx, ctx->base.pc + ctx->base.op_size + disp, info); + if (DIS_MAY_MODIFY && ctx->base.modify) { + uint8_t *new_op = ctx->base.newop; + memcpy(new_op, orig, ctx->base.op_size); /* newval[0] should be the new register, which should be one that * fits in r/m directly since that's all I need; - * newval[1] should be the new displacement */ + * displacement is removed */ uint8_t *new_modrm_ptr = new_op + modrm_off; - *new_modrm_ptr = (*new_modrm_ptr & ~0xc7) | 4 << 6 | TDIS_CTX_NEWVAL(ctx, 0); - *(uint32_t *) (new_modrm_ptr + 1) = TDIS_CTX_NEWVAL(ctx, 1); + *new_modrm_ptr = (*new_modrm_ptr & ~0xc7) | + 0 << 6 | + ctx->base.newval[0]; + memmove(new_modrm_ptr + 1, new_modrm_ptr + 5, + ctx->base.op_size - modrm_off - 1); + ctx->base.newop_size -= 4; } #endif } else if ((bits & I_TYPE_MASK) == I_JMP) { diff --git a/lib/x86/jump-patch.h b/lib/x86/jump-patch.h index efd4825..4c0172d 100644 --- a/lib/x86/jump-patch.h +++ b/lib/x86/jump-patch.h @@ -1,5 +1,6 @@ #pragma once #define MAX_JUMP_PATCH_SIZE 5 +#include "dis.h" static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, UNUSED struct arch_dis_ctx arch, @@ -12,21 +13,19 @@ static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, return force ? 
(2+4+8) : -1; } -static inline void make_jump_patch(void **codep, UNUSED uintptr_t pc, - uintptr_t dpc, +static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, UNUSED struct arch_dis_ctx arch) { uintptr_t diff = pc - (dpc + 5); - uint8_t *code = *codep; + void *code = *codep; if (diff == (uintptr_t) (int32_t) diff) { - *(uint8_t *) code = 0xe9; - *(uint32_t *) (code + 1) = diff; - *codep = code + 5; + op8(&code, 0xe9); + op32(&code, diff); } else { /* jmpq *(%rip) */ - *code++ = 0xff; - *code++ = 0x25; - *(uint32_t *) code = 0; code += 4; - *(uint64_t *) code = dpc; code += 8; - *codep = code; + op8(&code, 0xff); + op8(&code, 0x25); + op32(&code, 0); + op64(&code, dpc); } + *codep = code; } diff --git a/lib/x86/misc.h b/lib/x86/misc.h index c8eee19..e04f1f4 100644 --- a/lib/x86/misc.h +++ b/lib/x86/misc.h @@ -1,9 +1,7 @@ #pragma once +#ifdef TARGET_x86_64 +#define TARGET_POINTER_SIZE 8 +#else +#define TARGET_POINTER_SIZE 4 +#endif #define TARGET_DIS_SUPPORTED -#define TARGET_DIS_HEADER "x86/dis-x86.inc.h" -#define TARGET_JUMP_PATCH_HDR "x86/jump-patch.h" -#define MIN_INSN_SIZE 1 -#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */ - -struct arch_dis_ctx {}; -static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} |