diff options
-rw-r--r-- | lib/arm/arch-dis.h | 4 | ||||
-rw-r--r-- | lib/arm64/arch-dis.h | 4 | ||||
-rw-r--r-- | lib/arm64/jump-patch.h | 5 | ||||
-rw-r--r-- | lib/hook-functions.c | 58 | ||||
-rw-r--r-- | lib/transform-dis.c | 14 | ||||
-rw-r--r-- | lib/transform-dis.h | 1 | ||||
-rw-r--r-- | lib/x86/arch-dis.h | 10 | ||||
-rw-r--r-- | lib/x86/arch-transform-dis.inc.h | 21 | ||||
-rw-r--r-- | lib/x86/dis-main.inc.h | 3 | ||||
-rw-r--r-- | lib/x86/jump-patch.h | 14 | ||||
-rw-r--r-- | test/test-transform-dis.c | 25 | ||||
-rw-r--r-- | test/transform-dis-cases-i386.S | 11 |
12 files changed, 111 insertions, 59 deletions
diff --git a/lib/arm/arch-dis.h b/lib/arm/arch-dis.h index bc98cb9..8f2400b 100644 --- a/lib/arm/arch-dis.h +++ b/lib/arm/arch-dis.h @@ -39,6 +39,10 @@ static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { memset(ctx->it_conds, 0xe, 5); } +static inline int arch_code_alignment(struct arch_dis_ctx ctx) { + return ctx.pc_low_bit ? 2 : 4; +} + static inline void advance_it_cond(struct arch_dis_ctx *ctx) { ctx->it_conds[0] = ctx->it_conds[1]; ctx->it_conds[1] = ctx->it_conds[2]; diff --git a/lib/arm64/arch-dis.h b/lib/arm64/arch-dis.h index 2990f5a..f5b0518 100644 --- a/lib/arm64/arch-dis.h +++ b/lib/arm64/arch-dis.h @@ -26,6 +26,10 @@ static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { ctx->regs_possibly_written = 0; } +static inline int arch_code_alignment(UNUSED struct arch_dis_ctx ctx) { + return 4; +} + static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) { uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9)); if (!avail) diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h index 3d3d653..0a276ba 100644 --- a/lib/arm64/jump-patch.h +++ b/lib/arm64/jump-patch.h @@ -1,12 +1,13 @@ #pragma once #include "arm64/assemble.h" -#define MAX_JUMP_PATCH_SIZE 12 +#define MAX_JUMP_PATCH_SIZE 20 + static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, UNUSED struct arch_dis_ctx arch, bool force) { intptr_t diff = (dpc & ~0xfff) - (pc & ~0xfff); if (!(diff >= -0x100000000 && diff < 0x100000000)) - return force ? 16 : -1; + return force ? (size_of_MOVi64(dpc) + 4) : -1; else if (!(dpc & 0xfff)) return 8; else diff --git a/lib/hook-functions.c b/lib/hook-functions.c index 953683b..7db06d4 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -77,6 +77,8 @@ static int check_intro_trampoline(void **trampoline_ptr_p, if (*patch_size_p != -1) return SUBSTITUTE_OK; + *need_intro_trampoline_p = true; + /* Try existing trampoline */ *patch_size_p = jump_patch_size(pc, (uintptr_t) trampoline_ptr, arch, false); @@ -122,8 +124,7 @@ skip_after:; EXPORT int substitute_hook_functions(const struct substitute_function_hook *hooks, - size_t nhooks, - int options) { + size_t nhooks, int options) { struct hook_internal *his = malloc(nhooks * sizeof(*his)); if (!his) return SUBSTITUTE_ERR_OOM; @@ -169,20 +170,6 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, goto end; uint_tptr pc_patch_end = pc_patch_start + patch_size; - /* Generate the rewritten start of the function for the outro - * trampoline (complaining if any bad instructions are found) - * (on arm64, this modifies regs_possibly_written, which is used by the - * two make_jump_patch calls) */ - uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE]; - void *rp = rewritten_temp; - if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end, - &arch, hi->offset_by_pcdiff))) - goto end; - /* Check some of the rest of the function for jumps back into the - * patched region. */ - if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) - goto end; - uintptr_t initial_target; if (need_intro_trampoline) { initial_target = (uintptr_t) trampoline_ptr; @@ -191,26 +178,26 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, } else { initial_target = (uintptr_t) hook->replacement; } + + /* Make the real jump patch for the target function. */ void *jp = hi->jump_patch; make_jump_patch(&jp, pc_patch_start, initial_target, arch); hi->jump_patch_size = (uint8_t *) jp - hi->jump_patch; + size_t align_bytes = (-(uintptr_t) trampoline_ptr) + & (arch_code_alignment(arch) - 1); + size_t outro_est = align_bytes + + TD_MAX_REWRITTEN_SIZE + MAX_JUMP_PATCH_SIZE; - size_t rewritten_size = (uint8_t *) rp - rewritten_temp; - size_t jumpback_size = - jump_patch_size((uintptr_t) trampoline_ptr + rewritten_size, - pc_patch_end, arch, /* force */ true); - size_t outro_size = rewritten_size + jumpback_size; - if (outro_size > trampoline_size_left) { + if (outro_est > trampoline_size_left) { /* Not enough space left in our existing block... */ if ((ret = execmem_alloc_unsealed(0, &trampoline_ptr, &trampoline_size_left))) goto end; hi->trampoline_page = trampoline_ptr; - jumpback_size = - jump_patch_size((uintptr_t) trampoline_ptr + rewritten_size, - pc_patch_end, arch, /* force */ true); - outro_size = rewritten_size + jumpback_size; + } else { + trampoline_ptr += align_bytes; + trampoline_size_left -= align_bytes; } hi->outro_trampoline = trampoline_ptr; @@ -221,10 +208,23 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, dpc++; } #endif - memcpy(trampoline_ptr, rewritten_temp, rewritten_size); - trampoline_ptr += rewritten_size; + /* Generate the rewritten start of the function for the outro + * trampoline (complaining if any bad instructions are found) + * (on arm64, this modifies regs_possibly_written, which is used by the + * ending make_jump_patch call) */ + if ((ret = transform_dis_main(code, &trampoline_ptr, pc_patch_start, + &pc_patch_end, (uintptr_t) trampoline_ptr, + &arch, hi->offset_by_pcdiff))) + goto end; + /* Now that transform_dis_main has given us the final pc_patch_end, + * check some of the rest of the function for jumps back into the + * patched region. */ + if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) + goto end; + /* Okay, continue with the outro. */ make_jump_patch(&trampoline_ptr, (uintptr_t) trampoline_ptr, dpc, arch); - trampoline_size_left -= outro_size; + trampoline_size_left -= (uint8_t *) trampoline_ptr + - (uint8_t *) hi->outro_trampoline; } /* Now commit. */ diff --git a/lib/transform-dis.c b/lib/transform-dis.c index fe7482d..75abc82 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -16,6 +16,7 @@ struct transform_dis_ctx { int err; struct dis_ctx_base base; + uint_tptr pc_trampoline; uint_tptr pc_patch_start; /* this is only tentative - it will be updated to include parts of * instructions poking out, and instructions forced to be transformed by IT */ @@ -68,6 +69,7 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uint_tptr pc_patch_start, uint_tptr *pc_patch_end_p, + uint_tptr pc_trampoline, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff) { struct transform_dis_ctx ctx; @@ -86,6 +88,9 @@ int transform_dis_main(const void *restrict code_ptr, ctx.base.modify = false; ctx.err = 0; ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start); + ctx.pc_trampoline = pc_trampoline + + (*rewritten_ptr_ptr - rewritten_start); + const void *start = ctx.base.ptr; transform_dis_pre_dis(&ctx); @@ -94,13 +99,20 @@ int transform_dis_main(const void *restrict code_ptr, transform_dis_dis(&ctx); +#ifdef TRANSFORM_DIS_VERBOSE + printf("transform_dis (0x%llx): >> op_size=%d newop_size=%d\n", + (unsigned long long) ctx.base.pc, + ctx.base.op_size, + ctx.base.newop_size); +#endif + if (ctx.err) return ctx.err; if (ctx.write_newop_here != NULL) { if (ctx.base.modify) memcpy(ctx.write_newop_here, ctx.base.newop, ctx.base.newop_size); else - memcpy(ctx.write_newop_here, ctx.base.ptr, ctx.base.op_size); + memcpy(ctx.write_newop_here, start, ctx.base.op_size); if (*rewritten_ptr_ptr == rewritten_ptr) *rewritten_ptr_ptr += ctx.base.op_size; } diff --git a/lib/transform-dis.h b/lib/transform-dis.h index c1de937..4e853b0 100644 --- a/lib/transform-dis.h +++ b/lib/transform-dis.h @@ -7,5 +7,6 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uint_tptr pc_patch_start, uint_tptr *pc_patch_end_p, + uint_tptr pc_trampoline, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff); diff --git a/lib/x86/arch-dis.h b/lib/x86/arch-dis.h index 6447f38..d121549 100644 --- a/lib/x86/arch-dis.h +++ b/lib/x86/arch-dis.h @@ -1,6 +1,11 @@ #pragma once #define MIN_INSN_SIZE 1 -#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */ +/* min([18 * 3, + * 4 + 18 + 15 + 18, + * 6 + 12]) + * See transform_dis_* for size figures. Technically unsafe, since we don't + * check for overlong x86 instructions. */ +#define TD_MAX_REWRITTEN_SIZE 55 struct arch_pcrel_info { int reg; @@ -8,3 +13,6 @@ struct arch_pcrel_info { struct arch_dis_ctx {}; static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} +static inline int arch_code_alignment(UNUSED struct arch_dis_ctx ctx) { + return 4; +} diff --git a/lib/x86/arch-transform-dis.inc.h b/lib/x86/arch-transform-dis.inc.h index 1e5d872..6ec6c16 100644 --- a/lib/x86/arch-transform-dis.inc.h +++ b/lib/x86/arch-transform-dis.inc.h @@ -23,8 +23,10 @@ static inline void push_mov_tail(void **code, bool rax) { UNUSED static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc, struct arch_pcrel_info info) { - /* push %reg; mov $dpc, %reg; <orig but with reg instead>; pop %reg */ - /* reg is rcx, or rax if the instruction might be using rcx. */ + /* push %reg; mov $dpc, %reg; <orig but with reg instead>; pop %reg + * reg is rcx, or rax if the instruction might be using rcx. + * Max size: 11 + orig + 1 + * Minimum size is 6 bytes, so there could be at most 1 in a patch area. */ bool rax = info.reg == 1; void *code = *ctx->rewritten_ptr_ptr; push_mov_head(&code, dpc, rax); @@ -32,7 +34,7 @@ static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc, code += ctx->base.op_size; push_mov_tail(&code, rax); *ctx->rewritten_ptr_ptr = code; - ctx->base.newop[0] = rax ? 0 : 1; + ctx->base.newval[0] = rax ? 0 : 1; ctx->base.modify = true; } @@ -41,7 +43,9 @@ static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { if (dpc == ctx->base.pc + ctx->base.op_size && (cc & CC_CALL)) { /* Probably a poor man's PC-rel - 'call .; pop %some'. - * Push the original address. */ + * Push the original address. + * Max size: orig + 1 + 11 + 5 + 1 + * Minimum call size is 4 bytes; at most 2. */ void *code = *ctx->rewritten_ptr_ptr; ctx->write_newop_here = NULL; @@ -72,15 +76,18 @@ static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, code += ctx->base.op_size; struct arch_dis_ctx arch; - uintptr_t source = (uintptr_t) code + 2; + uintptr_t source = ctx->pc_trampoline + 2; int size = jump_patch_size(source, dpc, arch, true); - /* if not taken, jmp past the big jump - this is a bit suboptimal but not that bad */ + /* If not taken, jmp past the big jump - this is a bit suboptimal but not + * that bad. + * Max size: orig + 2 + 14 + * Minimum jump size is 2 bytes; at most 3. */ op8(&code, 0xeb); op8(&code, size); make_jump_patch(&code, source, dpc, arch); *ctx->rewritten_ptr_ptr = code; - ctx->base.newop[0] = 2; + ctx->base.newval[0] = 2; ctx->base.modify = true; if (!cc) diff --git a/lib/x86/dis-main.inc.h b/lib/x86/dis-main.inc.h index 45a0947..f948026 100644 --- a/lib/x86/dis-main.inc.h +++ b/lib/x86/dis-main.inc.h @@ -97,7 +97,7 @@ static const uint8_t _0f_bits[] = { /*60*/ REP16(I_MODA), /*70*/ I_MODA, I_MOD|I_8, I_MOD|I_8, I_MOD|I_8, I_MODA, I_MODA, I_MODA, 0, /*78*/ I_MODA, I_MODA, I_BAD, I_BAD, REP4(I_MODA), -/*80*/ REP16(I_z), +/*80*/ REP16(I_z|I_JIMM), /*90*/ REP16(I_MODA), /*Ax*/ 0, 0, 0, 0, 0, 0, I_BAD, I_BAD, /*A8*/ 0, 0, 0, I_MODA, I_MODA|I_8, I_MODA, I_MODA, I_MODA, @@ -279,6 +279,7 @@ got_bits: UNUSED case 1: *(int8_t *) new_imm_ptr = new_imm; break; case 2: *(int16_t *) new_imm_ptr = new_imm; break; case 4: *(int32_t *) new_imm_ptr = new_imm; break; + default: __builtin_abort(); } } #ifdef TARGET_x86_64 diff --git a/lib/x86/jump-patch.h b/lib/x86/jump-patch.h index 4c0172d..8cd7d6f 100644 --- a/lib/x86/jump-patch.h +++ b/lib/x86/jump-patch.h @@ -1,23 +1,23 @@ #pragma once -#define MAX_JUMP_PATCH_SIZE 5 +#define MAX_JUMP_PATCH_SIZE 14 #include "dis.h" -static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, +static inline int jump_patch_size(uint_tptr pc, uint_tptr dpc, UNUSED struct arch_dis_ctx arch, bool force) { - uintptr_t diff = pc - (dpc + 5); + uint_tptr diff = pc - (dpc + 5); /* fits in 32? */ - if (diff == (uintptr_t) (int32_t) diff) + if (diff == (uint_tptr) (int32_t) diff) return 5; else return force ? (2+4+8) : -1; } -static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, +static inline void make_jump_patch(void **codep, uint_tptr pc, uint_tptr dpc, UNUSED struct arch_dis_ctx arch) { - uintptr_t diff = pc - (dpc + 5); + uint_tptr diff = pc - (dpc + 5); void *code = *codep; - if (diff == (uintptr_t) (int32_t) diff) { + if (diff == (uint_tptr) (int32_t) diff) { op8(&code, 0xe9); op32(&code, diff); } else { diff --git a/test/test-transform-dis.c b/test/test-transform-dis.c index f55daa9..2d7de9e 100644 --- a/test/test-transform-dis.c +++ b/test/test-transform-dis.c @@ -29,11 +29,13 @@ static void do_manual(uint8_t *in, size_t in_size, int patch_size, printf("\n#if 0\n"); uint_tptr pc_patch_start = 0x10000; uint_tptr pc_patch_end = pc_patch_start + patch_size; + uint_tptr pc_trampoline = 0xf000; int ret = transform_dis_main( in, &rewritten_ptr, pc_patch_start, &pc_patch_end, + pc_trampoline, &arch, offsets); printf("=> %d\n", ret); @@ -95,12 +97,17 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { if (!memcmp(expect, "_ERR", 4)) { expect_err = true; in += 4; + assert(!memcmp(in, "GIVEN", 5)); + in += 5; } else { - uint8_t *next = memmem(in, end - in, "GIVEN", 5); - if (!next) - next = end; - expect_size = next - expect; - in = next; + in = memmem(in, end - in, "GIVEN", 5); + if (in) { + expect_size = in - expect; + in += 5; + } else { + in = end; + expect_size = in - expect; + } } size_t patch_size = given_size; int offsets[patch_size + 15]; @@ -108,15 +115,19 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { void *rewritten_ptr = out; uint_tptr pc_patch_start = 0xdead0000; uint_tptr pc_patch_end = pc_patch_start + patch_size; + uint_tptr pc_trampoline = 0xdeac0000; int ret = transform_dis_main( given, &rewritten_ptr, pc_patch_start, &pc_patch_end, + pc_trampoline, &arch, offsets); if (ret) { - if (!expect_err) { + if (expect_err) { + printf("OK\n"); + } else { print_given(given, given_size); printf("got ret %d, expected success\n\n", ret); } @@ -132,6 +143,8 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { printf("but expected:\n"); hex_dump(expect, expect_size); printf("\n"); + } else { + printf("OK\n"); } } diff --git a/test/transform-dis-cases-i386.S b/test/transform-dis-cases-i386.S index 31508ca..57707cc 100644 --- a/test/transform-dis-cases-i386.S +++ b/test/transform-dis-cases-i386.S @@ -2,15 +2,16 @@ #define EXPECT .ascii "EXPECT"; #define EXPECT_ERR .ascii "EXPECT_ERR"; -GIVEN call .; pop %edx +GIVEN call 0f; 0: pop %edx /* XXX the extra push isn't necessary in 32-bit mode */ -EXPECT push %eax; push %eax; mov $0xdead0005, %eax; pop %eax; pop %edx +EXPECT push %eax; push %eax; mov $0xdead0005, %eax; mov %eax, 4(%esp); pop %eax; pop %edx -GIVEN jmp 0f; 0: +GIVEN jmp 0f; 0: nop EXPECT_ERR -GIVEN jne 0xdead1000 -EXPECT jne 0f; jmp 1f; 0: jmp 0xdead1000; 1: +GIVEN jne .+0x1000 +/* we expect to generate an unnecessarily long jump, so hardcode it */ +EXPECT 2: .byte 0x0f, 0x85; .long 2; jmp 1f; 0: jmp 2b+0x1000; 1: GIVEN loopne 0xdead0080 EXPECT loopne 0f; jmp 1f; 0: jmp 0xdead1000; 1: |