From 67ebaf0d22fefa885d29c3c697fbe61956d18354 Mon Sep 17 00:00:00 2001 From: comex Date: Sat, 14 Feb 2015 23:14:14 -0500 Subject: Trampoline fixes. The transformed code was incorrect because it assumed the pointer it was writing to was where the code would execute, but it was actually 'rewritten_temp'. Changed transform_dis_main to take a pc_trampoline pointer, which also helps the test harness. However, this means that it has to be called after the trampoline has been allocated, while before the trampoline allocation depended on the generated size; this change doesn't bother to use two passes or anything, but just allocates a new code buffer if the maximum possible size isn't available - not the end of the world, since trampoline_ptr will still only be increased by the actual size before the next hook in the series (if any). --- lib/arm/arch-dis.h | 4 +++ lib/arm64/arch-dis.h | 4 +++ lib/arm64/jump-patch.h | 5 ++-- lib/hook-functions.c | 58 ++++++++++++++++++++-------------------- lib/transform-dis.c | 14 +++++++++- lib/transform-dis.h | 1 + lib/x86/arch-dis.h | 10 ++++++- lib/x86/arch-transform-dis.inc.h | 21 ++++++++++----- lib/x86/dis-main.inc.h | 3 ++- lib/x86/jump-patch.h | 14 +++++----- test/test-transform-dis.c | 25 ++++++++++++----- test/transform-dis-cases-i386.S | 11 ++++---- 12 files changed, 111 insertions(+), 59 deletions(-) diff --git a/lib/arm/arch-dis.h b/lib/arm/arch-dis.h index bc98cb9..8f2400b 100644 --- a/lib/arm/arch-dis.h +++ b/lib/arm/arch-dis.h @@ -39,6 +39,10 @@ static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { memset(ctx->it_conds, 0xe, 5); } +static inline int arch_code_alignment(struct arch_dis_ctx ctx) { + return ctx.pc_low_bit ? 2 : 4; +} + static inline void advance_it_cond(struct arch_dis_ctx *ctx) { ctx->it_conds[0] = ctx->it_conds[1]; ctx->it_conds[1] = ctx->it_conds[2]; diff --git a/lib/arm64/arch-dis.h b/lib/arm64/arch-dis.h index 2990f5a..f5b0518 100644 --- a/lib/arm64/arch-dis.h +++ b/lib/arm64/arch-dis.h @@ -26,6 +26,10 @@ static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) { ctx->regs_possibly_written = 0; } +static inline int arch_code_alignment(UNUSED struct arch_dis_ctx ctx) { + return 4; +} + static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) { uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9)); if (!avail) diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h index 3d3d653..0a276ba 100644 --- a/lib/arm64/jump-patch.h +++ b/lib/arm64/jump-patch.h @@ -1,12 +1,13 @@ #pragma once #include "arm64/assemble.h" -#define MAX_JUMP_PATCH_SIZE 12 +#define MAX_JUMP_PATCH_SIZE 20 + static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, UNUSED struct arch_dis_ctx arch, bool force) { intptr_t diff = (dpc & ~0xfff) - (pc & ~0xfff); if (!(diff >= -0x100000000 && diff < 0x100000000)) - return force ? 16 : -1; + return force ? (size_of_MOVi64(dpc) + 4) : -1; else if (!(dpc & 0xfff)) return 8; else diff --git a/lib/hook-functions.c b/lib/hook-functions.c index 953683b..7db06d4 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -77,6 +77,8 @@ static int check_intro_trampoline(void **trampoline_ptr_p, if (*patch_size_p != -1) return SUBSTITUTE_OK; + *need_intro_trampoline_p = true; + /* Try existing trampoline */ *patch_size_p = jump_patch_size(pc, (uintptr_t) trampoline_ptr, arch, false); @@ -122,8 +124,7 @@ skip_after:; EXPORT int substitute_hook_functions(const struct substitute_function_hook *hooks, - size_t nhooks, - int options) { + size_t nhooks, int options) { struct hook_internal *his = malloc(nhooks * sizeof(*his)); if (!his) return SUBSTITUTE_ERR_OOM; @@ -169,20 +170,6 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, goto end; uint_tptr pc_patch_end = pc_patch_start + patch_size; - /* Generate the rewritten start of the function for the outro - * trampoline (complaining if any bad instructions are found) - * (on arm64, this modifies regs_possibly_written, which is used by the - * two make_jump_patch calls) */ - uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE]; - void *rp = rewritten_temp; - if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end, - &arch, hi->offset_by_pcdiff))) - goto end; - /* Check some of the rest of the function for jumps back into the - * patched region. */ - if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) - goto end; - uintptr_t initial_target; if (need_intro_trampoline) { initial_target = (uintptr_t) trampoline_ptr; @@ -191,26 +178,26 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, } else { initial_target = (uintptr_t) hook->replacement; } + + /* Make the real jump patch for the target function. */ void *jp = hi->jump_patch; make_jump_patch(&jp, pc_patch_start, initial_target, arch); hi->jump_patch_size = (uint8_t *) jp - hi->jump_patch; + size_t align_bytes = (-(uintptr_t) trampoline_ptr) + & (arch_code_alignment(arch) - 1); + size_t outro_est = align_bytes + + TD_MAX_REWRITTEN_SIZE + MAX_JUMP_PATCH_SIZE; - size_t rewritten_size = (uint8_t *) rp - rewritten_temp; - size_t jumpback_size = - jump_patch_size((uintptr_t) trampoline_ptr + rewritten_size, - pc_patch_end, arch, /* force */ true); - size_t outro_size = rewritten_size + jumpback_size; - if (outro_size > trampoline_size_left) { + if (outro_est > trampoline_size_left) { /* Not enough space left in our existing block... */ if ((ret = execmem_alloc_unsealed(0, &trampoline_ptr, &trampoline_size_left))) goto end; hi->trampoline_page = trampoline_ptr; - jumpback_size = - jump_patch_size((uintptr_t) trampoline_ptr + rewritten_size, - pc_patch_end, arch, /* force */ true); - outro_size = rewritten_size + jumpback_size; + } else { + trampoline_ptr += align_bytes; + trampoline_size_left -= align_bytes; } hi->outro_trampoline = trampoline_ptr; @@ -221,10 +208,23 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks, dpc++; } #endif - memcpy(trampoline_ptr, rewritten_temp, rewritten_size); - trampoline_ptr += rewritten_size; + /* Generate the rewritten start of the function for the outro + * trampoline (complaining if any bad instructions are found) + * (on arm64, this modifies regs_possibly_written, which is used by the + * ending make_jump_patch call) */ + if ((ret = transform_dis_main(code, &trampoline_ptr, pc_patch_start, + &pc_patch_end, (uintptr_t) trampoline_ptr, + &arch, hi->offset_by_pcdiff))) + goto end; + /* Now that transform_dis_main has given us the final pc_patch_end, + * check some of the rest of the function for jumps back into the + * patched region. */ + if ((ret = jump_dis_main(code, pc_patch_start, pc_patch_end, arch))) + goto end; + /* Okay, continue with the outro. */ make_jump_patch(&trampoline_ptr, (uintptr_t) trampoline_ptr, dpc, arch); - trampoline_size_left -= outro_size; + trampoline_size_left -= (uint8_t *) trampoline_ptr + - (uint8_t *) hi->outro_trampoline; } /* Now commit. */ diff --git a/lib/transform-dis.c b/lib/transform-dis.c index fe7482d..75abc82 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -16,6 +16,7 @@ struct transform_dis_ctx { int err; struct dis_ctx_base base; + uint_tptr pc_trampoline; uint_tptr pc_patch_start; /* this is only tentative - it will be updated to include parts of * instructions poking out, and instructions forced to be transformed by IT */ @@ -68,6 +69,7 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uint_tptr pc_patch_start, uint_tptr *pc_patch_end_p, + uint_tptr pc_trampoline, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff) { struct transform_dis_ctx ctx; @@ -86,6 +88,9 @@ int transform_dis_main(const void *restrict code_ptr, ctx.base.modify = false; ctx.err = 0; ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start); + ctx.pc_trampoline = pc_trampoline + + (*rewritten_ptr_ptr - rewritten_start); + const void *start = ctx.base.ptr; transform_dis_pre_dis(&ctx); @@ -94,13 +99,20 @@ int transform_dis_main(const void *restrict code_ptr, transform_dis_dis(&ctx); +#ifdef TRANSFORM_DIS_VERBOSE + printf("transform_dis (0x%llx): >> op_size=%d newop_size=%d\n", + (unsigned long long) ctx.base.pc, + ctx.base.op_size, + ctx.base.newop_size); +#endif + if (ctx.err) return ctx.err; if (ctx.write_newop_here != NULL) { if (ctx.base.modify) memcpy(ctx.write_newop_here, ctx.base.newop, ctx.base.newop_size); else - memcpy(ctx.write_newop_here, ctx.base.ptr, ctx.base.op_size); + memcpy(ctx.write_newop_here, start, ctx.base.op_size); if (*rewritten_ptr_ptr == rewritten_ptr) *rewritten_ptr_ptr += ctx.base.op_size; } diff --git a/lib/transform-dis.h b/lib/transform-dis.h index c1de937..4e853b0 100644 --- a/lib/transform-dis.h +++ b/lib/transform-dis.h @@ -7,5 +7,6 @@ int transform_dis_main(const void *restrict code_ptr, void **restrict rewritten_ptr_ptr, uint_tptr pc_patch_start, uint_tptr *pc_patch_end_p, + uint_tptr pc_trampoline, struct arch_dis_ctx *arch_ctx_p, int *offset_by_pcdiff); diff --git a/lib/x86/arch-dis.h b/lib/x86/arch-dis.h index 6447f38..d121549 100644 --- a/lib/x86/arch-dis.h +++ b/lib/x86/arch-dis.h @@ -1,6 +1,11 @@ #pragma once #define MIN_INSN_SIZE 1 -#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */ +/* min([18 * 3, + * 4 + 18 + 15 + 18, + * 6 + 12]) + * See transform_dis_* for size figures. Technically unsafe, since we don't + * check for overlong x86 instructions. */ +#define TD_MAX_REWRITTEN_SIZE 55 struct arch_pcrel_info { int reg; @@ -8,3 +13,6 @@ struct arch_pcrel_info { struct arch_dis_ctx {}; static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {} +static inline int arch_code_alignment(UNUSED struct arch_dis_ctx ctx) { + return 4; +} diff --git a/lib/x86/arch-transform-dis.inc.h b/lib/x86/arch-transform-dis.inc.h index 1e5d872..6ec6c16 100644 --- a/lib/x86/arch-transform-dis.inc.h +++ b/lib/x86/arch-transform-dis.inc.h @@ -23,8 +23,10 @@ static inline void push_mov_tail(void **code, bool rax) { UNUSED static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc, struct arch_pcrel_info info) { - /* push %reg; mov $dpc, %reg; ; pop %reg */ - /* reg is rcx, or rax if the instruction might be using rcx. */ + /* push %reg; mov $dpc, %reg; ; pop %reg + * reg is rcx, or rax if the instruction might be using rcx. + * Max size: 11 + orig + 1 + * Minimum size is 6 bytes, so there could be at most 1 in a patch area. */ bool rax = info.reg == 1; void *code = *ctx->rewritten_ptr_ptr; push_mov_head(&code, dpc, rax); @@ -32,7 +34,7 @@ static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc, code += ctx->base.op_size; push_mov_tail(&code, rax); *ctx->rewritten_ptr_ptr = code; - ctx->base.newop[0] = rax ? 0 : 1; + ctx->base.newval[0] = rax ? 0 : 1; ctx->base.modify = true; } @@ -41,7 +43,9 @@ static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) { if (dpc == ctx->base.pc + ctx->base.op_size && (cc & CC_CALL)) { /* Probably a poor man's PC-rel - 'call .; pop %some'. - * Push the original address. */ + * Push the original address. + * Max size: orig + 1 + 11 + 5 + 1 + * Minimum call size is 4 bytes; at most 2. */ void *code = *ctx->rewritten_ptr_ptr; ctx->write_newop_here = NULL; @@ -72,15 +76,18 @@ static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, code += ctx->base.op_size; struct arch_dis_ctx arch; - uintptr_t source = (uintptr_t) code + 2; + uintptr_t source = ctx->pc_trampoline + 2; int size = jump_patch_size(source, dpc, arch, true); - /* if not taken, jmp past the big jump - this is a bit suboptimal but not that bad */ + /* If not taken, jmp past the big jump - this is a bit suboptimal but not + * that bad. + * Max size: orig + 2 + 14 + * Minimum jump size is 2 bytes; at most 3. */ op8(&code, 0xeb); op8(&code, size); make_jump_patch(&code, source, dpc, arch); *ctx->rewritten_ptr_ptr = code; - ctx->base.newop[0] = 2; + ctx->base.newval[0] = 2; ctx->base.modify = true; if (!cc) diff --git a/lib/x86/dis-main.inc.h b/lib/x86/dis-main.inc.h index 45a0947..f948026 100644 --- a/lib/x86/dis-main.inc.h +++ b/lib/x86/dis-main.inc.h @@ -97,7 +97,7 @@ static const uint8_t _0f_bits[] = { /*60*/ REP16(I_MODA), /*70*/ I_MODA, I_MOD|I_8, I_MOD|I_8, I_MOD|I_8, I_MODA, I_MODA, I_MODA, 0, /*78*/ I_MODA, I_MODA, I_BAD, I_BAD, REP4(I_MODA), -/*80*/ REP16(I_z), +/*80*/ REP16(I_z|I_JIMM), /*90*/ REP16(I_MODA), /*Ax*/ 0, 0, 0, 0, 0, 0, I_BAD, I_BAD, /*A8*/ 0, 0, 0, I_MODA, I_MODA|I_8, I_MODA, I_MODA, I_MODA, @@ -279,6 +279,7 @@ got_bits: UNUSED case 1: *(int8_t *) new_imm_ptr = new_imm; break; case 2: *(int16_t *) new_imm_ptr = new_imm; break; case 4: *(int32_t *) new_imm_ptr = new_imm; break; + default: __builtin_abort(); } } #ifdef TARGET_x86_64 diff --git a/lib/x86/jump-patch.h b/lib/x86/jump-patch.h index 4c0172d..8cd7d6f 100644 --- a/lib/x86/jump-patch.h +++ b/lib/x86/jump-patch.h @@ -1,23 +1,23 @@ #pragma once -#define MAX_JUMP_PATCH_SIZE 5 +#define MAX_JUMP_PATCH_SIZE 14 #include "dis.h" -static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, +static inline int jump_patch_size(uint_tptr pc, uint_tptr dpc, UNUSED struct arch_dis_ctx arch, bool force) { - uintptr_t diff = pc - (dpc + 5); + uint_tptr diff = pc - (dpc + 5); /* fits in 32? */ - if (diff == (uintptr_t) (int32_t) diff) + if (diff == (uint_tptr) (int32_t) diff) return 5; else return force ? (2+4+8) : -1; } -static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, +static inline void make_jump_patch(void **codep, uint_tptr pc, uint_tptr dpc, UNUSED struct arch_dis_ctx arch) { - uintptr_t diff = pc - (dpc + 5); + uint_tptr diff = pc - (dpc + 5); void *code = *codep; - if (diff == (uintptr_t) (int32_t) diff) { + if (diff == (uint_tptr) (int32_t) diff) { op8(&code, 0xe9); op32(&code, diff); } else { diff --git a/test/test-transform-dis.c b/test/test-transform-dis.c index f55daa9..2d7de9e 100644 --- a/test/test-transform-dis.c +++ b/test/test-transform-dis.c @@ -29,11 +29,13 @@ static void do_manual(uint8_t *in, size_t in_size, int patch_size, printf("\n#if 0\n"); uint_tptr pc_patch_start = 0x10000; uint_tptr pc_patch_end = pc_patch_start + patch_size; + uint_tptr pc_trampoline = 0xf000; int ret = transform_dis_main( in, &rewritten_ptr, pc_patch_start, &pc_patch_end, + pc_trampoline, &arch, offsets); printf("=> %d\n", ret); @@ -95,12 +97,17 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { if (!memcmp(expect, "_ERR", 4)) { expect_err = true; in += 4; + assert(!memcmp(in, "GIVEN", 5)); + in += 5; } else { - uint8_t *next = memmem(in, end - in, "GIVEN", 5); - if (!next) - next = end; - expect_size = next - expect; - in = next; + in = memmem(in, end - in, "GIVEN", 5); + if (in) { + expect_size = in - expect; + in += 5; + } else { + in = end; + expect_size = in - expect; + } } size_t patch_size = given_size; int offsets[patch_size + 15]; @@ -108,15 +115,19 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { void *rewritten_ptr = out; uint_tptr pc_patch_start = 0xdead0000; uint_tptr pc_patch_end = pc_patch_start + patch_size; + uint_tptr pc_trampoline = 0xdeac0000; int ret = transform_dis_main( given, &rewritten_ptr, pc_patch_start, &pc_patch_end, + pc_trampoline, &arch, offsets); if (ret) { - if (!expect_err) { + if (expect_err) { + printf("OK\n"); + } else { print_given(given, given_size); printf("got ret %d, expected success\n\n", ret); } @@ -132,6 +143,8 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) { printf("but expected:\n"); hex_dump(expect, expect_size); printf("\n"); + } else { + printf("OK\n"); } } diff --git a/test/transform-dis-cases-i386.S b/test/transform-dis-cases-i386.S index 31508ca..57707cc 100644 --- a/test/transform-dis-cases-i386.S +++ b/test/transform-dis-cases-i386.S @@ -2,15 +2,16 @@ #define EXPECT .ascii "EXPECT"; #define EXPECT_ERR .ascii "EXPECT_ERR"; -GIVEN call .; pop %edx +GIVEN call 0f; 0: pop %edx /* XXX the extra push isn't necessary in 32-bit mode */ -EXPECT push %eax; push %eax; mov $0xdead0005, %eax; pop %eax; pop %edx +EXPECT push %eax; push %eax; mov $0xdead0005, %eax; mov %eax, 4(%esp); pop %eax; pop %edx -GIVEN jmp 0f; 0: +GIVEN jmp 0f; 0: nop EXPECT_ERR -GIVEN jne 0xdead1000 -EXPECT jne 0f; jmp 1f; 0: jmp 0xdead1000; 1: +GIVEN jne .+0x1000 +/* we expect to generate an unnecessarily long jump, so hardcode it */ +EXPECT 2: .byte 0x0f, 0x85; .long 2; jmp 1f; 0: jmp 2b+0x1000; 1: GIVEN loopne 0xdead0080 EXPECT loopne 0f; jmp 1f; 0: jmp 0xdead1000; 1: -- cgit v1.2.3