diff options
author | comex | 2015-01-24 21:59:37 -0500 |
---|---|---|
committer | comex | 2015-01-24 21:59:37 -0500 |
commit | 98afb15eaa8f8c31bf5763de0e3c83a845414b0a (patch) | |
tree | e603aae961ead4e596fc994ed06df884eaee7b9a /lib | |
parent | Add function to deal with mprotecting RW and back. A bit more complex than t... (diff) | |
download | substitute-98afb15eaa8f8c31bf5763de0e3c83a845414b0a.tar.gz |
...
Diffstat (limited to '')
-rw-r--r-- | lib/arm/assemble.h | 80 | ||||
-rw-r--r-- | lib/arm/jump-patch.h | 18 | ||||
-rw-r--r-- | lib/arm/misc.h | 7 | ||||
-rw-r--r-- | lib/arm/transform-dis-arm-multi.inc.h | 121 | ||||
-rw-r--r-- | lib/arm64/assemble.h | 53 | ||||
-rw-r--r-- | lib/arm64/jump-patch.h | 19 | ||||
-rw-r--r-- | lib/arm64/misc.h | 1 | ||||
-rw-r--r-- | lib/arm64/transform-dis-arm64.inc.h | 49 | ||||
-rw-r--r-- | lib/darwin/stop-other-threads.c | 12 | ||||
-rw-r--r-- | lib/dis.h | 6 | ||||
-rw-r--r-- | lib/execmem.h | 6 | ||||
-rw-r--r-- | lib/hook-functions.c | 247 | ||||
-rw-r--r-- | lib/substitute.h | 6 | ||||
-rw-r--r-- | lib/transform-dis.c | 6 |
14 files changed, 474 insertions, 157 deletions
diff --git a/lib/arm/assemble.h b/lib/arm/assemble.h new file mode 100644 index 0000000..90f91c9 --- /dev/null +++ b/lib/arm/assemble.h @@ -0,0 +1,80 @@ +#pragma once +#include "dis.h" + +static inline void PUSHone(void **codep, int Rt) { + if (codep->arch.pc_low_bit) + op32(codep, 0x0d04f84d | Rt << 28); + else + op32(codep, 0xe52d0004 | Rt << 12); +} + +static inline void POPone(void **codep, int Rt) { + if (codep->arch.pc_low_bit) + op32(codep, 0x0b04f85d | Rt << 28); + else + op32(codep, 0xe49d0004 | Rt << 12); +} + +static inline void POPmulti(void **codep, uint16_t mask) { + if (codep->arch.pc_low_bit) + op32(codep, 0x0000e8bd | mask << 16); + else + op32(codep, 0xe8bd0000 | mask); +} + +static inline void MOVW_MOVT(void **codep, int Rd, uint32_t val) { + uint16_t hi = val >> 16, lo = (uint16_t) val; + if (codep->arch.pc_low_bit) { + op32(codep, 0x0000f240 | Rd << 24 | lo >> 12 | (lo >> 11 & 1) << 10 | + (lo >> 8 & 7) << 28 | (lo & 0xff) << 16); + op32(codep, 0x0000f2c0 | Rd << 24 | hi >> 12 | (hi >> 11 & 1) << 10 | + (hi >> 8 & 7) << 28 | (hi & 0xff) << 16); + + } else { + op32(codep, 0xe3000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff)); + op32(codep, 0xe3400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff)); + } + +} + +static inline void STRri(void **codep, int Rt, int Rn, uint32_t off) { + if (codep->arch.pc_low_bit) + op32(codep, 0x0000f8c0 | Rn | Rt << 28 | off << 16); + else + op32(codep, 0xe4800000 | Rn << 16 | Rt << 12 | off); +} + +static inline void LDRxi(void **codep, int Rt, int Rn, uint32_t off, + enum pcrel_load_mode load_mode) { + if (codep->arch.pc_low_bit) { + int subop, sign; + switch (load_mode) { + case PLM_U8: subop = 0; sign = 0; break; + case PLM_S8: subop = 0; sign = 1; break; + case PLM_U16: subop = 1; sign = 0; break; + case PLM_S16: subop = 1; sign = 1; break; + case PLM_U32: subop = 2; sign = 0; break; + default: __builtin_abort(); + } + op32(codep, 0x0000f890 | Rn | Rt << 28 | subop << 5 | sign << 8 | off << 16); + } else { + int is_byte, subop, not_ldrd; + switch (load_mode) { + case PLM_U8: is_byte = 1; goto type1; + case PLM_S8: subop = 13; not_ldrd = 1; goto type2; + case PLM_U16: subop = 11; not_ldrd = 1; goto type2; + case PLM_S16: subop = 15; not_ldrd = 1; goto type2; + case PLM_U32: is_byte = 0; goto type1; + case PLM_U128: subop = 13; not_ldrd = 0; goto type2; + type1: + op32(codep, 0xe5900000 | Rn << 16 | Rt << 12 | off); + break; + type2: + op32(codep, 0xe1c00000 | Rn << 16 | Rt << 12 | subop << 4 | + (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20); + break; + default: + __builtin_abort(); + } + } +} diff --git a/lib/arm/jump-patch.h b/lib/arm/jump-patch.h new file mode 100644 index 0000000..b19e90f --- /dev/null +++ b/lib/arm/jump-patch.h @@ -0,0 +1,18 @@ +#pragma once +#include "dis.h" +#define MAX_JUMP_PATCH_SIZE 8 +#define MAX_REWRITTEN_SIZE (12 * 4) /* actually should be less */ + +static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, + struct arch_dis_ctx arch) { + return 8; +} + +static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, + struct arch_dis_ctx arch) { + if (arch.pc_low_bit) + op32(codep, 0xf000f8df); + else + op32(codep, 0xe51ff004); + op32(codep, (uint32_t) pc); +} diff --git a/lib/arm/misc.h b/lib/arm/misc.h index 047be2d..3dc9633 100644 --- a/lib/arm/misc.h +++ b/lib/arm/misc.h @@ -1,6 +1,7 @@ #pragma once #define TARGET_DIS_SUPPORTED #define TARGET_DIS_HEADER "arm/dis-arm-multi.inc.h" +#define TARGET_JUMP_PATCH_HDR "arm/jump-patch.h" #define TARGET_TRANSFORM_DIS_HEADER "arm/transform-dis-arm-multi.inc.h" #define MIN_INSN_SIZE 2 struct arch_dis_ctx { @@ -8,3 +9,9 @@ struct arch_dis_ctx { bool pc_low_bit; }; enum { IS_LDRD_STRD = 1 << 16 }; + +#define JUMP_PATCH_SIZE 8 +#define MAX_REWRITTEN_SIZE (12 * 4) /* actually should be less */ +static inline bool can_reach_with_jump_patch(uintptr_t pc, uintptr_t dpc) { + return true; +} diff --git a/lib/arm/transform-dis-arm-multi.inc.h b/lib/arm/transform-dis-arm-multi.inc.h index f1b9b80..001e613 100644 --- a/lib/arm/transform-dis-arm-multi.inc.h +++ b/lib/arm/transform-dis-arm-multi.inc.h @@ -1,80 +1,4 @@ -static inline void PUSHone(struct transform_dis_ctx *ctx, int Rt) { - if (ctx->arch.pc_low_bit) - op32(ctx, 0x0d04f84d | Rt << 28); - else - op32(ctx, 0xe52d0004 | Rt << 12); -} - -static inline void POPone(struct transform_dis_ctx *ctx, int Rt) { - if (ctx->arch.pc_low_bit) - op32(ctx, 0x0b04f85d | Rt << 28); - else - op32(ctx, 0xe49d0004 | Rt << 12); -} - -static inline void POPmulti(struct transform_dis_ctx *ctx, uint16_t mask) { - if (ctx->arch.pc_low_bit) - op32(ctx, 0x0000e8bd | mask << 16); - else - op32(ctx, 0xe8bd0000 | mask); -} - -static inline void MOVW_MOVT(struct transform_dis_ctx *ctx, int Rd, uint32_t val) { - uint16_t hi = val >> 16, lo = (uint16_t) val; - if (ctx->arch.pc_low_bit) { - op32(ctx, 0x0000f240 | Rd << 24 | lo >> 12 | (lo >> 11 & 1) << 10 | - (lo >> 8 & 7) << 28 | (lo & 0xff) << 16); - op32(ctx, 0x0000f2c0 | Rd << 24 | hi >> 12 | (hi >> 11 & 1) << 10 | - (hi >> 8 & 7) << 28 | (hi & 0xff) << 16); - - } else { - op32(ctx, 0xe3000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff)); - op32(ctx, 0xe3400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff)); - } - -} - -static inline void STRri(struct transform_dis_ctx *ctx, int Rt, int Rn, uint32_t off) { - if (ctx->arch.pc_low_bit) - op32(ctx, 0x0000f8c0 | Rn | Rt << 28 | off << 16); - else - op32(ctx, 0xe4800000 | Rn << 16 | Rt << 12 | off); -} - -static inline void LDRxi(struct transform_dis_ctx *ctx, int Rt, int Rn, uint32_t off, - enum pcrel_load_mode load_mode) { - if (ctx->arch.pc_low_bit) { - int subop, sign; - switch (load_mode) { - case PLM_U8: subop = 0; sign = 0; break; - case PLM_S8: subop = 0; sign = 1; break; - case PLM_U16: subop = 1; sign = 0; break; - case PLM_S16: subop = 1; sign = 1; break; - case PLM_U32: subop = 2; sign = 0; break; - default: __builtin_abort(); - } - op32(ctx, 0x0000f890 | Rn | Rt << 28 | subop << 5 | sign << 8 | off << 16); - } else { - int is_byte, subop, not_ldrd; - switch (load_mode) { - case PLM_U8: is_byte = 1; goto type1; - case PLM_S8: subop = 13; not_ldrd = 1; goto type2; - case PLM_U16: subop = 11; not_ldrd = 1; goto type2; - case PLM_S16: subop = 15; not_ldrd = 1; goto type2; - case PLM_U32: is_byte = 0; goto type1; - case PLM_U128: subop = 13; not_ldrd = 0; goto type2; - type1: - op32(ctx, 0xe5900000 | Rn << 16 | Rt << 12 | off); - break; - type2: - op32(ctx, 0xe1c00000 | Rn << 16 | Rt << 12 | subop << 4 | - (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20); - break; - default: - __builtin_abort(); - } - } -} +#include "arm/assemble.h" static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, unsigned o0, unsigned o1, unsigned o2, unsigned o3, unsigned out_mask) { @@ -91,7 +15,7 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, newval[2] = o2; newval[3] = o3; - void **rpp = ctx->rewritten_ptr_ptr; + void **codep = ctx->rewritten_ptr_ptr; /* A few cases: * 1. Move to PC that does not read PC. Probably fine. @@ -130,32 +54,33 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx, if (in_regs & 1 << 15) return; /* case 1 */ /* case 2 */ - PUSHone(ctx, scratch); - PUSHone(ctx, scratch); - MOVW_MOVT(ctx, scratch, pc); + PUSHone(codep, scratch); + PUSHone(codep, scratch); + MOVW_MOVT(codep, scratch, pc); for (int i = 0; i < 4; i++) if (newval[i] == 15) newval[i] = scratch; - ctx->write_newop_here = *rpp; *rpp += ctx->op_size; - STRri(ctx, scratch, 13, 4); - POPmulti(ctx, 1 << scratch | 1 << 15); + ctx->write_newop_here = *codep; *codep += ctx->op_size; + STRri(codep, scratch, 13, 4); + POPmulti(codep, 1 << scratch | 1 << 15); + transform_dis_ret(ctx); } else { if (out_reg != -1 && !(in_regs & 1 << out_reg)) { /* case 3 - ignore scratch */ - MOVW_MOVT(ctx, out_reg, pc); + MOVW_MOVT(codep, out_reg, pc); for (int i = 0; i < 4; i++) if (newval[i] == 15) newval[i] = out_reg; - ctx->write_newop_here = *rpp; *rpp += ctx->op_size; + ctx->write_newop_here = *codep; *codep += ctx->op_size; } else { /* case 4 */ - PUSHone(ctx, scratch); - MOVW_MOVT(ctx, scratch, pc); + PUSHone(codep, scratch); + MOVW_MOVT(codep, scratch, pc); for (int i = 0; i < 4; i++) if (newval[i] == 15) newval[i] = scratch; ctx->write_newop_here = *rpp; *rpp += ctx->op_size; - POPone(ctx, scratch); + POPone(codep, scratch); } } ctx->modify = true; @@ -172,18 +97,20 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx, (void *) dpc, reg, load_mode); #endif ctx->write_newop_here = NULL; + void **codep = ctx->rewritten_ptr_ptr; if (reg == 15) { int scratch = 0; - PUSHone(ctx, scratch); - PUSHone(ctx, scratch); - MOVW_MOVT(ctx, scratch, dpc); + PUSHone(codep, scratch); + PUSHone(codep, scratch); + MOVW_MOVT(codep, scratch, dpc); if (load_mode != PLM_ADR) - LDRxi(ctx, scratch, scratch, 0, load_mode); - STRri(ctx, scratch, 13, 4); - POPmulti(ctx, 1 << scratch | 1 << 15); + LDRxi(codep, scratch, scratch, 0, load_mode); + STRri(codep, scratch, 13, 4); + POPmulti(codep, 1 << scratch | 1 << 15); + transform_dis_ret(codep); } else { - MOVW_MOVT(ctx, reg, dpc); + MOVW_MOVT(codep, reg, dpc); if (load_mode != PLM_ADR) - LDRxi(ctx, reg, reg, 0, load_mode); + LDRxi(codep, reg, reg, 0, load_mode); } } diff --git a/lib/arm64/assemble.h b/lib/arm64/assemble.h new file mode 100644 index 0000000..00797e9 --- /dev/null +++ b/lib/arm64/assemble.h @@ -0,0 +1,53 @@ +#pragma once +#include "dis.h" +static inline void MOVi64(void **codep, int Rd, uint64_t val) { + int shift_nybbles = 0; + do { + int k = shift_nybbles != 0 ? 1 : 0; + op32(codep, 0x69400000 | k << 28 | Rd | (val & 0xffff) << 4 | + shift_nybbles << 20); + shift_nybbles++; + val >>= 16; + } while(val); +} + +static inline void LDRxi(void **codep, int Rt, int Rn, uint32_t off, + bool regsize_64, enum pcrel_load_mode load_mode) { + int size, opc; + bool sign, simd; + switch (load_mode) { + case PLM_U8: size = 0; sign = false; simd = false; break; + case PLM_S8: size = 0; sign = true; simd = false; break; + case PLM_U16: size = 1; sign = false; simd = false; break; + case PLM_S16: size = 1; sign = true; simd = false; break; + case PLM_U32: size = 2; sign = false; simd = false; break; + case PLM_S32: size = 2; sign = true; simd = false; break; + case PLM_U64: size = 3; sign = false; simd = false; break; + case PLM_U32_SIMD: size = 2; opc = 1; simd = true; break; + case PLM_U64_SIMD: size = 3; opc = 1; simd = true; break; + case PLM_U128_SIMD: size = 0; opc = 3; simd = true; break; + default: __builtin_abort(); + } + if (simd) { + off /= 1 << (size | (opc & 1) << 2); + } else { + off /= 1 << size; + opc = sign ? (regsize_64 ? 2 : 3) : 1; + } + op32(codep, 0x39000000 | Rt | Rn << 5 | off << 10 | opc << 22 | simd << 26 | + size << 30); +} + +static inline void ADRP_ADD(void **codep, int reg, uint64_t pc, uint64_t dpc) { + uintptr_t diff = (dpc & ~0xfff) - (pc & ~0xfff); + /* ADRP reg, dpc */ + op32(codep, 0x90000000 | reg | (diff & 0x3000) << 17 | (diff & 0xffffc000) >> 8); + uint32_t lo = pc & 0xfff; + if (lo) { + /* ADD reg, reg, #lo */ + op32(codep, 0x91000000 | reg | reg << 5 | lo << 10); + } + /* BR reg */ + op32(codep, 0xd61f0000 | reg << 5); +} + diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h new file mode 100644 index 0000000..f0f149f --- /dev/null +++ b/lib/arm64/jump-patch.h @@ -0,0 +1,19 @@ +#pragma once +#include "arm64/assemble.h" +#define MAX_JUMP_PATCH_SIZE 12 +#define MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */ +static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc, + struct arch_dis_ctx arch) { + intptr_t diff = (dpc & ~0xfff) - (pc & ~0xfff); + if (!(diff >= -0x100000000 && diff < 0x100000000)) + return -1; + else if (pc & 0xfff) + return 8; + else + return 12; +} + +static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc, + struct arch_dis_ctx arch) { + ADRP_ADD(codep, 12 /* XXX */, pc, dpc); +} diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h index 672e1bd..c7fa5c9 100644 --- a/lib/arm64/misc.h +++ b/lib/arm64/misc.h @@ -1,6 +1,7 @@ #pragma once #define TARGET_DIS_SUPPORTED #define TARGET_DIS_HEADER "arm64/dis-arm64.inc.h" +#define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h" #define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h" #define MIN_INSN_SIZE 4 struct arch_dis_ctx {}; diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/transform-dis-arm64.inc.h index c47971b..3710b73 100644 --- a/lib/arm64/transform-dis-arm64.inc.h +++ b/lib/arm64/transform-dis-arm64.inc.h @@ -1,52 +1,17 @@ -static inline void MOVi64(struct transform_dis_ctx *ctx, int Rd, uint64_t val) { - int shift_nybbles = 0; - do { - int k = shift_nybbles != 0 ? 1 : 0; - op32(ctx, 0x69400000 | k << 28 | Rd | (val & 0xffff) << 4 | shift_nybbles << 20); - shift_nybbles++; - val >>= 16; - } while(val); -} - -static inline void LDRxi(struct transform_dis_ctx *ctx, int Rt, int Rn, uint32_t off, - bool regsize_64, enum pcrel_load_mode load_mode) { - int size, opc; - bool sign, simd; - switch (load_mode) { - case PLM_U8: size = 0; sign = false; simd = false; break; - case PLM_S8: size = 0; sign = true; simd = false; break; - case PLM_U16: size = 1; sign = false; simd = false; break; - case PLM_S16: size = 1; sign = true; simd = false; break; - case PLM_U32: size = 2; sign = false; simd = false; break; - case PLM_S32: size = 2; sign = true; simd = false; break; - case PLM_U64: size = 3; sign = false; simd = false; break; - case PLM_U32_SIMD: size = 2; opc = 1; simd = true; break; - case PLM_U64_SIMD: size = 3; opc = 1; simd = true; break; - case PLM_U128_SIMD: size = 0; opc = 3; simd = true; break; - default: __builtin_abort(); - } - if (simd) { - off /= 1 << (size | (opc & 1) << 2); - } else { - off /= 1 << size; - opc = sign ? (regsize_64 ? 2 : 3) : 1; - } - op32(ctx, 0x39000000 | Rt | Rn << 5 | off << 10 | opc << 22 | simd << 26 | size << 30); -} - static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned reg, enum pcrel_load_mode load_mode) { ctx->write_newop_here = NULL; + void **codep = ctx->rewritten_ptr_ptr; if (load_mode >= PLM_U32_SIMD) { /* use x0 as scratch */ - op32(ctx, 0xf81f0fe0); /* str x0, [sp, #-0x10]! */ - MOVi64(ctx, 0, dpc); - LDRxi(ctx, reg, 0, 0, true, load_mode); - op32(ctx, 0xf84107e0); /* ldr x0, [sp], #0x10 */ + op32(codep, 0xf81f0fe0); /* str x0, [sp, #-0x10]! */ + MOVi64(codep, 0, dpc); + LDRxi(codep, reg, 0, 0, true, load_mode); + op32(codep, 0xf84107e0); /* ldr x0, [sp], #0x10 */ } else { - MOVi64(ctx, reg, dpc); - LDRxi(ctx, reg, reg, 0, true, load_mode); + MOVi64(codep, reg, dpc); + LDRxi(codep, reg, reg, 0, true, load_mode); } } diff --git a/lib/darwin/stop-other-threads.c b/lib/darwin/stop-other-threads.c index dd00a2a..1975b47 100644 --- a/lib/darwin/stop-other-threads.c +++ b/lib/darwin/stop-other-threads.c @@ -49,9 +49,19 @@ static bool apply_one_pcp(mach_port_t thread, #elif defined(__arm__) || defined(__arm64__) pcp = (uintptr_t *) &state.pc; #endif + uintptr_t old = *pcp; +#ifdef __arm__ + /* thumb */ + if (state.cpsr & 0x20) + old |= 1; +#endif uintptr_t new = callback(ctx, *pcp); - if (new != *pcp) { + if (new != old) { *pcp = new; +#ifdef __arm__ + *pcp &= ~1; + state.cpsr = (state.cpsr & ~0x20) | ((new & 1) * 0x20); +#endif kr = thread_set_state(thread, flavor, (thread_state_t) &state, real_cnt); if (kr) return false; @@ -110,3 +110,9 @@ static const unsigned null_op = -0x100; #ifndef TARGET_DIS_SUPPORTED #error "no disassembler for the target architecture yet" #endif + +static inline void op32(void **codep, uint32_t op) { + *(uint32_t *) *codep = op; + *codep += 4; +} + diff --git a/lib/execmem.h b/lib/execmem.h index f46b7d6..a1cd47e 100644 --- a/lib/execmem.h +++ b/lib/execmem.h @@ -1,3 +1,9 @@ #pragma once #include <stdlib.h> +/* write to a foreign page which is already RX / with unknown permissions */ int execmem_write(void *dest, const void *src, size_t len); + +/* for allocating trampolines */ +int execmem_alloc_unsealed(uintptr_t hint, void **page_p, size_t *size_p); +int execmem_seal(void *page); +void execmem_free(void *page); diff --git a/lib/hook-functions.c b/lib/hook-functions.c index e20f069..daf4093 100644 --- a/lib/hook-functions.c +++ b/lib/hook-functions.c @@ -1,34 +1,261 @@ -#include "substitute.h" #include "substitute-internal.h" +#ifdef TARGET_DIS_SUPPORTED +#include "substitute.h" +#include "jump-dis.h" +#include "transform-dis.h" #include "execmem.h" #include "stop-other-threads.h" +#include TARGET_JUMP_PATCH_HDR + +struct hook_internal { + int offset_by_pcdiff[JUMP_PATCH_SIZE + 1]; + uint8_t jump_patch[MAX_JUMP_PATCH_SIZE]; + size_t jump_patch_size; + uintptr_t pc_patch_start; + void *outro_trampoline; + /* page allocated with execmem_alloc_unsealed - only if we had to allocate + * one when processing this hook */ + void *trampoline_page; +}; + +struct pc_callback_info { + struct hook_internal *his; + size_t nhooks; +} -static uintptr_t patch_callback(UNUSED void *ctx, uintptr_t pc) { - printf("patch_callback: pc=%llx\n", (long long) pc); +static uintptr_t pc_callback(void *ctx, uintptr_t pc) { + struct pc_callback_info *restrict info = ctx; + uintptr_t real_pc = pc; +#ifdef __arm__ + bool thumb = pc & 1; + real_pc = pc & ~1; +#endif + for (size_t i = 0; i < info->nhooks; i++) { + struct hook_internal *hi = &info->his[i]; + uintptr_t diff = real_pc - hi->pc_patch_start; + if (diff < hi->jump_patch_size) + return (uintptr_t) hi->outro_trampoline + hi->offset_by_pcdiff[diff]; + } return pc; } +/* Figure out the size of the patch we need to jump from pc_patch_start + * to hook->replacement. + * On ARM, we can jump anywhere in 8 bytes. On ARM64, we can only do it in two + * or three instructions if the destination PC is within 4GB or so of the + * source. We *could* just brute force it by adding more instructions, but + * this increases the chance of problems caused by patching too much of the + * function. Instead, since we should be able to mmap a trampoline somewhere + * in that range, we'll stop there on the way to. + * In order of preference: + * - Jump directly. + * - Jump using a trampoline to be placed at our existing trampoline_ptr. + * - Allocate a new trampoline_ptr, using the target as a hint, and jump there. + * If even that is out of range, then return an error code. + */ + +static int check_intro_trampline(void **trampoline_ptr_p + size_t *trampoline_size_left_p, + uintptr_t pc, + uintptr_t dpc, + int *patch_size_p, + bool *need_intro_trampoline_p, + void **trampoline_page_p, + struct arch_dis_ctx arch) { + void *trampoline_ptr = *trampoline_ptr_p; + size_t trampoline_size_left = *trampoline_size_left_p; + + /* Try direct */ + *need_intro_trampoline_p = false; + *patch_size_p = jump_patch_size(pc_patch_start, + (uintptr_t) hook->replacement, + arch); + if (*patch_size_p != -1) + return SUBSTITUTE_OK; + + /* Try existing trampoline */ + *patch_size_p = jump_patch_size(pc_patch_start, + (uintptr_t) *trampoline_ptr, + arch); + + if (*patch_size_p != -1 && *patch_size_p <= *trampoline_size_left_p) + return SUBSTITUTE_OK; + + /* Allocate new trampoline - try after dpc. If this fails, we can try + * before dpc before giving up. */ + int ret = execmem_alloc_unsealed(dpc, &trampoline_ptr, &trampoline_size_left); + if (ret) + goto skip_after; + + *patch_size_p = jump_patch_size(pc_patch_start, + (uintptr_t) *trampoline_ptr, + arch); + if (*patch_size_p != -1) { + *trampoline_ptr_p = trampoline_ptr; + *trampoline_size_left_p = trampoline_size_left; + *trampoline_page_p = trampoline_ptr; + return SUBSTITUTE_OK; + } + + execmem_free(trampoline_ptr); + +skip_after: + /* Allocate new trampoline - try before dpc (xxx only meaningful on arm64) */ + uintptr_t start_address = dpc - 0xffff0000; + ret = execmem_alloc_unsealed(start_address, &trampoline_ptr, &trampoline_size_left); + if (ret) + return ret; + + *patch_size_p = jump_patch_size(pc_patch_start, + (uintptr_t) *trampoline_ptr, + arch); + if (*patch_size_p != -1) { + *trampoline_ptr_p = trampoline_ptr; + *trampoline_size_left_p = trampoline_size_left; + *trampoline_page_p = trampoline_ptr; + return SUBSTITUTE_OK; + } + + /* I give up... */ + execmem_free(trampoline_ptr); + return SUBSTITUTE_ERR_OUT_OF_RANGE; +} + + EXPORT int substitute_hook_functions(const struct substitute_function_hook *hooks, size_t nhooks, int options) { - (void) hooks; (void) nhooks; + struct hook_internal *his = malloc(nhooks * sizeof(*hi)); + if (!his) + return SUBSTITUTE_ERR_OOM; + + for (size_t i = 0; i < nhooks; i++) + his[i].trampoline_page = NULL; + int ret = SUBSTITUTE_OK; + ssize_t emw_finished_i = -1; + bool stopped = false; void *stop_token; if (!(options & SUBSTITUTE_DONT_STOP_THREADS)) { if ((ret = stop_other_threads(&stop_token))) - return ret; + goto end; + stopped = true; } - if (!(options & SUBSTITUTE_DONT_STOP_THREADS)) { - if ((ret = apply_pc_patch_callback(stop_token, patch_callback, NULL))) - goto fail; + + void *trampoline_ptr = NULL; + size_t trampoline_size_left = 0; + + /* First run through and (a) ensure all the functions are OK to hook, (b) + * allocate memory for the trampolines. */ + for (size_t i = 0; i < nhooks; i++) { + const struct substitute_function_hook *hook = &hooks[i]; + struct hook_internal *hi = &his[i]; + void *code = hook->function; + struct arch_dis_ctx arch; + memset(&arch, 0, sizeof(arch)); +#ifdef __arm__ + if ((uintptr_t) code & 1) { + arch.pc_low_bit = true; + code--; + } +#endif + uintptr_t pc_patch_start = (uintptr_t) code; + int patch_size; + bool need_intro_trampoline; + if ((ret = check_intro_trampoline(&trampoline_ptr, &trampoline_size_left, + pc_patch_start, (uintptr_t) hook->replacement, + &patch_size, &need_intro_trampoline, + &hi->trampoline_page, arch))) + goto end; + + uintptr_t initial_target; + if (need_intro_trampoline) { + make_jump_patch(&trampoline_ptr, (uintptr_t) trampoline_ptr, + (uintptr_t) hook->replacement, arch); + initial_target = (uintptr_t) trampoline_ptr; + } else { + initial_target = (uintptr_t) hook->replacement; + } + void *jp = hi->jump_patch; + make_jump_patch(&jp, pc_patch_start, initial_target, arch); + hi->jump_patch_size = (uint8_t *) jp - hi->jump_patch; + hi->pc_patch_start = pc_patch_start; + + uintptr_t pc_patch_end = pc_patch_start + patch_size; + /* Generate the rewritten start of the function for the outro + * trampoline (complaining if any bad instructions are found). */ + uint8_t rewritten_temp[MAX_REWRITTEN_SIZE]; + void *rp = rewritten_temp; + if ((ret = transform_dis_main(code, &rp, pc_patch_start, pc_patch_end, + arch, hi->offset_by_pcdiff)) + goto end; + /* Check some of the rest of the function for jumps back into the + * patched region. */ + if ((ret = jump_dis(code, pc_patch_start, pc_patch_end, arch))) + goto end; + + size_t rewritten_size = (uint8_t *) rp - hi->rewritten_temp; + if (trampoline_size_left < rewritten_size) { + /* Not enough space left in our existing block... */ + if (ret = execmem_alloc_unsealed(0, &trampoline_ptr, + &trampoline_size_left)) + goto end; + } + + hi->outro_trampoline = trampoline_ptr; +#ifdef __arm__ + if (arch.pc_low_bit) + hi->outro_trampoline++; +#endif + memcpy(trampoline_ptr, rewritten_temp, rewritten_size); + trampoline_size_left -= rewritten_size; } -fail: - if (!(options & SUBSTITUTE_DONT_STOP_THREADS)) { + /* Now commit. */ + for (size_t i = 0; i < nhooks; i++) { + const struct substitute_function_hook *hook = &hooks[i]; + struct hook_internal *hi = &his[i]; + emw_finished_i = (ssize_t) i; + if ((ret = execmem_write(hi->pc_patch_start, hi->jump_patch, + hi->jump_patch_size))) { + /* User is probably screwed, since this probably means a failure to + * re-protect exec, thanks to code signing, so now the function is + * permanently inaccessible. */ + goto end; + } + if (hook->old_ptr) + *(void **) hook_old_ptr = hi->outro_trampoline; + } + + /* *sigh of relief* now we can rewrite the PCs. */ + if (stopped) { + struct pc_callback_info info = {hi, nhooks}; + if ((ret = apply_pc_patch_callback(stop_token, pc_callback, &info))) + goto end; + } + +end: + for (size_t i = 0; i < nhooks; i++) { + void *page = his[i].trampoline_page; + if (page) { + /* if we failed, get rid of the trampolines. if we succeeded, make + * them executable */ + if (ret && i >= emw_finished) { + execmem_free(page); + } else { + /* we already patched them all, too late to go back.. */ + ret = execmem_seal(page); + } + } + } + if (stopped) { int r2 = resume_other_threads(stop_token); if (!ret) ret = r2; } + free(his); return ret; } + +#endif /* TARGET_DIS_SUPPORTED */ diff --git a/lib/substitute.h b/lib/substitute.h index 95f8436..250106b 100644 --- a/lib/substitute.h +++ b/lib/substitute.h @@ -51,6 +51,10 @@ enum { * SUBSTITUTE_DONT_STOP_THREADS */ SUBSTITUTE_ERR_NOT_ON_MAIN_THREAD, + /* substitute_hook_functions: destination was out of range, and mmap + * wouldn't give us a trampoline in range */ + SUBSTITUTE_ERR_OUT_OF_RANGE, + /* substitute_interpose_imports: couldn't redo relocation for an import * because the type was unknown */ SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE, @@ -66,7 +70,7 @@ enum { struct substitute_function_hook { void *function; void *replacement; - void *old_ptr; /* optional: out pointer to function pointer to call old impl */ + void *old_ptr; /* optional: out *pointer* to function pointer to call old impl */ }; /* Get a string representation for a SUBSTITUTE_* error code. */ diff --git a/lib/transform-dis.c b/lib/transform-dis.c index 9edf89e..1d1f489 100644 --- a/lib/transform-dis.c +++ b/lib/transform-dis.c @@ -117,12 +117,6 @@ int transform_dis_main(const void *restrict code_ptr, return SUBSTITUTE_OK; } -static inline void op32(struct transform_dis_ctx *ctx, uint32_t op) { - void **rpp = ctx->rewritten_ptr_ptr; - *(uint32_t *) *rpp = op; - *rpp += 4; -} - #include TARGET_TRANSFORM_DIS_HEADER #include TARGET_DIS_HEADER |