aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author comex 2015-02-08 23:45:24 -0500
committer comex 2015-02-08 23:45:24 -0500
commit eb93cee2a22cde812ccd6b9bd418d36185c058f5 (patch)
tree 43a22ccf021a1513dba3a9c99f7b81822fe950fa
parent formatting (diff)
download substitute-eb93cee2a22cde812ccd6b9bd418d36185c058f5.tar.gz
Refactor disassembly so x86 works, and add x86 transform-dis.
This patch is a monolithic mess, because I was too lazy to do the refactor first (that would require some stash fun, since I wasn't actually sure before doing x86 transform-dis what would be needed). Anyway, the resulting code should be cleaner - less duplication. This breaks ARM/ARM64.
Diffstat (limited to '')
-rw-r--r-- Makefile 10
-rw-r--r-- lib/arm/arch-dis.h 60
-rw-r--r-- lib/arm/arch-transform-dis.inc.h (renamed from lib/arm/transform-dis-arm-multi.inc.h) 0
-rw-r--r-- lib/arm/dis-main.inc.h (renamed from lib/arm/dis-arm-multi.inc.h) 0
-rw-r--r-- lib/arm/misc.h 58
-rw-r--r-- lib/arm64/arch-dis.h 37
-rw-r--r-- lib/arm64/arch-transform-dis.inc.h (renamed from lib/arm64/transform-dis-arm64.inc.h) 4
-rw-r--r-- lib/arm64/dis-main.inc.h (renamed from lib/arm64/dis-arm64.inc.h) 0
-rw-r--r-- lib/arm64/misc.h 35
-rw-r--r-- lib/dis.h 26
-rw-r--r-- lib/hook-functions.c 4
-rw-r--r-- lib/jump-dis.c 46
-rw-r--r-- lib/jump-dis.h 1
-rw-r--r-- lib/substitute-internal.h 16
-rw-r--r-- lib/transform-dis.c 55
-rw-r--r-- lib/transform-dis.h 5
-rw-r--r-- lib/x86/arch-dis.h 10
-rw-r--r-- lib/x86/arch-transform-dis.inc.h 58
-rw-r--r-- lib/x86/dis-main.inc.h (renamed from lib/x86/dis-x86.inc.h) 47
-rw-r--r-- lib/x86/jump-patch.h 21
-rw-r--r-- lib/x86/misc.h 12
-rw-r--r-- test/test-td-simple.c 41
22 files changed, 317 insertions, 229 deletions
diff --git a/Makefile b/Makefile
index 91b8d90..b5c1bb4 100644
--- a/Makefile
+++ b/Makefile
@@ -105,8 +105,8 @@ out/%.bin: out/%.o Makefile
segedit -extract __TEXT __text $@ $<
define define_test
-out/test-$(1): test/test-$(2).[cm]* $(HEADERS) $(GENERATED) Makefile # out/libsubstitute.dylib
- $(3) -o $$@ $$< -Ilib -Isubstrate -Lout -dead_strip #-lsubstitute
+out/test-$(1): test/test-$(2).[cm]* $(HEADERS) $(GENERATED) Makefile out/libsubstitute.dylib
+ $(3) -o $$@ $$< -Ilib -Isubstrate -Lout -dead_strip -lsubstitute
install_name_tool -change /usr/lib/libsubstitute.0.dylib '@executable_path/libsubstitute.dylib' $$@
ifneq (,$(IS_IOS))
ldid -Sent.plist $$@
@@ -116,9 +116,9 @@ endef
$(eval $(call define_test,td-simple-arm,td-simple,$(CC) -std=c11 -DHDR='"arm/dis-arm.inc.h"' -Dxdis=dis_arm -DFORCE_TARGET_arm))
$(eval $(call define_test,td-simple-thumb,td-simple,$(CC) -std=c11 -DHDR='"arm/dis-thumb.inc.h"' -Dxdis=dis_thumb -DFORCE_TARGET_arm))
$(eval $(call define_test,td-simple-thumb2,td-simple,$(CC) -std=c11 -DHDR='"arm/dis-thumb2.inc.h"' -Dxdis=dis_thumb2 -DFORCE_TARGET_arm))
-$(eval $(call define_test,td-simple-arm64,td-simple,$(CC) -std=c11 -DHDR='"arm64/dis-arm64.inc.h"' -Dxdis=dis -DFORCE_TARGET_arm64))
-$(eval $(call define_test,td-simple-i386,td-simple,$(CC) -std=c11 -DHDR='"x86/dis-x86.inc.h"' -Dxdis=dis -DFORCE_TARGET_i386))
-$(eval $(call define_test,td-simple-x86-64,td-simple,$(CC) -std=c11 -DHDR='"x86/dis-x86.inc.h"' -Dxdis=dis -DFORCE_TARGET_x86_64))
+$(eval $(call define_test,td-simple-arm64,td-simple,$(CC) -std=c11 -DHDR='"arm64/dis-main.inc.h"' -Dxdis=dis -DFORCE_TARGET_arm64))
+$(eval $(call define_test,td-simple-i386,td-simple,$(CC) -std=c11 -DHDR='"x86/dis-main.inc.h"' -Dxdis=dis -DFORCE_TARGET_i386))
+$(eval $(call define_test,td-simple-x86-64,td-simple,$(CC) -std=c11 -DHDR='"x86/dis-main.inc.h"' -Dxdis=dis -DFORCE_TARGET_x86_64))
$(eval $(call define_test,dis-arm,dis,$(CC) -std=c11 -DFORCE_TARGET_arm))
$(eval $(call define_test,dis-arm64,dis,$(CC) -std=c11 -DFORCE_TARGET_arm64))
$(eval $(call define_test,jump-dis-arm,jump-dis,$(CC) -std=c11 -DFORCE_TARGET_arm -O0))
diff --git a/lib/arm/arch-dis.h b/lib/arm/arch-dis.h
new file mode 100644
index 0000000..c64ff2e
--- /dev/null
+++ b/lib/arm/arch-dis.h
@@ -0,0 +1,60 @@
+#pragma once
+#define MIN_INSN_SIZE 2
+/* each input instruction might turn into:
+ * - 2 bytes for Bcc, if in IT
+ * then ONE of:
+ * - 2/4 bytes for just the instruction
+ * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever)
+ * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject
+ * to IT, there can only reasonably be two per block, and if there are both
+ * then that's an unconditional exit - but we don't enforce any of this
+ * currently)
+ * - up to 7 4-byte insns for similar moves to PC that fall under 'data'
+ * the maximum number of possible inputs is 4, plus 4 extras if the last one
+ * was an IT (but in that case it can't be one of the above cases)
+ * while this looks huge, it's overly conservative and doesn't matter much,
+ * since only the actually used space will be taken up in the final output
+ */
+#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 200 */
+
+struct arch_pcrel_info {
+ unsigned reg;
+ enum pcrel_load_mode lm;
+};
+
+struct arch_dis_ctx {
+ /* thumb? */
+ bool pc_low_bit;
+ /* if thumb, IT cond for the next 5 instructions
+ * (5 because we still advance after IT) */
+ uint8_t it_conds[5];
+ /* for transform_dis - did we add space for a Bccrel? */
+ uint8_t bccrel_bits;
+ void *bccrel_p;
+};
+
+static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
+ ctx->pc_low_bit = false;
+ ctx->bccrel_p = NULL;
+ memset(ctx->it_conds, 0xe, 5);
+}
+
+static inline void advance_it_cond(struct arch_dis_ctx *ctx) {
+ ctx->it_conds[0] = ctx->it_conds[1];
+ ctx->it_conds[1] = ctx->it_conds[2];
+ ctx->it_conds[2] = ctx->it_conds[3];
+ ctx->it_conds[3] = ctx->it_conds[4];
+ ctx->it_conds[4] = 0xe;
+}
+
+#define DFLAG_IS_LDRD_STRD (1 << 16)
+
+/* Types of conditionals for 'branch' */
+/* a regular old branch-with-condition */
+#define CC_ARMCC (CC_CONDITIONAL | 0x400)
+/* already in an IT block - in transform_dis this will be rewritten to a branch
+ * anyway, so it can be treated as unconditional; in jump_dis we have to know
+ * to keep going */
+#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x800)
+/* CBZ/CBNZ is rewritten */
+#define CC_CBXZ (CC_CONDITIONAL | 0xc00)
diff --git a/lib/arm/transform-dis-arm-multi.inc.h b/lib/arm/arch-transform-dis.inc.h
index 6e91ff5..6e91ff5 100644
--- a/lib/arm/transform-dis-arm-multi.inc.h
+++ b/lib/arm/arch-transform-dis.inc.h
diff --git a/lib/arm/dis-arm-multi.inc.h b/lib/arm/dis-main.inc.h
index bf2767e..bf2767e 100644
--- a/lib/arm/dis-arm-multi.inc.h
+++ b/lib/arm/dis-main.inc.h
diff --git a/lib/arm/misc.h b/lib/arm/misc.h
index ef11a05..c18367d 100644
--- a/lib/arm/misc.h
+++ b/lib/arm/misc.h
@@ -1,59 +1,3 @@
#pragma once
+#define TARGET_POINTER_SIZE 4
#define TARGET_DIS_SUPPORTED
-#define TARGET_DIS_HEADER "arm/dis-arm-multi.inc.h"
-#define TARGET_JUMP_PATCH_HDR "arm/jump-patch.h"
-#define TARGET_TRANSFORM_DIS_HEADER "arm/transform-dis-arm-multi.inc.h"
-#define MIN_INSN_SIZE 2
-/* each input instruction might turn into:
- * - 2 bytes for Bcc, if in IT
- * then ONE of:
- * - 2/4 bytes for just the instruction
- * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever)
- * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject
- * to IT, there can only reasonably be two per block, and if there are both
- * then that's an unconditional exit - but we don't enforce any of this
- * currently)
- * - up to 7 4-byte insns for similar moves to PC that fall under 'data'
- * the maximum number of possible inputs is 4, plus 4 extras if the last one
- * was an IT (but in that case it can't be one of the above cases)
- * while this looks huge, it's overly conservative and doesn't matter much,
- * since only the actually used space will be taken up in the final output
- */
-#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 196 */
-
-struct arch_dis_ctx {
- /* thumb? */
- bool pc_low_bit;
- /* if thumb, IT cond for the next 5 instructions
- * (5 because we still advance after IT) */
- uint8_t it_conds[5];
- /* for transform_dis - did we add space for a Bccrel? */
- uint8_t bccrel_bits;
- void *bccrel_p;
-};
-
-static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
- ctx->pc_low_bit = false;
- ctx->bccrel_p = NULL;
- memset(ctx->it_conds, 0xe, 5);
-}
-
-static inline void advance_it_cond(struct arch_dis_ctx *ctx) {
- ctx->it_conds[0] = ctx->it_conds[1];
- ctx->it_conds[1] = ctx->it_conds[2];
- ctx->it_conds[2] = ctx->it_conds[3];
- ctx->it_conds[3] = ctx->it_conds[4];
- ctx->it_conds[4] = 0xe;
-}
-
-#define DFLAG_IS_LDRD_STRD (1 << 16)
-
-/* Types of conditionals for 'branch' */
-/* a regular old branch-with-condition */
-#define CC_ARMCC (CC_CONDITIONAL | 0x400)
-/* already in an IT block - in transform_dis this will be rewritten to a branch
- * anyway, so it can be treated as unconditional; in jump_dis we have to know
- * to keep going */
-#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x800)
-/* CBZ/CBNZ is rewritten */
-#define CC_CBXZ (CC_CONDITIONAL | 0xc00)
diff --git a/lib/arm64/arch-dis.h b/lib/arm64/arch-dis.h
new file mode 100644
index 0000000..f91328b
--- /dev/null
+++ b/lib/arm64/arch-dis.h
@@ -0,0 +1,37 @@
+#pragma once
+#define MIN_INSN_SIZE 4
+#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
+
+struct arch_pcrel_info {
+ unsigned reg;
+ enum pcrel_load_mode lm;
+};
+
+struct arch_dis_ctx {
+ /* For transform_dis only - used to get temporary registers. We assume
+ * that we can use any caller-saved or IP register which was not written,
+ * so r9-r18.
+ * This is a massive overestimate: we just OR in each instruction's bits
+ * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair
+ * instructions), and 20:16 (Rs for store-exclusive instructions). It
+ * would be easy to restrict the latter two to the few instructions that
+ * actually use them, but with 10 available registers, and a patch of at
+ * most 3 instructions (and none of the instructions that require a temp
+ * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't
+ * run out even with the dumbest possible thing. */
+ uint32_t regs_possibly_written;
+};
+
+static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
+ ctx->regs_possibly_written = 0;
+}
+
+static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) {
+ uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9));
+ if (!avail)
+ __builtin_abort();
+ return 31 - __builtin_clz(avail);
+}
+
+#define CC_ARMCC (CC_CONDITIONAL | 0x400)
+#define CC_XBXZ (CC_CONDITIONAL | 0x800)
diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/arch-transform-dis.inc.h
index 792b835..d8f831d 100644
--- a/lib/arm64/transform-dis-arm64.inc.h
+++ b/lib/arm64/arch-transform-dis.inc.h
@@ -1,7 +1,7 @@
#include "arm64/assemble.h"
static NOINLINE UNUSED
-void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned reg,
+void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint_tptr dpc, unsigned reg,
enum pcrel_load_mode load_mode) {
ctx->write_newop_here = NULL;
void **codep = ctx->rewritten_ptr_ptr;
@@ -16,7 +16,7 @@ void transform_dis_pcrel(struct transform_dis_ctx *ctx, uintptr_t dpc, unsigned
}
static NOINLINE UNUSED
-void transform_dis_branch(struct transform_dis_ctx *ctx, uintptr_t dpc, int cc) {
+void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, int cc) {
/* TODO fix BL */
#ifdef TRANSFORM_DIS_VERBOSE
printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
diff --git a/lib/arm64/dis-arm64.inc.h b/lib/arm64/dis-main.inc.h
index 04349f2..04349f2 100644
--- a/lib/arm64/dis-arm64.inc.h
+++ b/lib/arm64/dis-main.inc.h
diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h
index f5a6154..066e9d5 100644
--- a/lib/arm64/misc.h
+++ b/lib/arm64/misc.h
@@ -1,36 +1,3 @@
#pragma once
+#define TARGET_POINTER_SIZE 8
#define TARGET_DIS_SUPPORTED
-#define TARGET_DIS_HEADER "arm64/dis-arm64.inc.h"
-#define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h"
-#define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h"
-#define MIN_INSN_SIZE 4
-#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
-
-struct arch_dis_ctx {
- /* For transform_dis only - used to get temporary registers. We assume
- * that we can use any caller-saved or IP register which was not written,
- * so r9-r18.
- * This is a massive overestimate: we just OR in each instruction's bits
- * 4:0 (Rd for data, Rt for loads, most common), 14:10 (Rt2 for load-pair
- * instructions), and 20:16 (Rs for store-exclusive insturctions). It
- * would be easy to restrict the latter two to the few instructions that
- * actually use them, but with 10 available registers, and a patch of at
- * most 3 instructions (and none of the instructions that require a temp
- * use Rt2/Rs or could read their Rd, so the third doesn't count), we won't
- * run out even with the dumbest possible thing. */
- uint32_t regs_possibly_written;
-};
-
-static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
- ctx->regs_possibly_written = 0;
-}
-
-static inline int arm64_get_unwritten_temp_reg(struct arch_dis_ctx *ctx) {
- uint32_t avail = ~ctx->regs_possibly_written & ((1 << 19) - (1 << 9));
- if (!avail)
- __builtin_abort();
- return 31 - __builtin_clz(avail);
-}
-
-#define CC_ARMCC (CC_CONDITIONAL | 0x400)
-#define CC_XBXZ (CC_CONDITIONAL | 0x800)
diff --git a/lib/dis.h b/lib/dis.h
index 6b189e0..7455749 100644
--- a/lib/dis.h
+++ b/lib/dis.h
@@ -111,6 +111,11 @@ static const unsigned null_op = -0x100;
#error "no disassembler for the target architecture yet"
#endif
+static inline void op64(void **codep, uint64_t op) {
+ *(uint64_t *) *codep = op;
+ *codep += 8;
+}
+
static inline void op32(void **codep, uint32_t op) {
*(uint32_t *) *codep = op;
*codep += 4;
@@ -121,5 +126,26 @@ static inline void op16(void **codep, uint16_t op) {
*codep += 2;
}
+static inline void op8(void **codep, uint8_t op) {
+ *(uint8_t *) *codep = op;
+ (*codep)++;
+}
+
#define CC_CONDITIONAL 0x100
#define CC_CALL 0x200
+
+struct dis_ctx_base {
+ uint_tptr pc;
+ const void *ptr;
+#if defined(TARGET_x86_64) || defined(TARGET_i386)
+ uint8_t newop[32];
+#else
+ uint8_t newop[4];
+ uint32_t op;
+#endif
+ uint32_t newval[4];
+ bool modify;
+ int op_size, newop_size;
+};
+
+#include stringify(TARGET_DIR/arch-dis.h)
diff --git a/lib/hook-functions.c b/lib/hook-functions.c
index e0516cb..953683b 100644
--- a/lib/hook-functions.c
+++ b/lib/hook-functions.c
@@ -5,7 +5,7 @@
#include "transform-dis.h"
#include "execmem.h"
#include "stop-other-threads.h"
-#include TARGET_JUMP_PATCH_HDR
+#include stringify(TARGET_DIR/jump-patch.h)
struct hook_internal {
int offset_by_pcdiff[MAX_JUMP_PATCH_SIZE + 1];
@@ -168,7 +168,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
&hi->trampoline_page, arch)))
goto end;
- uintptr_t pc_patch_end = pc_patch_start + patch_size;
+ uint_tptr pc_patch_end = pc_patch_start + patch_size;
/* Generate the rewritten start of the function for the outro
* trampoline (complaining if any bad instructions are found)
* (on arm64, this modifies regs_possibly_written, which is used by the
diff --git a/lib/jump-dis.c b/lib/jump-dis.c
index 3e29bf7..528cfc2 100644
--- a/lib/jump-dis.c
+++ b/lib/jump-dis.c
@@ -1,5 +1,6 @@
#include "substitute-internal.h"
#ifdef TARGET_DIS_SUPPORTED
+#define DIS_MAY_MODIFY 0
#include "dis.h"
#include <stdint.h>
#include <stdbool.h>
@@ -22,15 +23,14 @@ struct jump_dis_ctx {
bool bad_insn;
bool continue_after_this_insn;
- uintptr_t pc;
- uintptr_t pc_patch_start;
- uintptr_t pc_patch_end;
- unsigned op;
- const void *ptr;
- int op_size;
+ struct dis_ctx_base base;
+
+ uint_tptr pc_patch_start;
+ uint_tptr pc_patch_end;
+
uint8_t seen_mask[JUMP_ANALYSIS_MAX_INSNS / 8];
/* queue of instructions to visit */
- uintptr_t *queue;
+ uint_tptr *queue;
size_t queue_write_off;
size_t queue_read_off;
size_t queue_size;
@@ -43,12 +43,8 @@ struct jump_dis_ctx {
#define P(x) jump_dis_##x
#define tdis_ctx struct jump_dis_ctx *
-#define TDIS_CTX_MODIFY(ctx) 0
-#define TDIS_CTX_NEWVAL(ctx, n) 0
-#define TDIS_CTX_NEWOP(ctx) 0
-#define TDIS_CTX_SET_NEWOP(ctx, new) ((void) 0)
-static void jump_dis_add_to_queue(struct jump_dis_ctx *ctx, uintptr_t pc) {
+static void jump_dis_add_to_queue(struct jump_dis_ctx *ctx, uint_tptr pc) {
size_t diff = (pc - ctx->pc_patch_start) / MIN_INSN_SIZE;
if (diff >= JUMP_ANALYSIS_MAX_INSNS) {
#ifdef JUMP_DIS_VERBOSE
@@ -89,8 +85,8 @@ void jump_dis_data(UNUSED struct jump_dis_ctx *ctx,
}
static INLINE UNUSED
-void jump_dis_pcrel(struct jump_dis_ctx *ctx, uintptr_t dpc,
- UNUSED unsigned reg, UNUSED bool is_load) {
+void jump_dis_pcrel(struct jump_dis_ctx *ctx, uint_tptr dpc,
+ UNUSED struct arch_pcrel_info info) {
ctx->bad_insn = dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end;
}
@@ -100,7 +96,7 @@ void jump_dis_ret(struct jump_dis_ctx *ctx) {
}
static NOINLINE UNUSED
-void jump_dis_branch(struct jump_dis_ctx *ctx, uintptr_t dpc, bool conditional) {
+void jump_dis_branch(struct jump_dis_ctx *ctx, uint_tptr dpc, int cc) {
if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
ctx->bad_insn = true;
return;
@@ -109,7 +105,7 @@ void jump_dis_branch(struct jump_dis_ctx *ctx, uintptr_t dpc, bool conditional)
printf("jump-dis: enqueueing %llx\n", (unsigned long long) dpc);
#endif
jump_dis_add_to_queue(ctx, dpc);
- ctx->continue_after_this_insn = conditional;
+ ctx->continue_after_this_insn = cc & (CC_CONDITIONAL | CC_CALL);
}
static INLINE UNUSED
@@ -127,25 +123,25 @@ void jump_dis_thumb_it(UNUSED struct jump_dis_ctx *ctx) {
static void jump_dis_dis(struct jump_dis_ctx *ctx);
-bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end,
+bool jump_dis_main(void *code_ptr, uint_tptr pc_patch_start, uint_tptr pc_patch_end,
struct arch_dis_ctx initial_dis_ctx) {
bool ret;
struct jump_dis_ctx ctx;
memset(&ctx, 0, sizeof(ctx));
ctx.pc_patch_start = pc_patch_start;
ctx.pc_patch_end = pc_patch_end;
- ctx.pc = pc_patch_end;
+ ctx.base.pc = pc_patch_end;
ctx.arch = initial_dis_ctx;
while (1) {
ctx.bad_insn = false;
ctx.continue_after_this_insn = true;
- ctx.ptr = code_ptr + (ctx.pc - pc_patch_start);
+ ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start);
jump_dis_dis(&ctx);
#ifdef JUMP_DIS_VERBOSE
printf("jump-dis: pc=%llx op=%08x size=%x bad=%d continue_after=%d\n",
- (unsigned long long) ctx.pc,
- ctx.op,
- ctx.op_size,
+ (unsigned long long) ctx.base.pc,
+ ctx.base.op,
+ ctx.base.op_size,
ctx.bad_insn,
ctx.continue_after_this_insn);
#endif
@@ -154,12 +150,12 @@ bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_
goto fail;
}
if (ctx.continue_after_this_insn)
- jump_dis_add_to_queue(&ctx, ctx.pc + ctx.op_size);
+ jump_dis_add_to_queue(&ctx, ctx.base.pc + ctx.base.op_size);
/* get next address */
if (ctx.queue_read_off == ctx.queue_write_off)
break;
- ctx.pc = ctx.queue[ctx.queue_read_off];
+ ctx.base.pc = ctx.queue[ctx.queue_read_off];
ctx.queue_read_off = (ctx.queue_read_off + 1) % ctx.queue_size;
ctx.queue_count--;
}
@@ -170,5 +166,5 @@ fail:
return ret;
}
-#include TARGET_DIS_HEADER
+#include stringify(TARGET_DIR/dis-main.inc.h)
#endif /* TARGET_DIS_SUPPORTED */
diff --git a/lib/jump-dis.h b/lib/jump-dis.h
index 575a84d..fccd1a6 100644
--- a/lib/jump-dis.h
+++ b/lib/jump-dis.h
@@ -1,6 +1,7 @@
#pragma once
#include <stdint.h>
#include <stdbool.h>
+#include stringify(TARGET_DIR/arch-dis.h)
bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end,
struct arch_dis_ctx initial_dis_ctx);
diff --git a/lib/substitute-internal.h b/lib/substitute-internal.h
index 17ad6ec..9a91516 100644
--- a/lib/substitute-internal.h
+++ b/lib/substitute-internal.h
@@ -49,12 +49,22 @@ typedef struct section section_x;
#endif
#if defined(TARGET_arm)
- #include "arm/misc.h"
+ #define TARGET_DIR arm
#elif defined(TARGET_arm64)
- #include "arm64/misc.h"
+ #define TARGET_DIR arm64
#elif defined(TARGET_x86_64) || defined(TARGET_i386)
- #include "x86/misc.h"
+ #define TARGET_DIR x86
#endif
+#define stringify_(x) #x
+#define stringify(x) stringify_(x)
+#include stringify(TARGET_DIR/misc.h)
+
+#if TARGET_POINTER_SIZE == 8
+ typedef uint64_t uint_tptr;
+#elif TARGET_POINTER_SIZE == 4
+ typedef uint32_t uint_tptr;
+#endif
+
#ifdef __APPLE__
/* This could graduate to a public API but is not yet. Needs more
diff --git a/lib/transform-dis.c b/lib/transform-dis.c
index 867a981..8f89fb3 100644
--- a/lib/transform-dis.c
+++ b/lib/transform-dis.c
@@ -1,5 +1,6 @@
#include "substitute-internal.h"
#ifdef TARGET_DIS_SUPPORTED
+#define DIS_MAY_MODIFY 1
#include "substitute.h"
#include "dis.h"
@@ -13,21 +14,15 @@ struct transform_dis_ctx {
/* outputs */
bool modify;
int err;
+ struct dis_ctx_base base;
- uintptr_t pc_patch_start;
+ uint_tptr pc_patch_start;
/* this is only tentative - it will be updated to include parts of
* instructions poking out, and instructions forced to be transformed by IT */
- uintptr_t pc_patch_end;
- uintptr_t pc;
- int op_size;
- unsigned op;
- unsigned newop;
- unsigned newval[4];
-
+ uint_tptr pc_patch_end;
/* for IT - eww */
bool force_keep_transforming;
- const void *ptr;
void **rewritten_ptr_ptr;
void *write_newop_here;
@@ -35,10 +30,6 @@ struct transform_dis_ctx {
};
#define tdis_ctx struct transform_dis_ctx *
-#define TDIS_CTX_MODIFY(ctx) ((ctx)->modify)
-#define TDIS_CTX_NEWVAL(ctx, n) ((ctx)->newval[n])
-#define TDIS_CTX_NEWOP(ctx) ((ctx)->newop)
-#define TDIS_CTX_SET_NEWOP(ctx, new) ((ctx)->newop = (new))
/* largely similar to jump_dis */
@@ -46,14 +37,14 @@ static INLINE UNUSED
void transform_dis_ret(struct transform_dis_ctx *ctx) {
/* ret is okay if it's at the end of the required patch (past the original
* patch size is good too) */
- if (ctx->pc + ctx->op_size < ctx->pc_patch_end)
+ if (ctx->base.pc + ctx->base.op_size < ctx->pc_patch_end)
ctx->err = SUBSTITUTE_ERR_FUNC_TOO_SHORT;
}
static INLINE UNUSED
void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) {
#ifdef TRANSFORM_DIS_VERBOSE
- printf("transform_dis (%p): unidentified\n", (void *) ctx->pc);
+ printf("transform_dis (%p): unidentified\n", (void *) ctx->base.pc);
#endif
/* this isn't exhaustive, so unidentified is fine */
}
@@ -74,15 +65,15 @@ static void transform_dis_post_dis(struct transform_dis_ctx *ctx);
int transform_dis_main(const void *restrict code_ptr,
void **restrict rewritten_ptr_ptr,
- uintptr_t pc_patch_start,
- uintptr_t *pc_patch_end_p,
+ uint_tptr pc_patch_start,
+ uint_tptr *pc_patch_end_p,
struct arch_dis_ctx *arch_ctx_p,
int *offset_by_pcdiff) {
struct transform_dis_ctx ctx;
memset(&ctx, 0, sizeof(ctx));
ctx.pc_patch_start = pc_patch_start;
ctx.pc_patch_end = *pc_patch_end_p;
- ctx.pc = pc_patch_start;
+ ctx.base.pc = pc_patch_start;
ctx.arch = *arch_ctx_p;
/* data is written to rewritten both by this function directly and, in case
* additional scaffolding is needed, by arch-specific transform_dis_* */
@@ -90,10 +81,10 @@ int transform_dis_main(const void *restrict code_ptr,
void *rewritten_start = *rewritten_ptr_ptr;
int written_pcdiff = 0;
offset_by_pcdiff[written_pcdiff++] = 0;
- while (ctx.pc < ctx.pc_patch_end && !ctx.force_keep_transforming) {
- ctx.modify = false;
+ while (ctx.base.pc < ctx.pc_patch_end && !ctx.force_keep_transforming) {
+ ctx.base.modify = false;
ctx.err = 0;
- ctx.ptr = code_ptr + (ctx.pc - pc_patch_start);
+ ctx.base.ptr = code_ptr + (ctx.base.pc - pc_patch_start);
transform_dis_pre_dis(&ctx);
@@ -105,33 +96,29 @@ int transform_dis_main(const void *restrict code_ptr,
if (ctx.err)
return ctx.err;
if (ctx.write_newop_here != NULL) {
- if (!ctx.modify)
- ctx.newop = ctx.op;
- if (ctx.op_size == 4)
- *(uint32_t *) ctx.write_newop_here = ctx.newop;
- else if (ctx.op_size == 2)
- *(uint16_t *) ctx.write_newop_here = ctx.newop;
+ if (ctx.base.modify)
+ memcpy(ctx.write_newop_here, ctx.base.newop, ctx.base.newop_size);
else
- __builtin_abort();
+ memcpy(ctx.write_newop_here, ctx.base.ptr, ctx.base.op_size);
if (*rewritten_ptr_ptr == rewritten_ptr)
- *rewritten_ptr_ptr += ctx.op_size;
+ *rewritten_ptr_ptr += ctx.base.op_size;
}
- ctx.pc += ctx.op_size;
+ ctx.base.pc += ctx.base.op_size;
transform_dis_post_dis(&ctx);
- int pcdiff = ctx.pc - ctx.pc_patch_start;
+ int pcdiff = ctx.base.pc - ctx.pc_patch_start;
while (written_pcdiff < pcdiff)
offset_by_pcdiff[written_pcdiff++] = -1;
offset_by_pcdiff[written_pcdiff++] =
(int) (*rewritten_ptr_ptr - rewritten_start);
}
- *pc_patch_end_p = ctx.pc;
+ *pc_patch_end_p = ctx.base.pc;
*arch_ctx_p = ctx.arch;
return SUBSTITUTE_OK;
}
-#include TARGET_TRANSFORM_DIS_HEADER
-#include TARGET_DIS_HEADER
+#include stringify(TARGET_DIR/arch-transform-dis.inc.h)
+#include stringify(TARGET_DIR/dis-main.inc.h)
#endif /* TARGET_DIS_SUPPORTED */
diff --git a/lib/transform-dis.h b/lib/transform-dis.h
index 70fe57a..c1de937 100644
--- a/lib/transform-dis.h
+++ b/lib/transform-dis.h
@@ -1,10 +1,11 @@
#pragma once
#include <stdint.h>
#include <stdbool.h>
+#include stringify(TARGET_DIR/arch-dis.h)
int transform_dis_main(const void *restrict code_ptr,
void **restrict rewritten_ptr_ptr,
- uintptr_t pc_patch_start,
- uintptr_t *pc_patch_end_p,
+ uint_tptr pc_patch_start,
+ uint_tptr *pc_patch_end_p,
struct arch_dis_ctx *arch_ctx_p,
int *offset_by_pcdiff);
diff --git a/lib/x86/arch-dis.h b/lib/x86/arch-dis.h
new file mode 100644
index 0000000..6447f38
--- /dev/null
+++ b/lib/x86/arch-dis.h
@@ -0,0 +1,10 @@
+#pragma once
+#define MIN_INSN_SIZE 1
+#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */
+
+struct arch_pcrel_info {
+ int reg;
+};
+
+struct arch_dis_ctx {};
+static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {}
diff --git a/lib/x86/arch-transform-dis.inc.h b/lib/x86/arch-transform-dis.inc.h
new file mode 100644
index 0000000..bb86cf9
--- /dev/null
+++ b/lib/x86/arch-transform-dis.inc.h
@@ -0,0 +1,58 @@
+/* Pretty trivial, but in its own file to match the other architectures. */
+#include "x86/jump-patch.h"
+
+static void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint64_t dpc,
+ struct arch_pcrel_info info) {
+ /* push %reg; mov $dpc, %reg; <orig but with reg instead>; pop %reg */
+ /* reg is rcx, or rax if the instruction might be using rcx. */
+ int rax = info.reg == 1;
+ void *code = *ctx->rewritten_ptr_ptr;
+ /* push */
+ op8(&code, rax ? 0x50 : 0x51);
+ /* mov */
+#ifdef TARGET_x86_64
+ op8(&code, 0x48);
+ op8(&code, rax ? 0xb8 : 0xb9);
+ op64(&code, dpc);
+#else
+ op8(&code, rax ? 0xb8 : 0xb9);
+ op32(&code, dpc);
+#endif
+ ctx->write_newop_here = code;
+ code += ctx->base.op_size;
+ /* pop */
+ op8(&code, rax ? 0x58 : 0x59);
+ *ctx->rewritten_ptr_ptr = code;
+ ctx->base.newop[0] = rax ? 0 : 1;
+ ctx->base.modify = true;
+}
+
+static void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc,
+ int cc) {
+ if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
+ ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
+ return;
+ }
+ void *code = *ctx->rewritten_ptr_ptr;
+
+ ctx->write_newop_here = code;
+ code += ctx->base.op_size;
+
+ struct arch_dis_ctx arch;
+ uintptr_t source = (uintptr_t) code + 2;
+ int size = jump_patch_size(source, dpc, arch, true);
+ /* if not taken, jmp past the big jump - this is a bit suboptimal but not that bad */
+ op8(&code, 0xeb);
+ op8(&code, size);
+ make_jump_patch(&code, source, dpc, arch);
+
+ *ctx->rewritten_ptr_ptr = code;
+ ctx->base.newop[0] = 2;
+ ctx->base.modify = true;
+
+ if (!cc)
+ transform_dis_ret(ctx);
+}
+
+static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {}
+static void transform_dis_post_dis(UNUSED struct transform_dis_ctx *ctx) {}
diff --git a/lib/x86/dis-x86.inc.h b/lib/x86/dis-main.inc.h
index e0259ea..45a0947 100644
--- a/lib/x86/dis-x86.inc.h
+++ b/lib/x86/dis-main.inc.h
@@ -41,7 +41,8 @@ VEX last byte 1:0: {none, 66, f3, f2}
#define I_JMP 0x40 /* execution does not continue after this */
#define I_SPEC 0x60 /* special case */
#define I_TYPE_MASK 0x60
-#define I_JIMM (0x80|I_JMP) /* imm is jump offset */
+#define I_JIMM_ONLY 0x80 /* imm is jump offset */
+#define I_JIMM (0x80|I_JMP)
#define I_BAD 0x80
#ifdef TARGET_x86_64
#define if64(_64, _32) _64
@@ -72,7 +73,7 @@ static const uint8_t onebyte_bits[] = {
/*D0*/ REP4(I_MODA), i64(I_8), i64(I_8), I_BAD, 0, REP8(I_SPEC),
/* don't treat ljmp as a jump for now */
/*E0*/ REP4(I_8|I_JIMM), REP4(I_8),
- /*E8*/ (I_z|I_JIMM)&~I_JMP, I_z|I_JIMM, i64(I_p), I_8|I_JIMM, 0, 0, 0, 0,
+ /*E8*/ I_z|I_JIMM_ONLY, I_z|I_JIMM, i64(I_p), I_8|I_JIMM, 0, 0, 0, 0,
/*F0*/ I_PFX, I_BAD, I_PFX, I_PFX, 0, 0, I_MODA, I_MODA,
/*F8*/ 0, 0, 0, 0, 0, 0, I_MODA, I_SPEC,
};
@@ -111,8 +112,8 @@ static const uint8_t _0f_bits[] = {
_Static_assert(sizeof(_0f_bits) == 256, "_0f_bits");
static void P(dis)(tdis_ctx ctx) {
- const uint8_t *orig = ctx->ptr;
- const uint8_t *ptr = ctx->ptr;
+ const uint8_t *orig = ctx->base.ptr;
+ const uint8_t *ptr = ctx->base.ptr;
int opnd_size = 4;
int mod, rm = 0;
@@ -212,9 +213,10 @@ got_bits: UNUSED
}
}
UNUSED int modrm_off = ptr - orig;
+ UNUSED uint8_t modrm;
if (bits & I_MOD) {
modrm: UNUSED;
- uint8_t modrm = *ptr++;
+ modrm = *ptr++;
mod = modrm >> 6;
rm |= modrm & 7;
if (rm == 4) {
@@ -249,11 +251,11 @@ got_bits: UNUSED
__builtin_abort();
ptr += imm_size;
- ctx->ptr = ptr;
- ctx->op_size = ptr - orig;
+ ctx->base.ptr = ptr;
+ ctx->base.newop_size = ctx->base.op_size = ptr - orig;
/* printf("bits=%x\n", bits); */
- if ((bits & I_JIMM) == I_JIMM) {
+ if (bits & I_JIMM_ONLY) {
int32_t imm;
const void *imm_ptr = orig + imm_off;
switch (imm_size) {
@@ -265,13 +267,13 @@ got_bits: UNUSED
bool cond = (byte1 & 0xf0) != 0xe0;
bool call = !(bits & I_JMP);
- P(branch)(ctx, ctx->pc + ctx->op_size + imm,
+ P(branch)(ctx, ctx->base.pc + ctx->base.op_size + imm,
cond * CC_CONDITIONAL | call * CC_CALL);
- if (TDIS_CTX_MODIFY(ctx)) {
+ if (DIS_MAY_MODIFY && ctx->base.modify) {
/* newval[0] should be the new immediate */
- int32_t new_imm = TDIS_CTX_NEWVAL(ctx, 0);
- uint8_t *new_op = TDIS_CTX_NEWOP(ctx);
- memcpy(new_op, orig, ctx->op_size);
+ int32_t new_imm = ctx->base.newval[0];
+ uint8_t *new_op = ctx->base.newop;
+ memcpy(new_op, orig, ctx->base.op_size);
uint8_t *new_imm_ptr = new_op + imm_off;
switch (imm_size) {
case 1: *(int8_t *) new_imm_ptr = new_imm; break;
@@ -284,17 +286,22 @@ got_bits: UNUSED
int32_t disp = *(int32_t *) (orig + modrm_off + 1);
/* unlike ARM, we can always switch to non-pcrel without making the
* instruction from scratch, so we don't have 'reg' and 'lm' */
- P(pcrel)(ctx, ctx->pc + ctx->op_size + disp);
- if (TDIS_CTX_MODIFY(ctx)) {
- uint8_t *new_op = TDIS_CTX_NEWOP(ctx);
- memcpy(new_op, orig, ctx->op_size);
+ struct arch_pcrel_info info = {modrm >> 3 & 7};
+ P(pcrel)(ctx, ctx->base.pc + ctx->base.op_size + disp, info);
+ if (DIS_MAY_MODIFY && ctx->base.modify) {
+ uint8_t *new_op = ctx->base.newop;
+ memcpy(new_op, orig, ctx->base.op_size);
/* newval[0] should be the new register, which should be one that
* fits in r/m directly since that's all I need;
- * newval[1] should be the new displacement */
+ * displacement is removed */
uint8_t *new_modrm_ptr = new_op + modrm_off;
- *new_modrm_ptr = (*new_modrm_ptr & ~0xc7) | 4 << 6 | TDIS_CTX_NEWVAL(ctx, 0);
- *(uint32_t *) (new_modrm_ptr + 1) = TDIS_CTX_NEWVAL(ctx, 1);
+ *new_modrm_ptr = (*new_modrm_ptr & ~0xc7) |
+ 0 << 6 |
+ ctx->base.newval[0];
+ memmove(new_modrm_ptr + 1, new_modrm_ptr + 5,
+ ctx->base.op_size - modrm_off - 1);
+ ctx->base.newop_size -= 4;
}
#endif
} else if ((bits & I_TYPE_MASK) == I_JMP) {
diff --git a/lib/x86/jump-patch.h b/lib/x86/jump-patch.h
index efd4825..4c0172d 100644
--- a/lib/x86/jump-patch.h
+++ b/lib/x86/jump-patch.h
@@ -1,5 +1,6 @@
#pragma once
#define MAX_JUMP_PATCH_SIZE 5
+#include "dis.h"
static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc,
UNUSED struct arch_dis_ctx arch,
@@ -12,21 +13,19 @@ static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc,
return force ? (2+4+8) : -1;
}
-static inline void make_jump_patch(void **codep, UNUSED uintptr_t pc,
- uintptr_t dpc,
+static inline void make_jump_patch(void **codep, uintptr_t pc, uintptr_t dpc,
UNUSED struct arch_dis_ctx arch) {
uintptr_t diff = pc - (dpc + 5);
- uint8_t *code = *codep;
+ void *code = *codep;
if (diff == (uintptr_t) (int32_t) diff) {
- *(uint8_t *) code = 0xe9;
- *(uint32_t *) (code + 1) = diff;
- *codep = code + 5;
+ op8(&code, 0xe9);
+ op32(&code, diff);
} else {
/* jmpq *(%rip) */
- *code++ = 0xff;
- *code++ = 0x25;
- *(uint32_t *) code = 0; code += 4;
- *(uint64_t *) code = dpc; code += 8;
- *codep = code;
+ op8(&code, 0xff);
+ op8(&code, 0x25);
+ op32(&code, 0);
+ op64(&code, dpc);
}
+ *codep = code;
}
diff --git a/lib/x86/misc.h b/lib/x86/misc.h
index c8eee19..e04f1f4 100644
--- a/lib/x86/misc.h
+++ b/lib/x86/misc.h
@@ -1,9 +1,7 @@
#pragma once
+#ifdef TARGET_x86_64
+#define TARGET_POINTER_SIZE 8
+#else
+#define TARGET_POINTER_SIZE 4
+#endif
#define TARGET_DIS_SUPPORTED
-#define TARGET_DIS_HEADER "x86/dis-x86.inc.h"
-#define TARGET_JUMP_PATCH_HDR "x86/jump-patch.h"
-#define MIN_INSN_SIZE 1
-#define TD_MAX_REWRITTEN_SIZE 100 /* XXX */
-
-struct arch_dis_ctx {};
-static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {}
diff --git a/test/test-td-simple.c b/test/test-td-simple.c
index 4768177..6478b46 100644
--- a/test/test-td-simple.c
+++ b/test/test-td-simple.c
@@ -5,29 +5,16 @@
#include "dis.h"
typedef struct tc {
- uint32_t pc;
- const void *ptr;
-#if defined(TARGET_x86_64) || defined(TARGET_i386)
- uint8_t newop[16];
-#else
- uint32_t op;
- uint32_t newop;
-#endif
- uint32_t newval[4];
- bool modify;
- int op_size;
+ struct dis_ctx_base base;
struct arch_dis_ctx arch;
} *tdis_ctx;
#define P(x) P_##x
-#define TDIS_CTX_MODIFY(ctx) ((ctx)->modify)
-#define TDIS_CTX_NEWVAL(ctx, n) ((ctx)->newval[n])
-#define TDIS_CTX_NEWOP(ctx) ((ctx)->newop)
-#define TDIS_CTX_SET_NEWOP(ctx, new) ((ctx)->newop = (new))
-
+#define DIS_MAY_MODIFY 0
#if defined(TARGET_x86_64) || defined(TARGET_i386)
NOINLINE UNUSED
-static void P_pcrel(UNUSED struct tc *ctx, uint32_t dpc) {
+static void P_pcrel(UNUSED struct tc *ctx, uint32_t dpc,
+ UNUSED struct arch_pcrel_info info) {
printf("adr => %08x\n", dpc);
}
#else
@@ -47,8 +34,8 @@ static void P_data(UNUSED struct tc *ctx, unsigned o0, unsigned o1, unsigned o2,
}
NOINLINE UNUSED
static void P_pcrel(UNUSED struct tc *ctx, uint32_t dpc,
- unsigned reg, enum pcrel_load_mode lm) {
- printf("adr => %08x r%u lm:%d\n", dpc, reg, lm);
+ struct arch_pcrel_info info) {
+ printf("adr => %08x r%u lm:%d\n", dpc, info.reg, info.lm);
}
NOINLINE UNUSED
static void P_thumb_it(UNUSED struct tc *ctx) {
@@ -85,7 +72,7 @@ static void P_bad(UNUSED struct tc *ctx) {
int main(UNUSED int argc, char **argv) {
struct tc ctx;
- ctx.pc = 0xdead0000;
+ ctx.base.pc = 0xdead0000;
const char *op_str = argv[1];
#if defined(TARGET_x86_64) || defined(TARGET_i386)
uint8_t op[20] = {0};
@@ -106,18 +93,18 @@ int main(UNUSED int argc, char **argv) {
}
op[i/2] = byte;
}
- ctx.ptr = op;
- ctx.modify = false;
+ ctx.base.ptr = op;
+ ctx.base.modify = false;
P_(xdis)(&ctx);
- printf("(size=%d/%zd)\n", ctx.op_size, len / 2);
+ printf("(size=%d/%zd)\n", ctx.base.op_size, len / 2);
#else
uint32_t op = strtoll(op_str ? op_str : "deadbeef", NULL, 16);
- ctx.ptr = &op;
- ctx.newop = 0;
- ctx.modify = false;
+ ctx.base.ptr = &op;
+ ctx.base.newop = 0;
+ ctx.base.modify = false;
printf("%08x: ", op);
P_(xdis)(&ctx);
- printf("==> %x (size=%d)\n", ctx.newop, ctx.op_size);
+ printf("==> %x (size=%d)\n", ctx.base.newop, ctx.base.op_size);
#endif
}