aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYifan Lu2016-11-23 14:34:33 -0600
committerYifan Lu2016-11-23 14:34:33 -0600
commitbd5ebb7a0a4e102731de72832f3e12e9f54d541a (patch)
treebef4f637a270d3f7d2551ac74a89e6b4bad8adec
parentFixed proper encoding of PUSH (STMDB) as ARM manual was wrong... (diff)
parentavoid UB (diff)
downloadsubstitute-bd5ebb7a0a4e102731de72832f3e12e9f54d541a.tar.gz
Merge branch 'master' of https://github.com/comex/substitute
-rwxr-xr-xconfigure7
-rw-r--r--lib/arm/arch-transform-dis.inc.h30
-rw-r--r--lib/arm/assemble.h20
-rw-r--r--lib/arm/dis-arm.inc.h7
-rw-r--r--lib/darwin/read.c4
-rw-r--r--lib/substitute-internal.h4
-rw-r--r--script/mconfig.py4
-rwxr-xr-xscript/test-transform-dis.sh10
-rw-r--r--test/lol.c100
-rw-r--r--test/test-transform-dis.c2
-rw-r--r--test/transform-dis-cases-arm.S15
-rw-r--r--test/transform-dis-cases-arm64.S9
-rw-r--r--test/transform-dis-cases-i386.S10
-rw-r--r--test/transform-dis-cases.h4
14 files changed, 187 insertions, 39 deletions
diff --git a/configure b/configure
index 37b54ef..480d027 100755
--- a/configure
+++ b/configure
@@ -156,8 +156,13 @@ if settings.enable_tests:
('transform-dis-cases-arm64.o', 'transform-dis-cases-arm64.S', [], machs[3]),
('transform-dis-cases-i386.o', 'transform-dis-cases-i386.S', [], machs[1]),
('transform-dis-cases-x86_64.o', 'transform-dis-cases-x86_64.S', [], machs[0]),
+ ('transform-dis-cases-arm.o', 'transform-dis-cases-arm.S', [], machs[2]),
+ ('transform-dis-cases-thumb.o', 'transform-dis-cases-arm.S', ['-DTHUMB'], machs[2]),
]:
- mconfig.build_c_objs(emitter, mach, settings.specialize(override_obj_fn='(out)/'+ofile), ['(src)/test/'+sfile])
+ mconfig.build_c_objs(emitter, mach, settings.specialize(
+ override_obj_fn='(out)/'+ofile,
+ override_cflags=cflags+settings.host.cflags
+ ), ['(src)/test/'+sfile])
o_to_bin('(out)/'+ofile)
tests = [
diff --git a/lib/arm/arch-transform-dis.inc.h b/lib/arm/arch-transform-dis.inc.h
index 1b8e3d5..1e9579d 100644
--- a/lib/arm/arch-transform-dis.inc.h
+++ b/lib/arm/arch-transform-dis.inc.h
@@ -12,7 +12,8 @@ static struct assemble_ctx tdctx_to_actx(const struct transform_dis_ctx *ctx) {
}
return (struct assemble_ctx) {
ctx->rewritten_ptr_ptr,
- (uint_tptr) (uintptr_t) ctx->rewritten_ptr_ptr,
+ *ctx->rewritten_ptr_ptr,
+ (uint_tptr) (uintptr_t) *ctx->rewritten_ptr_ptr,
ctx->arch.pc_low_bit,
cond
};
@@ -20,8 +21,7 @@ static struct assemble_ctx tdctx_to_actx(const struct transform_dis_ctx *ctx) {
}
static int invert_arm_cond(int cc) {
- if (cc >= 0xe)
- __builtin_abort();
+ substitute_assert(cc < 0xe);
return cc ^ 1;
}
@@ -152,9 +152,10 @@ void transform_dis_pcrel(struct transform_dis_ctx *ctx, uint_tptr dpc,
static NOINLINE UNUSED
void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, int cc) {
#ifdef TRANSFORM_DIS_VERBOSE
- printf("transform_dis (0x%llx): branch => 0x%llx\n",
+ printf("transform_dis (0x%llx): branch => 0x%llx cc=%x\n",
(unsigned long long) ctx->base.pc,
- (unsigned long long) dpc);
+ (unsigned long long) dpc,
+ cc);
#endif
/* The check in transform_dis_branch_top is correct under the simplifying
* assumption here that functions will not try to branch into the middle of
@@ -164,26 +165,24 @@ void transform_dis_branch(struct transform_dis_ctx *ctx, uint_tptr dpc, int cc)
transform_dis_branch_top(ctx, dpc, cc);
struct assemble_ctx actx = tdctx_to_actx(ctx);
ctx->write_newop_here = NULL;
+ int replacement_size = 8 + (actx.thumb ? 2 : 4);
if ((cc & CC_ARMCC) == CC_ARMCC) {
+ replacement_size += actx.thumb ? 2 : 4;
actx.cond = invert_arm_cond(cc & 0xf);
- Bccrel(actx, 2+8);
+ Bccrel(actx, replacement_size);
} else if ((cc & CC_CBXZ) == CC_CBXZ) {
+ replacement_size += 2;
ctx->base.modify = true;
- ctx->base.newval[0] = ctx->base.pc + 2+8;
+ ctx->base.newval[0] = actx.pc_of_code_base + replacement_size;
ctx->base.newval[1] = 1; /* do invert */
void **codep = ctx->rewritten_ptr_ptr;
ctx->write_newop_here = *codep; *codep += 2;
}
/* If it's a call, we should jump back after the call */
actx.cond = 0xe;
- if ((cc & CC_CALL)) {
- PUSHmulti(actx, 1 << 7 | 1 << 14); // save lr, r7 (for stack alignment, chosen arbitary)
- ADD_PC(actx, 14, actx.thumb ? (actx.pc & 2 ? 12 : 8) | 1 : 4);
- LDR_PC(actx, dpc | ctx->arch.pc_low_bit);
- POPmulti(actx, 1 << 7 | 1 << 14); // restore lr, r7 (for stack alignment)
- } else {
- LDR_PC(actx, dpc | ctx->arch.pc_low_bit);
- }
+ MOVW_MOVT(actx, 14, dpc | ctx->arch.pc_low_bit);
+ BLXr(actx, 14);
+ substitute_assert(*actx.codep - actx.code_base == replacement_size);
}
static void transform_dis_pre_dis(struct transform_dis_ctx *ctx) {
@@ -201,6 +200,7 @@ static void transform_dis_pre_dis(struct transform_dis_ctx *ctx) {
static void transform_dis_post_dis(struct transform_dis_ctx *ctx) {
if (ctx->arch.bccrel_p) {
struct assemble_ctx actx = {&ctx->arch.bccrel_p,
+ ctx->arch.bccrel_p,
(uint_tptr) (uintptr_t) ctx->arch.bccrel_p,
/*thumb*/ true,
ctx->arch.bccrel_bits};
diff --git a/lib/arm/assemble.h b/lib/arm/assemble.h
index 1baeb0f..3066802 100644
--- a/lib/arm/assemble.h
+++ b/lib/arm/assemble.h
@@ -3,11 +3,16 @@
struct assemble_ctx {
void **codep;
- uint_tptr pc;
+ void *code_base;
+ uint_tptr pc_of_code_base;
bool thumb;
int cond;
};
+/*
+ * Return the PC value that corresponds to the current write position:
+ * pc_of_code_base plus the number of bytes between code_base and the
+ * write cursor *codep.
+ */
+static inline uint_tptr actx_pc(struct assemble_ctx ctx) {
+    /* Subtract as char pointers: arithmetic on void * is a GNU extension,
+     * not ISO C. */
+    return ctx.pc_of_code_base +
+           (uint_tptr) ((char *) *ctx.codep - (char *) ctx.code_base);
+}
+
static inline void PUSHone(struct assemble_ctx ctx, int Rt) {
if (ctx.thumb)
op32(ctx.codep, 0x0d04f84d | Rt << 28);
@@ -70,7 +75,7 @@ static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off,
case PLM_U16: subop = 1; sign = 0; break;
case PLM_S16: subop = 1; sign = 1; break;
case PLM_U32: subop = 2; sign = 0; break;
- default: __builtin_abort();
+ default: substitute_assert(false);
}
op32(ctx.codep, 0x0000f890 | Rn | Rt << 28 | subop << 5 | sign << 8 |
off << 16);
@@ -92,11 +97,18 @@ static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off,
ctx.cond << 28);
break;
default:
- __builtin_abort();
+ substitute_assert(false);
}
}
}
+/*
+ * Emit BLX <Rm> (branch with link and exchange to the address held in
+ * register Rm) at the write cursor.  Thumb mode emits the 16-bit encoding;
+ * ARM mode emits the 32-bit encoding with ctx.cond in the condition field.
+ */
+static inline void BLXr(struct assemble_ctx ctx, int Rm) {
+    if (ctx.thumb) {
+        op16(ctx.codep, 0x4780 | Rm << 3);
+    } else {
+        /* The base opcode leaves bits 31:28 clear so that ctx.cond alone
+         * selects the condition; with 0xe pre-set in the constant, OR-ing in
+         * any other condition would produce the wrong encoding.  The shift is
+         * done on an unsigned value because 0xe << 28 does not fit in a
+         * signed int. */
+        op32(ctx.codep, 0x012fff30 | Rm | (uint32_t) ctx.cond << 28);
+    }
+}
+
static inline void Bccrel(struct assemble_ctx ctx, int offset) {
if (ctx.thumb) {
offset = (offset - 4) / 2;
@@ -108,7 +120,7 @@ static inline void Bccrel(struct assemble_ctx ctx, int offset) {
}
static inline void LDR_PC(struct assemble_ctx ctx, uint32_t dpc) {
- if (ctx.pc & 2)
+ if (actx_pc(ctx) & 2)
op16(ctx.codep, 0xbf00);
if (ctx.thumb)
op32(ctx.codep, 0xf000f8df);
diff --git a/lib/arm/dis-arm.inc.h b/lib/arm/dis-arm.inc.h
index 18285cd..021227c 100644
--- a/lib/arm/dis-arm.inc.h
+++ b/lib/arm/dis-arm.inc.h
@@ -164,7 +164,7 @@ static INLINE void P(adrlabel_label_unk_Rd_1_ADR)(tdis_ctx ctx, struct bitslice
}
static INLINE void P(br_target_target_pred_p_B_1_Bcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) {
unsigned p_val = bs_get(p, ctx->base.op);
- return P(branch)(ctx, ctx->base.pc + 8 + sext(bs_get(target, ctx->base.op), 24),
+ return P(branch)(ctx, ctx->base.pc + 8 + 4 * sext(bs_get(target, ctx->base.op), 24),
p_val == 0xe ? 0 : (CC_ARMCC | p_val));
}
static INLINE void P(ldst_so_reg_addr_unk_Rt_2_LDRB_PRE_REG)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
@@ -210,8 +210,9 @@ static INLINE void P(GPR_func_3_BLX)(tdis_ctx ctx, UNUSED struct bitslice func)
return P(indirect_call)(ctx);
}
static INLINE void P(bl_target_func_2_BL)(tdis_ctx ctx, struct bitslice func) {
- return P(branch)(ctx, ctx->base.pc + 8 + sext(bs_get(func, ctx->base.op), 24),
- CC_CALL);
+ unsigned p_val = ctx->base.op >> 28; // XXX fix this to actually be an op
+ return P(branch)(ctx, ctx->base.pc + 8 + 4 * sext(bs_get(func, ctx->base.op), 24),
+ CC_CALL | (p_val == 0xe ? 0 : (CC_ARMCC | p_val)));
}
static INLINE void P(dis_arm)(tdis_ctx ctx) {
diff --git a/lib/darwin/read.c b/lib/darwin/read.c
index 2e5b746..a0a5d8e 100644
--- a/lib/darwin/read.c
+++ b/lib/darwin/read.c
@@ -9,11 +9,11 @@ bool read_leb128(void **ptr, void *end, bool is_signed, uint64_t *out) {
return false;
bit = *p++;
uint64_t k = bit & 0x7f;
- if (shift < sizeof(uint64_t) * 8)
+ if (shift < 64)
result |= k << shift;
shift += 7;
} while (bit & 0x80);
- if (is_signed && (bit & 0x40))
+ if (is_signed && (bit & 0x40) && shift < 64)
result |= ~((uint64_t) 0) << shift;
*ptr = p;
if (out)
diff --git a/lib/substitute-internal.h b/lib/substitute-internal.h
index 3e0691d..8859436 100644
--- a/lib/substitute-internal.h
+++ b/lib/substitute-internal.h
@@ -109,3 +109,7 @@ static UNUSED const char *xbasename(const char *path) {
return slash ? slash + 1 : path;
}
+#define substitute_assert(x) do { /* abort via __builtin_abort() when x is false; x is evaluated once */ \
+ if (!(x)) { __builtin_abort(); } \
+} while(0)
+
diff --git a/script/mconfig.py b/script/mconfig.py
index 0030f60..39d801a 100644
--- a/script/mconfig.py
+++ b/script/mconfig.py
@@ -274,7 +274,7 @@ class Option(object):
if not self.show:
# If you didn't mention the option in help, you don't get no stinking value. This is for ignored options only.
return
- if value is (False if self.bool else None):
+ if value is None:
value = self.default
if callable(value): # Pending
value = value()
@@ -388,11 +388,13 @@ def _make_argparse(include_unused, include_env):
action='store_true' if opt.bool else 'store',
dest=opt.name[2:],
help=opt.help,
+ default=None,
**kw)
if opt.bool and include_unused:
ag.add_argument(opt.opposite,
action='store_false',
dest=opt.name[2:],
+ default=None,
**kw)
return parser
diff --git a/script/test-transform-dis.sh b/script/test-transform-dis.sh
index 3a9adf3..0b45fd2 100755
--- a/script/test-transform-dis.sh
+++ b/script/test-transform-dis.sh
@@ -1,6 +1,12 @@
#!/bin/sh
set -xe
-make -j8 out/transform-dis-cases-$1.bin out/test-transform-dis-$1
-out/test-transform-dis-$1 auto < out/transform-dis-cases-$1.bin
+barch="$1"
+is_thumb=0
+if [ "$1" = "thumb" ]; then
+ barch=arm
+ is_thumb=1
+fi
+make -j8 out/transform-dis-cases-$1.bin out/test-transform-dis-"$barch"
+out/test-transform-dis-"$barch" auto "$is_thumb" < out/transform-dis-cases-$1.bin
diff --git a/test/lol.c b/test/lol.c
new file mode 100644
index 0000000..846cb5d
--- /dev/null
+++ b/test/lol.c
@@ -0,0 +1,100 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#define IF_BOTHER_WITH_MODIFY(...) __VA_ARGS__
+#include "dis.h"
+
+typedef struct tc { /* context handed to P(dis) and to every P_* callback in this file */
+ struct dis_ctx_base base; /* main() sets pc, newop, modify and ptr here */
+ struct arch_dis_ctx arch; /* main() toggles arch.pc_low_bit between the two decode passes */
+} *tdis_ctx;
+#define P(x) P_##x
+#define DIS_MAY_MODIFY 0
+
+static enum { /* classification of the last decoded instruction; written by the P_* callbacks, read by main() */
+ NOPPY, /* set by P_thumb_it, and by P_data when no written operand is 15 or outside {9, 12} */
+ JUMPY, /* set by P_ret, P_indirect_call, P_bad, and by P_data when a written operand is 15 */
+ BAD /* set by P_branch, P_unidentified, and by P_data for written operands other than 9, 12, 15 */
+} type;
+
+/*
+ * Classify a data-processing instruction from its (up to four) operands.
+ * Only operands that are present (not null_op) and flagged as outputs in
+ * out_mask are considered:
+ *   - operand 15 as an output makes the instruction JUMPY (and ends the scan);
+ *   - an output other than 9 or 12 makes it BAD;
+ *   - otherwise it stays NOPPY.
+ */
+NOINLINE UNUSED
+static void P_data(UNUSED struct tc *ctx, unsigned o0, unsigned o1, unsigned o2,
+                   unsigned o3, unsigned out_mask) {
+    const unsigned operands[] = {o0, o1, o2, o3};
+    type = NOPPY;
+    for (int idx = 0; idx < 4; idx++) {
+        unsigned reg = operands[idx];
+        int is_output = (out_mask >> idx) & 1;
+        if (reg == null_op || !is_output)
+            continue;
+        if (reg == 15) {
+            /* A later operand can no longer change the verdict. */
+            type = JUMPY;
+            return;
+        }
+        if (reg != 12 && reg != 9)
+            type = BAD;
+    }
+}
+/*
+ * PC-relative access callback: classified exactly like a data instruction
+ * whose single output operand is info.reg.  The target address dpc is not
+ * used for classification.
+ */
+NOINLINE UNUSED
+static void P_pcrel(struct tc *ctx, UNUSED uint32_t dpc,
+                    struct arch_pcrel_info info) {
+    /* Plain call rather than `return P_data(...)`: ISO C does not allow a
+     * return statement with an expression in a void function. */
+    P_data(ctx, info.reg, null_op, null_op, null_op, 1);
+}
+NOINLINE UNUSED /* disassembler callback (presumably for Thumb IT instructions, per the name): records NOPPY */
+static void P_thumb_it(UNUSED struct tc *ctx) {
+ type = NOPPY;
+}
+
+NOINLINE UNUSED /* disassembler callback (presumably for return instructions, per the name): records JUMPY */
+static void P_ret(UNUSED struct tc *ctx) {
+ type = JUMPY;
+}
+
+NOINLINE UNUSED /* disassembler callback (presumably for indirect calls, per the name): records JUMPY */
+static void P_indirect_call(UNUSED struct tc *ctx) {
+ type = JUMPY;
+}
+
+NOINLINE UNUSED /* branch callback: records BAD regardless of the target dpc and condition cc, which are ignored */
+static void P_branch(UNUSED struct tc *ctx, uint64_t dpc, int cc) {
+ type = BAD;
+}
+
+NOINLINE UNUSED /* disassembler callback (presumably for encodings the decoder does not recognise, per the name): records BAD */
+static void P_unidentified(UNUSED struct tc *ctx) {
+ type = BAD;
+}
+
+NOINLINE UNUSED /* disassembler callback for "bad" encodings (exact trigger not visible here): records JUMPY, unlike P_unidentified */
+static void P_bad(UNUSED struct tc *ctx) {
+ type = JUMPY; /* NOTE(review): JUMPY rather than BAD looks deliberate - confirm */
+}
+
+#include "arm/dis-main.inc.h"
+
+/*
+ * Brute-force search over 32-bit encodings whose top nibble is 0xf and whose
+ * bits 15:13 are all set (bits 27:16 and 12:0 are enumerated).  Each candidate
+ * is decoded twice, first with arch.pc_low_bit false and then with it true,
+ * and is printed in hex when the first pass classifies it as JUMPY and the
+ * second as NOPPY.
+ */
+int main(UNUSED int argc, UNUSED char **argv) {
+    struct tc ctx;
+    ctx.base.pc = 0xdead0000;
+    memset(ctx.base.newop, 0, sizeof(ctx.base.newop));
+    ctx.base.modify = false;
+    for (uint32_t hi = 0; hi < (1u << 12); hi++) {
+        for (uint32_t lo = 0; lo < (1u << 13); lo++) {
+            /* Unsigned hex constants: 0xf << 28 does not fit in a signed int
+             * (undefined behaviour), and 0b literals are a compiler
+             * extension before C23. */
+            uint32_t op = (UINT32_C(0xf) << 28) | (hi << 16) |
+                          (UINT32_C(0x7) << 13) | lo;
+
+            /* Skip encodings with bits 27:25 = 111, bit 20 = 1, bit 4 = 1.
+             * NOTE(review): presumably a coprocessor register-transfer form
+             * that should not be considered - confirm. */
+            if ((op & 0x0f100010) == 0x0e100010)
+                continue;
+
+            ctx.base.ptr = &op;
+
+            /* First pass: must be classified as a jump. */
+            ctx.arch.pc_low_bit = false;
+            type = BAD;
+            P(dis)(&ctx);
+            if (type != JUMPY)
+                continue;
+
+            /* Second pass: must be classified as harmless. */
+            ctx.arch.pc_low_bit = true;
+            type = BAD;
+            P(dis)(&ctx);
+            if (type != NOPPY)
+                continue;
+
+            printf("%x\n", op);
+        }
+    }
+    return 0;
+}
diff --git a/test/test-transform-dis.c b/test/test-transform-dis.c
index 98c98e3..d147a49 100644
--- a/test/test-transform-dis.c
+++ b/test/test-transform-dis.c
@@ -127,7 +127,7 @@ static void do_auto(uint8_t *in, size_t in_size, struct arch_dis_ctx arch) {
pc_trampoline,
&arch,
offsets,
- TRANSFORM_DIS_BAN_CALLS);
+ 0);//TRANSFORM_DIS_BAN_CALLS);
if (ret) {
if (expect_err) {
printf("OK\n");
diff --git a/test/transform-dis-cases-arm.S b/test/transform-dis-cases-arm.S
new file mode 100644
index 0000000..a9e01b1
--- /dev/null
+++ b/test/transform-dis-cases-arm.S
@@ -0,0 +1,15 @@
+#include "transform-dis-cases.h"
+#ifdef THUMB
+.thumb
+#endif
+
+
+#ifndef THUMB
+GIVEN blne 0f; nop; nop; 0:
+EXPECT beq 1f; movw lr, #0x000c; movt lr, #0xdead; blx lr; 1: nop; nop
+GIVEN bl 0f; 0:
+EXPECT movw lr, #0x0004; movt lr, #0xdead; blx lr
+#else
+GIVEN bl 0f; 0:
+EXPECT movw lr, #0x0005; movt lr, #0xdead; blx lr
+#endif
diff --git a/test/transform-dis-cases-arm64.S b/test/transform-dis-cases-arm64.S
index cc3bd4c..99a353b 100644
--- a/test/transform-dis-cases-arm64.S
+++ b/test/transform-dis-cases-arm64.S
@@ -1,6 +1,4 @@
-#define GIVEN .ascii "GIVEN";
-#define EXPECT .ascii "EXPECT";
-#define EXPECT_ERR .ascii "EXPECT_ERR";
+#include "transform-dis-cases.h"
/* yay clang, no semicolons allowed */
@@ -12,7 +10,10 @@ EXPECT
GIVEN
blr x5
nop
-EXPECT_ERR
+//EXPECT_ERR (with ban_calls)
+EXPECT
+ blr x5
+
GIVEN
cbnz x8, .+0x100
diff --git a/test/transform-dis-cases-i386.S b/test/transform-dis-cases-i386.S
index c02a044..a39ee02 100644
--- a/test/transform-dis-cases-i386.S
+++ b/test/transform-dis-cases-i386.S
@@ -1,6 +1,4 @@
-#define GIVEN .ascii "GIVEN";
-#define EXPECT .ascii "EXPECT";
-#define EXPECT_ERR .ascii "EXPECT_ERR";
+#include "transform-dis-cases.h"
GIVEN call 0f; 0: pop %edx
/* XXX the extra push isn't necessary in 32-bit mode */
@@ -10,9 +8,9 @@ GIVEN jmp 0f; 0: nop
EXPECT_ERR
GIVEN jne .+0x1000
-/* we expect to generate an unnecessarily long jump, so hardcode it the 0x10000
- * is because we pretend our trampoline is 0x10000 bytes before the original
- * function */
+/* We expect to generate an unnecessarily long jump, so hardcode it.
+ * The 0x10000 is because we pretend our trampoline is 0x10000 bytes before the
+ * original function. */
EXPECT 2: .byte 0x0f, 0x85; .long 2; jmp 1f; 0: jmp .+0x10000+0x1000-8; 1:
GIVEN loopne .+0x80
diff --git a/test/transform-dis-cases.h b/test/transform-dis-cases.h
new file mode 100644
index 0000000..94f39bd
--- /dev/null
+++ b/test/transform-dis-cases.h
@@ -0,0 +1,4 @@
+#pragma once
+#define GIVEN .ascii "GIVEN";
+#define EXPECT .ascii "EXPECT";
+#define EXPECT_ERR .ascii "EXPECT_ERR";