aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/arm/assemble.h38
-rw-r--r--lib/arm/dis-arm.inc.h17
-rw-r--r--lib/arm/dis-thumb.inc.h44
-rw-r--r--lib/arm/dis-thumb2.inc.h24
-rw-r--r--lib/arm/jump-patch.h9
-rw-r--r--lib/arm/misc.h51
-rw-r--r--lib/arm/transform-dis-arm-multi.inc.h85
-rw-r--r--lib/arm64/jump-patch.h1
-rw-r--r--lib/arm64/misc.h2
-rw-r--r--lib/arm64/transform-dis-arm64.inc.h15
-rw-r--r--lib/dis.h6
-rw-r--r--lib/hook-functions.c6
-rw-r--r--lib/jump-dis.c3
-rw-r--r--lib/transform-dis.c55
-rw-r--r--lib/transform-dis.h2
15 files changed, 276 insertions, 82 deletions
diff --git a/lib/arm/assemble.h b/lib/arm/assemble.h
index 6f1e8e7..c0af020 100644
--- a/lib/arm/assemble.h
+++ b/lib/arm/assemble.h
@@ -4,27 +4,28 @@
struct assemble_ctx {
void **codep;
bool thumb;
+ int cond;
};
static inline void PUSHone(struct assemble_ctx ctx, int Rt) {
if (ctx.thumb)
op32(ctx.codep, 0x0d04f84d | Rt << 28);
else
- op32(ctx.codep, 0xe52d0004 | Rt << 12);
+ op32(ctx.codep, 0x052d0004 | Rt << 12 | ctx.cond << 28);
}
static inline void POPone(struct assemble_ctx ctx, int Rt) {
if (ctx.thumb)
op32(ctx.codep, 0x0b04f85d | Rt << 28);
else
- op32(ctx.codep, 0xe49d0004 | Rt << 12);
+ op32(ctx.codep, 0x049d0004 | Rt << 12 | ctx.cond << 28);
}
static inline void POPmulti(struct assemble_ctx ctx, uint16_t mask) {
if (ctx.thumb)
op32(ctx.codep, 0x0000e8bd | mask << 16);
else
- op32(ctx.codep, 0xe8bd0000 | mask);
+ op32(ctx.codep, 0x08bd0000 | mask | ctx.cond << 28);
}
static inline void MOVW_MOVT(struct assemble_ctx ctx, int Rd, uint32_t val) {
@@ -36,8 +37,10 @@ static inline void MOVW_MOVT(struct assemble_ctx ctx, int Rd, uint32_t val) {
(hi >> 8 & 7) << 28 | (hi & 0xff) << 16);
} else {
- op32(ctx.codep, 0xe3000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff));
- op32(ctx.codep, 0xe3400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff));
+ op32(ctx.codep, 0x03000000 | Rd << 12 | (lo >> 12) << 16 | (lo & 0xfff) |
+ ctx.cond << 28);
+ op32(ctx.codep, 0x03400000 | Rd << 12 | (hi >> 12) << 16 | (hi & 0xfff) |
+ ctx.cond << 28);
}
}
@@ -46,7 +49,7 @@ static inline void STRri(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off)
if (ctx.thumb)
op32(ctx.codep, 0x0000f8c0 | Rn | Rt << 28 | off << 16);
else
- op32(ctx.codep, 0xe4800000 | Rn << 16 | Rt << 12 | off);
+ op32(ctx.codep, 0x04800000 | Rn << 16 | Rt << 12 | off | ctx.cond << 28);
}
static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off,
@@ -76,11 +79,30 @@ static inline void LDRxi(struct assemble_ctx ctx, int Rt, int Rn, uint32_t off,
op32(ctx.codep, 0xe5900000 | Rn << 16 | Rt << 12 | off);
break;
type2:
- op32(ctx.codep, 0xe1c00000 | Rn << 16 | Rt << 12 | subop << 4 |
- (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20);
+ op32(ctx.codep, 0x01c00000 | Rn << 16 | Rt << 12 | subop << 4 |
+ (off & 0xf) | (off & 0xf0) << 4 | not_ldrd << 20 |
+ ctx.cond << 28);
break;
default:
__builtin_abort();
}
}
}
+
+/* Emit a conditional relative branch (B<cond>) at *ctx.codep.
+ * 'offset' is the distance in bytes from the start of the branch instruction
+ * to its target; ctx.cond is the condition on which the branch is taken.
+ * Thumb uses the 16-bit encoding (8-bit halfword displacement), ARM the
+ * 32-bit encoding (24-bit word displacement). */
+static inline void Bccrel(struct assemble_ctx ctx, int offset) {
+    if (ctx.thumb) {
+        /* displacement is relative to insn + 4, in halfwords */
+        offset = (offset - 4) / 2;
+        /* mask so a negative displacement cannot spill into the cond/opcode
+         * bits */
+        op16(ctx.codep, 0xd000 | ctx.cond << 8 | (offset & 0xff));
+    } else {
+        /* displacement is relative to insn + 8, in words */
+        offset = (offset - 8) / 4;
+        /* shift cond as unsigned: e.g. 0xe << 28 does not fit in an int */
+        op32(ctx.codep, 0x0a000000 | ((uint32_t) offset & 0xffffff) |
+                        (uint32_t) ctx.cond << 28);
+    }
+}
+
+/* Emit a load of PC from an inline literal, followed by the 32-bit literal
+ * 'dpc' itself - i.e. an absolute jump to dpc.  ctx.cond is only encoded in
+ * the ARM form. */
+static inline void LDR_PC(struct assemble_ctx ctx, uint32_t dpc) {
+    if (ctx.thumb) {
+        /* NOTE(review): this looks like ldr.w pc, [pc]; the Thumb literal
+         * base is Align(PC, 4), so it presumably only finds the word emitted
+         * below when this instruction sits at a 4-byte aligned address -
+         * confirm that callers guarantee this. */
+        op32(ctx.codep, 0xf000f8df);
+    } else {
+        /* shift cond as unsigned: e.g. 0xe << 28 does not fit in an int */
+        op32(ctx.codep, 0x051ff004 | (uint32_t) ctx.cond << 28);
+    }
+    op32(ctx.codep, dpc);
+}
diff --git a/lib/arm/dis-arm.inc.h b/lib/arm/dis-arm.inc.h
index 2f06234..8f4d776 100644
--- a/lib/arm/dis-arm.inc.h
+++ b/lib/arm/dis-arm.inc.h
@@ -65,7 +65,7 @@ static INLINE void P(GPR_Rt_addr_offset_none_addr_am2offset_reg_offset_S_4_STRBT
data(r(addr), rs(offset, 0, 4), r(Rt));
}
static INLINE void P(GPR_Rt_addr_offset_none_addr_am3offset_offset_S_2_STRD_POST)(tdis_ctx ctx, struct bitslice offset, struct bitslice Rt, struct bitslice addr) {
- data_flags(IS_LDRD_STRD, r(Rt), r(addr), rs(offset, 0, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, r(Rt), r(addr), rs(offset, 0, 4));
}
static INLINE void P(GPR_Rt_addr_offset_none_addr_postidx_imm8_offset_S_1_STRHTi)(tdis_ctx ctx, UNUSED struct bitslice offset, struct bitslice Rt, struct bitslice addr) {
data(r(addr), r(Rt));
@@ -73,9 +73,9 @@ static INLINE void P(GPR_Rt_addr_offset_none_addr_postidx_imm8_offset_S_1_STRHTi
static INLINE void P(GPR_Rt_addrmode3_addr_S_2_STRD)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
unsigned addr_val = bs_get(addr, ctx->op);
if (addr_val & 1 << 13)
- data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
else
- data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4), rs(addr, 0, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4), rs(addr, 0, 4));
}
static INLINE void P(GPR_Rt_addrmode3_pre_addr_S_2_STRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
return P(GPR_Rt_addrmode3_addr_S_2_STRD)(ctx, addr, Rt);
@@ -133,9 +133,9 @@ static INLINE void P(addrmode3_addr_unk_Rt_4_LDRD)(tdis_ctx ctx, struct bitslice
/* ignoring Rt2 = Rt + 1, but LDRD itself isn't supposed to load PC anyway */
unsigned addr_val = bs_get(addr, ctx->op);
if (addr_val & 1 << 13)
- data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
else
- data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4), rs(addr, 0, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4), rs(addr, 0, 4));
}
static INLINE void P(addrmode3_pre_addr_unk_Rt_4_LDRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
return P(addrmode3_addr_unk_Rt_4_LDRD)(ctx, addr, Rt);
@@ -161,9 +161,10 @@ static INLINE void P(addrmode_imm12_pre_addr_unk_Rt_2_LDRB_PRE_IMM)(tdis_ctx ctx
static INLINE void P(adrlabel_label_unk_Rd_1_ADR)(tdis_ctx ctx, struct bitslice label, struct bitslice Rd) {
return P(pcrel)(ctx, ctx->pc + 8 + bs_get(label, ctx->op), bs_get(Rd, ctx->op), PLM_ADR);
}
-static INLINE void P(br_target_target_B_1_Bcc)(tdis_ctx ctx, struct bitslice target) {
- bool cond = (ctx->op >> 28) != 0xe;
- return P(branch)(ctx, ctx->pc + 8 + sext(bs_get(target, ctx->op), 24), /*cond*/ cond);
+static INLINE void P(br_target_target_pred_p_B_1_Bcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) {
+ unsigned p_val = bs_get(p, ctx->op);
+ return P(branch)(ctx, ctx->pc + 8 + sext(bs_get(target, ctx->op), 24),
+ p_val == 0xe ? 0 : (CC_ARMCC | p_val));
}
static INLINE void P(ldst_so_reg_addr_unk_Rt_2_LDRB_PRE_REG)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
data(rout(Rt), rs(addr, 0, 4), rs(addr, 13, 4));
diff --git a/lib/arm/dis-thumb.inc.h b/lib/arm/dis-thumb.inc.h
index 4e6d106..8be137e 100644
--- a/lib/arm/dis-thumb.inc.h
+++ b/lib/arm/dis-thumb.inc.h
@@ -49,31 +49,45 @@ static INLINE void P(t_addrmode_pc_addr_unk_Rt_1_tLDRpci)(tdis_ctx ctx, struct b
static INLINE void P(t_adrlabel_addr_unk_Rd_1_tADR)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rd) {
return P(pcrel)(ctx, ((ctx->pc + 4) & ~2) + bs_get(addr, ctx->op), bs_get(Rd, ctx->op), PLM_ADR);
}
-static INLINE void P(t_bcctarget_target_B_1_tBcc)(tdis_ctx ctx, struct bitslice target) {
- return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 8), /*cond*/ true);
+static INLINE void P(t_bcctarget_target_pred_p_B_1_tBcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) {
+ return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 8),
+ CC_ARMCC | bs_get(p, ctx->op));
}
static INLINE void P(t_brtarget_target_B_1_tB)(tdis_ctx ctx, struct bitslice target) {
- bool cond = ctx->arch.thumb_it_length > 0;
- return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 11), cond);
+ int cc = ctx->arch.it_conds[0] != 0xe ? CC_ALREADY_IN_IT : 0;
+ return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 11), cc);
}
static INLINE void P(t_cbtarget_target_B_2_tCBNZ)(tdis_ctx ctx, struct bitslice target) {
- return P(branch)(ctx, ctx->pc + 4 + 2 * bs_get(target, ctx->op), /*cond*/ true);
-}
-static INLINE void P(it_pred_cc_it_mask_mask_1_t2IT)(tdis_ctx ctx, struct bitslice mask, UNUSED struct bitslice cc) {
+ P(branch)(ctx, ctx->pc + 4 + 2 * bs_get(target, ctx->op), CC_CBXZ);
+ if (TDIS_CTX_MODIFY(ctx)) {
+ /* change target, and flip z/nz if necessary (i.e. always) */
+ unsigned new = bs_set(target, TDIS_CTX_NEWVAL(ctx, 0), ctx->op);
+ if (TDIS_CTX_NEWVAL(ctx, 1))
+ new ^= 1 << 11;
+ TDIS_CTX_SET_NEWOP(ctx, new);
+ }
+}
+/* Thumb IT: record the condition of every instruction in the IT block in
+ * ctx->arch.it_conds[1..4]; slot 0 is the IT instruction itself and is
+ * shifted out by advance_it_cond() once this instruction has been handled. */
+static INLINE void P(it_pred_cc_it_mask_mask_1_t2IT)(tdis_ctx ctx, struct bitslice mask, struct bitslice cc) {
/* why */
unsigned mask_val = bs_get(mask, ctx->op);
- unsigned length = __builtin_ctz(mask_val);
- if (length >= 3)
+ unsigned cc_val = bs_get(cc, ctx->op);
+ if (mask_val == 0)
return P(unidentified)(ctx); /* nop */
- ctx->arch.thumb_it_length = length;
- return P(unidentified)(ctx);
+ /* number of instructions in the block (1..4) */
+ int length = 4 - __builtin_ctz(mask_val);
+ ctx->arch.it_conds[1] = cc_val;
+ /* Only length - 1 instructions follow the first one; the lowest set bit of
+  * the mask is just the terminator.  Looping 'length' times would mark one
+  * instruction too many as conditional and, for a 4-instruction block,
+  * write it_conds[5], which is past the end of the array. */
+ for (int i = 0; i < length - 1; i++)
+ ctx->arch.it_conds[i+2] = (cc_val & ~1) | (mask_val >> (3 - i) & 1);
+ return P(thumb_it)(ctx);
}
-static INLINE void P(dis_thumb)(tdis_ctx ctx) {
+static INLINE void P(thumb_do_it)(tdis_ctx ctx) {
uint16_t op = ctx->op = *(uint16_t *) ctx->ptr;
- ctx->op_size = 2;
- if (ctx->arch.thumb_it_length)
- ctx->arch.thumb_it_length--;
#include "../generated/generic-dis-thumb.inc.h"
__builtin_abort();
}
+
+/* Handle one 16-bit Thumb instruction: set op_size to 2, dispatch through
+ * thumb_do_it (which loads ctx->op from ctx->ptr), then shift the IT
+ * condition queue by one instruction. */
+static INLINE void P(dis_thumb)(tdis_ctx ctx) {
+ ctx->op_size = 2;
+ P(thumb_do_it)(ctx);
+ advance_it_cond(&ctx->arch);
+}
diff --git a/lib/arm/dis-thumb2.inc.h b/lib/arm/dis-thumb2.inc.h
index a9d7f9d..6f651c5 100644
--- a/lib/arm/dis-thumb2.inc.h
+++ b/lib/arm/dis-thumb2.inc.h
@@ -87,8 +87,9 @@ static INLINE void P(addrmode5_pre_addr_4_t2LDC2L_PRE)(tdis_ctx ctx, struct bits
static INLINE void P(addrmode5_pre_addr_S_4_t2STC2L_PRE)(tdis_ctx ctx, struct bitslice addr) {
data(rs(addr, 9, 4));
}
-static INLINE void P(brtarget_target_B_1_t2Bcc)(tdis_ctx ctx, struct bitslice target) {
- return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 20), /*cond*/ true);
+static INLINE void P(brtarget_target_pred_p_B_1_t2Bcc)(tdis_ctx ctx, struct bitslice target, struct bitslice p) {
+ return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 20),
+ CC_ARMCC | bs_get(p, ctx->op));
}
static INLINE void P(rGPR_Rt_t2addrmode_imm0_1020s4_addr_unk_Rd_S_1_t2STREX)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt, struct bitslice Rd) {
data(rout(Rd), r(Rt), rs(addr, 8, 4));
@@ -100,10 +101,10 @@ static INLINE void P(rGPR_Rt_t2addrmode_imm8_pre_addr_S_2_t2STRB_PRE)(tdis_ctx c
data(r(Rt), rs(addr, 9, 4));
}
static INLINE void P(rGPR_Rt_t2addrmode_imm8s4_addr_S_1_t2STRDi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
- data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
}
static INLINE void P(rGPR_Rt_t2addrmode_imm8s4_pre_addr_S_1_t2STRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
- data_flags(IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, r(Rt), rs(addr, 9, 4));
}
static INLINE void P(rGPR_Rt_t2addrmode_negimm8_addr_S_2_t2STRBi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
data(r(Rt), rs(addr, 9, 4));
@@ -130,10 +131,10 @@ static INLINE void P(addr_offset_none_Rn_t2am_imm8_offset_offset_unk_Rt_5_t2LDRB
data(rout(Rt), r(Rn));
}
static INLINE void P(t2addrmode_imm8s4_addr_unk_Rt_1_t2LDRDi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
- data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
}
static INLINE void P(t2addrmode_imm8s4_pre_addr_unk_Rt_1_t2LDRD_PRE)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
- data_flags(IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
+ data_flags(DFLAG_IS_LDRD_STRD, rout(Rt), rs(addr, 9, 4));
}
static INLINE void P(t2addrmode_negimm8_addr_unk_Rt_5_t2LDRBi8)(tdis_ctx ctx, struct bitslice addr, struct bitslice Rt) {
data(rout(Rt), rs(addr, 9, 4));
@@ -151,8 +152,8 @@ static INLINE void P(t2ldrlabel_addr_unk_Rt_5_t2LDRBpci)(tdis_ctx ctx, struct bi
return P(pcrel)(ctx, ((ctx->pc + 4) & ~2) + (bs_get(addr, ctx->op) & ((1 << 12) - 1)), bs_get(Rt, ctx->op), get_thumb2_load_mode(ctx->op));
}
static INLINE void P(uncondbrtarget_target_B_1_t2B)(tdis_ctx ctx, struct bitslice target) {
- bool cond = ctx->arch.thumb_it_length > 0;
- return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 24), cond);
+ int cc = ctx->arch.it_conds[0] != 0xe ? CC_ALREADY_IN_IT : 0;
+ return P(branch)(ctx, ctx->pc + 4 + 2 * sext(bs_get(target, ctx->op), 24), cc);
}
static INLINE void P(unk_Rd_3_t2MOVTi16)(tdis_ctx ctx, struct bitslice Rd) {
data(rout(Rd));
@@ -165,7 +166,7 @@ static INLINE void P(unk_Rt_13_VMOVRRD)(tdis_ctx ctx, UNUSED struct bitslice Rt)
return P(unidentified)(ctx);
}
-static INLINE void do_it(tdis_ctx ctx) {
+static INLINE void P(thumb2_do_it)(tdis_ctx ctx) {
uint32_t op = ctx->op;
#include "../generated/generic-dis-thumb2.inc.h"
__builtin_abort();
@@ -174,12 +175,11 @@ static INLINE void do_it(tdis_ctx ctx) {
static INLINE void P(dis_thumb2)(tdis_ctx ctx) {
ctx->op = *(uint32_t *) ctx->ptr;
ctx->op_size = 4;
- if (ctx->arch.thumb_it_length)
- ctx->arch.thumb_it_length--;
/* LLVM likes to think about Thumb2 instructions the way the ARM manual
* does - 15..0 15..0 rather than 31..0 as actually laid out in memory... */
ctx->op = flip16(ctx->op);
- do_it(ctx);
+ P(thumb2_do_it)(ctx);
+ advance_it_cond(&ctx->arch);
TDIS_CTX_SET_NEWOP(ctx, flip16(TDIS_CTX_NEWOP(ctx)));
ctx->op = flip16(ctx->op);
}
diff --git a/lib/arm/jump-patch.h b/lib/arm/jump-patch.h
index 238d56e..b65a97d 100644
--- a/lib/arm/jump-patch.h
+++ b/lib/arm/jump-patch.h
@@ -1,7 +1,7 @@
#pragma once
#include "dis.h"
+#include "arm/assemble.h"
#define MAX_JUMP_PATCH_SIZE 8
-#define MAX_REWRITTEN_SIZE (12 * 4) /* actually should be less */
static inline int jump_patch_size(UNUSED uintptr_t pc,
UNUSED uintptr_t dpc,
@@ -13,9 +13,6 @@ static inline int jump_patch_size(UNUSED uintptr_t pc,
static inline void make_jump_patch(void **codep, UNUSED uintptr_t pc,
uintptr_t dpc,
struct arch_dis_ctx arch) {
- if (arch.pc_low_bit)
- op32(codep, 0xf000f8df);
- else
- op32(codep, 0xe51ff004);
- op32(codep, (uint32_t) dpc);
+ struct assemble_ctx actx = {codep, arch.pc_low_bit, 0xe};
+ LDR_PC(actx, dpc);
}
diff --git a/lib/arm/misc.h b/lib/arm/misc.h
index f8d593e..02b06fe 100644
--- a/lib/arm/misc.h
+++ b/lib/arm/misc.h
@@ -4,9 +4,56 @@
#define TARGET_JUMP_PATCH_HDR "arm/jump-patch.h"
#define TARGET_TRANSFORM_DIS_HEADER "arm/transform-dis-arm-multi.inc.h"
#define MIN_INSN_SIZE 2
+/* each input instruction might turn into:
+ * - 2 bytes for Bcc, if in IT
+ * then ONE of:
+ * - 2/4 bytes for just the instruction
+ * - 2+8 bytes for branch (which in *valid* code rules out IT but whatever)
+ * - up to 7 4-byte insns for pcrel (if dest=pc, and while these can be subject
+ * to IT, there can only reasonably be two per block, and if there are both
+ * then that's an unconditional exit - but we don't enforce any of this
+ * currently)
+ * - up to 7 4-byte insns for similar moves to PC that fall under 'data'
+ * the maximum number of possible inputs is 4, plus 4 extras if the last one
+ * was an IT (but in that case it can't be one of the above cases)
+ * while this looks huge, it's overly conservative and doesn't matter much,
+ * since only the actually used space will be taken up in the final output
+ */
+#define TD_MAX_REWRITTEN_SIZE (7*4*7 + 4) /* 200 */
+
struct arch_dis_ctx {
- unsigned thumb_it_length;
+ /* thumb? */
bool pc_low_bit;
+ /* if thumb, IT cond for the next 5 instructions
+ * (5 because we still advance after IT) */
+ uint8_t it_conds[5];
+ /* for transform_dis - did we add space for a Bccrel? */
+ uint8_t bccrel_bits;
+ void *bccrel_p;
};
-enum { IS_LDRD_STRD = 1 << 16 };
+/* Put an arch_dis_ctx into its initial state: ARM mode, no Bccrel pending,
+ * and every IT slot set to 0xe. */
+static inline void arch_dis_ctx_init(struct arch_dis_ctx *ctx) {
+    ctx->pc_low_bit = false;
+    /* give every field a defined value; the struct is copied by value */
+    ctx->bccrel_bits = 0;
+    ctx->bccrel_p = NULL;
+    memset(ctx->it_conds, 0xe, sizeof(ctx->it_conds));
+}
+
+/* Shift the IT condition queue down by one instruction: each slot takes the
+ * value of the slot after it, and the freed last slot becomes 0xe. */
+static inline void advance_it_cond(struct arch_dis_ctx *ctx) {
+    uint8_t *conds = ctx->it_conds;
+    for (int slot = 0; slot < 4; slot++)
+        conds[slot] = conds[slot + 1];
+    conds[4] = 0xe;
+}
+
+#define DFLAG_IS_LDRD_STRD (1 << 16)
+
+/* Types of conditionals for 'branch' */
+/* a regular old branch-with-condition */
+#define CC_ARMCC (CC_CONDITIONAL | 0x200)
+/* already in an IT block - in transform_dis this will be rewritten to a branch
+ * anyway, so it can be treated as unconditional; in jump_dis we have to know
+ * to keep going */
+#define CC_ALREADY_IN_IT (CC_CONDITIONAL | 0x400)
+/* CBZ/CBNZ is rewritten */
+#define CC_CBXZ (CC_CONDITIONAL | 0x800)
diff --git a/lib/arm/transform-dis-arm-multi.inc.h b/lib/arm/transform-dis-arm-multi.inc.h
index 2e6a62d..18fa4c1 100644
--- a/lib/arm/transform-dis-arm-multi.inc.h
+++ b/lib/arm/transform-dis-arm-multi.inc.h
@@ -1,5 +1,28 @@
#include "arm/assemble.h"
+/* Build the assemble_ctx used to emit replacement code for the instruction
+ * currently held in ctx->op.  'cond' is the condition that emitted ARM-mode
+ * instructions will carry. */
+static struct assemble_ctx tdctx_to_actx(const struct transform_dis_ctx *ctx) {
+    int cond;
+    if (ctx->arch.pc_low_bit) {
+        /* Thumb: the emitted Thumb encodings carry no condition field, and
+         * the top nibble of ctx->op is not a condition here */
+        cond = 0xe;
+    } else {
+        /* ARM: reuse the condition of the instruction being replaced; 0xf
+         * is treated as "always" */
+        cond = ctx->op >> 28;
+        if (cond == 0xf)
+            cond = 0xe;
+    }
+    return (struct assemble_ctx) {
+        ctx->rewritten_ptr_ptr,
+        ctx->arch.pc_low_bit,
+        cond
+    };
+}
+
+/* Return the opposite of condition code cc (flips the low bit).  Aborts for
+ * 0xe and above, which have no inverse. */
+static int invert_arm_cond(int cc) {
+ if (cc >= 0xe)
+ __builtin_abort();
+ return cc ^ 1;
+}
+
static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx,
unsigned o0, unsigned o1, unsigned o2, unsigned o3, unsigned out_mask) {
#ifdef TRANSFORM_DIS_VERBOSE
@@ -16,7 +39,7 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx,
newval[3] = o3;
void **codep = ctx->rewritten_ptr_ptr;
- struct assemble_ctx actx = {ctx->rewritten_ptr_ptr, ctx->arch.pc_low_bit};
+ struct assemble_ctx actx = tdctx_to_actx(ctx);
/* A few cases:
* 1. Move to PC that does not read PC. Probably fine.
@@ -41,7 +64,7 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx,
else if (newval[i] != null_op)
in_regs |= 1 << newval[i];
}
- if (out_mask & IS_LDRD_STRD)
+ if (out_mask & DFLAG_IS_LDRD_STRD)
in_regs |= 1 << (newval[0] + 1);
uint32_t pc = ctx->pc + (ctx->arch.pc_low_bit ? 4 : 8);
int scratch = __builtin_ctz(~(in_regs | (1 << out_reg)));
@@ -64,7 +87,8 @@ static NOINLINE UNUSED void transform_dis_data(struct transform_dis_ctx *ctx,
ctx->write_newop_here = *codep; *codep += ctx->op_size;
STRri(actx, scratch, 13, 4);
POPmulti(actx, 1 << scratch | 1 << 15);
- transform_dis_ret(ctx);
+ /* only a PC write that always executes is a guaranteed exit; a
+  * conditional one can fall through to the next instruction */
+ if (actx.cond == 0xe)
+ transform_dis_ret(ctx);
} else {
if (out_reg != -1 && !(in_regs & 1 << out_reg)) {
/* case 3 - ignore scratch */
@@ -98,7 +122,7 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx,
(void *) dpc, reg, load_mode);
#endif
ctx->write_newop_here = NULL;
- struct assemble_ctx actx = {ctx->rewritten_ptr_ptr, ctx->arch.pc_low_bit};
+ struct assemble_ctx actx = tdctx_to_actx(ctx);
if (reg == 15) {
int scratch = 0;
PUSHone(actx, scratch);
@@ -115,3 +139,56 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx,
LDRxi(actx, reg, reg, 0, load_mode);
}
}
+
+/* Rewrite a PC-relative branch found at ctx->pc whose target is dpc.
+ * The branch is replaced by an absolute jump (LDR_PC); depending on cc it is
+ * preceded by a short branch on the inverted condition (CC_ARMCC) or by the
+ * original CBZ/CBNZ, retargeted and inverted, that skips the jump (CC_CBXZ).
+ * Branches into the region being patched are rejected via ctx->err. */
+static NOINLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx,
+                                                 uintptr_t dpc, int cc) {
+#ifdef TRANSFORM_DIS_VERBOSE
+    printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
+#endif
+    if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
+        /* don't support this for now */
+        /* making the simplifying assumption here that functions will not try
+         * to branch into the middle of an IT block, which is the case where
+         * pc_patch_end changes to include additional instructions (as opposed
+         * to include the end of a partially included instruction, which is
+         * common) */
+        ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
+        return;
+    }
+    struct assemble_ctx actx = tdctx_to_actx(ctx);
+    ctx->write_newop_here = NULL;
+    if ((cc & CC_ARMCC) == CC_ARMCC) {
+        actx.cond = invert_arm_cond(cc & 0xf);
+        /* Bccrel measures its offset from the start of the branch, so the
+         * skip has to cover the branch itself (2 bytes Thumb, 4 bytes ARM)
+         * plus the 8 bytes LDR_PC emits; a plain 8 would land inside the
+         * literal. */
+        Bccrel(actx, (actx.thumb ? 2 : 4) + 8);
+    } else if ((cc & CC_CBXZ) == CC_CBXZ) {
+        /* NOTE(review): newval[0] is stored into the CBZ/CBNZ target field,
+         * which the disassembler reads as halfwords relative to pc + 4 -
+         * confirm that 2+8 is meant in those units. */
+        ctx->modify = true;
+        ctx->newval[0] = 2+8;
+        ctx->newval[1] = 1; /* do invert */
+        void **codep = ctx->rewritten_ptr_ptr;
+        ctx->write_newop_here = *codep; *codep += 2;
+    }
+    actx.cond = 0xe;
+    /* stay in the instruction set of the code being transformed: only set
+     * the Thumb bit of the destination when that code is Thumb */
+    LDR_PC(actx, dpc | (actx.thumb ? 1 : 0));
+}
+
+/* Runs before each instruction is disassembled.  When the instruction is
+ * covered by an IT block (its it_conds slot is not 0xe), reserve 2 bytes in
+ * the output for a branch on the inverted condition and remember where they
+ * are; transform_dis_post_dis fills them in. */
+static void transform_dis_pre_dis(struct transform_dis_ctx *ctx) {
+    uint8_t cur_cond = ctx->arch.it_conds[0];
+    if (cur_cond == 0xe) {
+        ctx->arch.bccrel_p = NULL;
+        return;
+    }
+    /* for simplicity we turn IT into a series of branches for each
+     * instruction, so... */
+    ctx->arch.bccrel_bits = invert_arm_cond(cur_cond);
+    void **outp = ctx->rewritten_ptr_ptr;
+    ctx->arch.bccrel_p = *outp;
+    *outp += 2;
+}
+
+/* Runs after each instruction has been handled.  If transform_dis_pre_dis
+ * reserved space for a branch, emit it there so that it jumps to the current
+ * end of the rewritten code.  Then request another iteration whenever
+ * it_conds[0] is not 0xe. */
+static void transform_dis_post_dis(struct transform_dis_ctx *ctx) {
+    void *bcc_at = ctx->arch.bccrel_p;
+    if (bcc_at != NULL) {
+        /* distance from the reserved slot to the end of the output */
+        int skip = *ctx->rewritten_ptr_ptr - bcc_at;
+        struct assemble_ctx actx = {
+            .codep = &ctx->arch.bccrel_p,
+            .thumb = true,
+            .cond = ctx->arch.bccrel_bits,
+        };
+        Bccrel(actx, skip);
+    }
+    ctx->force_keep_transforming = ctx->arch.it_conds[0] != 0xe;
+}
diff --git a/lib/arm64/jump-patch.h b/lib/arm64/jump-patch.h
index aa818d3..cc94f90 100644
--- a/lib/arm64/jump-patch.h
+++ b/lib/arm64/jump-patch.h
@@ -1,7 +1,6 @@
#pragma once
#include "arm64/assemble.h"
#define MAX_JUMP_PATCH_SIZE 12
-#define MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
static inline int jump_patch_size(uintptr_t pc, uintptr_t dpc,
UNUSED struct arch_dis_ctx arch,
bool force) {
diff --git a/lib/arm64/misc.h b/lib/arm64/misc.h
index c7fa5c9..84bd638 100644
--- a/lib/arm64/misc.h
+++ b/lib/arm64/misc.h
@@ -4,4 +4,6 @@
#define TARGET_JUMP_PATCH_HDR "arm64/jump-patch.h"
#define TARGET_TRANSFORM_DIS_HEADER "arm64/transform-dis-arm64.inc.h"
#define MIN_INSN_SIZE 4
+#define TD_MAX_REWRITTEN_SIZE (7 * 2 * 4) /* also conservative */
struct arch_dis_ctx {};
+static inline void arch_dis_ctx_init(UNUSED struct arch_dis_ctx *ctx) {}
diff --git a/lib/arm64/transform-dis-arm64.inc.h b/lib/arm64/transform-dis-arm64.inc.h
index 682613a..af2d4c7 100644
--- a/lib/arm64/transform-dis-arm64.inc.h
+++ b/lib/arm64/transform-dis-arm64.inc.h
@@ -16,3 +16,18 @@ static NOINLINE UNUSED void transform_dis_pcrel(struct transform_dis_ctx *ctx,
}
}
+/* Branch handler for arm64.  A branch whose target lies inside the region
+ * being patched is rejected through ctx->err; any other branch is not
+ * rewritten yet (TODO), and cc is unused. */
+static NOINLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx,
+                                                 uintptr_t dpc, int cc) {
+    (void) cc; /* TODO */
+#ifdef TRANSFORM_DIS_VERBOSE
+    printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
+#endif
+    bool targets_patch = ctx->pc_patch_start <= dpc && dpc < ctx->pc_patch_end;
+    if (targets_patch)
+        ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
+}
+
+/* per-instruction hooks called around transform_dis_dis; nothing to do on
+ * arm64, whose arch_dis_ctx carries no state */
+static void transform_dis_pre_dis(UNUSED struct transform_dis_ctx *ctx) {}
+static void transform_dis_post_dis(UNUSED struct transform_dis_ctx *ctx) {}
diff --git a/lib/dis.h b/lib/dis.h
index 216578c..a5af29a 100644
--- a/lib/dis.h
+++ b/lib/dis.h
@@ -116,3 +116,9 @@ static inline void op32(void **codep, uint32_t op) {
*codep += 4;
}
+/* Store the 16-bit value op at *codep and advance *codep by 2 bytes. */
+static inline void op16(void **codep, uint16_t op) {
+    uint16_t *out = *codep;
+    *out++ = op;
+    *codep = out;
+}
+
+#define CC_CONDITIONAL 0x100
diff --git a/lib/hook-functions.c b/lib/hook-functions.c
index 9d34400..986990b 100644
--- a/lib/hook-functions.c
+++ b/lib/hook-functions.c
@@ -151,7 +151,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
struct hook_internal *hi = &his[i];
void *code = hook->function;
struct arch_dis_ctx arch;
- memset(&arch, 0, sizeof(arch));
+ arch_dis_ctx_init(&arch);
#ifdef __arm__
if ((uintptr_t) code & 1) {
arch.pc_low_bit = true;
@@ -183,9 +183,9 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
uintptr_t pc_patch_end = pc_patch_start + patch_size;
/* Generate the rewritten start of the function for the outro
* trampoline (complaining if any bad instructions are found). */
- uint8_t rewritten_temp[MAX_REWRITTEN_SIZE];
+ uint8_t rewritten_temp[TD_MAX_REWRITTEN_SIZE];
void *rp = rewritten_temp;
- if ((ret = transform_dis_main(code, &rp, pc_patch_start, pc_patch_end,
+ if ((ret = transform_dis_main(code, &rp, pc_patch_start, &pc_patch_end,
arch, hi->offset_by_pcdiff)))
goto end;
/* Check some of the rest of the function for jumps back into the
diff --git a/lib/jump-dis.c b/lib/jump-dis.c
index 5e06460..7ba3608 100644
--- a/lib/jump-dis.c
+++ b/lib/jump-dis.c
@@ -116,6 +116,9 @@ static INLINE UNUSED void jump_dis_bad(struct jump_dis_ctx *ctx) {
ctx->continue_after_this_insn = false;
}
+/* Thumb IT instruction: jump_dis takes no action for it. */
+static INLINE UNUSED void jump_dis_thumb_it(UNUSED struct jump_dis_ctx *ctx) {
+}
+
static void jump_dis_dis(struct jump_dis_ctx *ctx);
bool jump_dis_main(void *code_ptr, uintptr_t pc_patch_start, uintptr_t pc_patch_end,
diff --git a/lib/transform-dis.c b/lib/transform-dis.c
index 1e66bd1..3210333 100644
--- a/lib/transform-dis.c
+++ b/lib/transform-dis.c
@@ -3,6 +3,7 @@
#include "substitute.h"
#include "dis.h"
+#include "transform-dis.h"
#include <stdbool.h>
#include <stdint.h>
@@ -14,6 +15,8 @@ struct transform_dis_ctx {
int err;
uintptr_t pc_patch_start;
+ /* this is only tentative - it will be updated to include parts of
+ * instructions poking out, and instructions forced to be transformed by IT */
uintptr_t pc_patch_end;
uintptr_t pc;
int op_size;
@@ -21,6 +24,9 @@ struct transform_dis_ctx {
unsigned newop;
unsigned newval[4];
+ /* for IT - eww */
+ bool force_keep_transforming;
+
const void *ptr;
void **rewritten_ptr_ptr;
void *write_newop_here;
@@ -36,50 +42,46 @@ struct transform_dis_ctx {
/* largely similar to jump_dis */
-static INLINE UNUSED void transform_dis_ret(struct transform_dis_ctx *ctx) {
- /* ret is okay if it's at the end of the patch */
+static INLINE UNUSED
+void transform_dis_ret(struct transform_dis_ctx *ctx) {
+ /* ret is okay if it's at the end of the required patch (past the original
+ * patch size is good too) */
if (ctx->pc + ctx->op_size < ctx->pc_patch_end)
ctx->err = SUBSTITUTE_ERR_FUNC_TOO_SHORT;
}
-static INLINE UNUSED void transform_dis_branch(struct transform_dis_ctx *ctx,
- uintptr_t dpc, UNUSED bool conditional) {
-#ifdef TRANSFORM_DIS_VERBOSE
- printf("transform_dis (%p): branch => %p\n", (void *) ctx->pc, (void *) dpc);
-#endif
- if (dpc >= ctx->pc_patch_start && dpc < ctx->pc_patch_end) {
- /* don't support this for now */
- ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
- }
- /* branch out of bounds is fine */
- /* XXX just kidding, the instruction needs to be rewritten obviously. what
- * was I thinking? */
-}
-
-static INLINE UNUSED void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) {
+static INLINE UNUSED
+void transform_dis_unidentified(UNUSED struct transform_dis_ctx *ctx) {
#ifdef TRANSFORM_DIS_VERBOSE
printf("transform_dis (%p): unidentified\n", (void *) ctx->pc);
#endif
/* this isn't exhaustive, so unidentified is fine */
}
-static INLINE UNUSED void transform_dis_bad(struct transform_dis_ctx *ctx) {
+static INLINE UNUSED
+void transform_dis_bad(struct transform_dis_ctx *ctx) {
ctx->err = SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START;
}
+static INLINE UNUSED
+void transform_dis_thumb_it(UNUSED struct transform_dis_ctx *ctx) {
+ /* ignore, since it was turned into B */
+}
static void transform_dis_dis(struct transform_dis_ctx *ctx);
+static void transform_dis_pre_dis(struct transform_dis_ctx *ctx);
+static void transform_dis_post_dis(struct transform_dis_ctx *ctx);
int transform_dis_main(const void *restrict code_ptr,
void **restrict rewritten_ptr_ptr,
uintptr_t pc_patch_start,
- uintptr_t pc_patch_end,
+ uintptr_t *pc_patch_end_p,
struct arch_dis_ctx initial_arch_ctx,
int *offset_by_pcdiff) {
struct transform_dis_ctx ctx;
memset(&ctx, 0, sizeof(ctx));
ctx.pc_patch_start = pc_patch_start;
- ctx.pc_patch_end = pc_patch_end;
+ ctx.pc_patch_end = *pc_patch_end_p;
ctx.pc = pc_patch_start;
ctx.arch = initial_arch_ctx;
/* data is written to rewritten both by this function directly and, in case
@@ -88,12 +90,16 @@ int transform_dis_main(const void *restrict code_ptr,
void *rewritten_start = *rewritten_ptr_ptr;
int written_pcdiff = 0;
offset_by_pcdiff[written_pcdiff++] = 0;
- while (ctx.pc < ctx.pc_patch_end) {
+ /* keep going until the requested patch range is covered, and past it for
+  * as long as post_dis asks for more (the next instruction still belongs
+  * to an IT block); the final end is reported through *pc_patch_end_p */
+ while (ctx.pc < ctx.pc_patch_end || ctx.force_keep_transforming) {
ctx.modify = false;
ctx.err = 0;
ctx.ptr = code_ptr + (ctx.pc - pc_patch_start);
+
+ transform_dis_pre_dis(&ctx);
+
void *rewritten_ptr = *rewritten_ptr_ptr;
ctx.write_newop_here = rewritten_ptr;
+
transform_dis_dis(&ctx);
if (ctx.err)
@@ -111,11 +117,16 @@ int transform_dis_main(const void *restrict code_ptr,
*rewritten_ptr_ptr += ctx.op_size;
}
ctx.pc += ctx.op_size;
+
+ transform_dis_post_dis(&ctx);
+
int pcdiff = ctx.pc - ctx.pc_patch_start;
while (written_pcdiff < pcdiff)
offset_by_pcdiff[written_pcdiff++] = -1;
- offset_by_pcdiff[written_pcdiff++] = (int) (*rewritten_ptr_ptr - rewritten_start);
+ offset_by_pcdiff[written_pcdiff++] =
+ (int) (*rewritten_ptr_ptr - rewritten_start);
}
+ *pc_patch_end_p = ctx.pc;
return SUBSTITUTE_OK;
}
diff --git a/lib/transform-dis.h b/lib/transform-dis.h
index dec38a1..3ab120b 100644
--- a/lib/transform-dis.h
+++ b/lib/transform-dis.h
@@ -5,6 +5,6 @@
int transform_dis_main(const void *restrict code_ptr,
void **restrict rewritten_ptr_ptr,
uintptr_t pc_patch_start,
- uintptr_t pc_patch_end,
+ uintptr_t *pc_patch_end,
struct arch_dis_ctx initial_arch_ctx,
int *offset_by_pcdiff);