Diffstat (limited to 'lib')
-rw-r--r--  lib/darwin/execmem.c            | 432
-rw-r--r--  lib/darwin/inject.c             |  44
-rw-r--r--  lib/darwin/mach-decls.h         |  22
-rw-r--r--  lib/darwin/stop-other-threads.c | 163
-rw-r--r--  lib/darwin/substrate-compat.c   |   2
-rw-r--r--  lib/execmem.h                   |  21
-rw-r--r--  lib/hook-functions.c            |  75
-rw-r--r--  lib/stop-other-threads.h        |   9
-rw-r--r--  lib/substitute.h                |   9
9 files changed, 466 insertions(+), 311 deletions(-)
diff --git a/lib/darwin/execmem.c b/lib/darwin/execmem.c
index 76f0643..3048cd8 100644
--- a/lib/darwin/execmem.c
+++ b/lib/darwin/execmem.c
@@ -1,60 +1,39 @@
+/* define to avoid error that ucontext is "deprecated" (it's unavoidable with
+ * sigaction!) */
+#define _XOPEN_SOURCE 700
+#define _DARWIN_C_SOURCE
+#include "cbit/htab.h"
#include "execmem.h"
-#include "darwin/manual-syscall.h"
+/* #include "darwin/manual-syscall.h" */
+#include "darwin/mach-decls.h"
#include "substitute.h"
+#include "substitute-internal.h"
#include <mach/mach.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <errno.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <signal.h>
-int execmem_write(void *dest, const void *src, size_t len) {
- /* Use vm_region to determine the original protection, so we can mprotect
- * it back afterwards. (Note: PROT_* are equal to VM_PROT_*.) */
- vm_address_t region = (vm_address_t) dest;
- vm_size_t region_len = 0;
- struct vm_region_submap_short_info_64 info;
- mach_msg_type_number_t info_count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
- natural_t max_depth = 99999;
- kern_return_t kr = vm_region_recurse_64(mach_task_self(), &region, &region_len,
- &max_depth,
- (vm_region_recurse_info_t) &info,
- &info_count);
- if (kr) {
- /* Weird; this probably means the region doesn't exist, but we should
- * have already read from the memory in order to generate the patch. */
- errno = 0;
- return SUBSTITUTE_ERR_VM;
- }
+#define port_hash(portp) (*(portp))
+#define port_eq(port1p, port2p) (*(port1p) == *(port2p))
+#define port_null(portp) (*(portp) == MACH_PORT_NULL)
+DECL_STATIC_HTAB_KEY(mach_port_t, mach_port_t, port_hash, port_eq, port_null, 0);
+struct empty {};
+DECL_HTAB(mach_port_set, mach_port_t, struct empty);
- uintptr_t lopage = (uintptr_t) dest & ~PAGE_MASK;
- uintptr_t hipage = ((uintptr_t) dest + len + PAGE_MASK) & ~PAGE_MASK;
-
- /* We do the syscall manually just in case the user is trying to write to
+ /* ORPHAN: We do the syscall manually just in case the user is trying to write to
* the mprotect syscall stub itself, or one of the functions it calls.
* (Obviously, it will still break if the user targets some libsubstitute
* function within the same page as this one, though.) */
- int ret = manual_syscall(SYS_mprotect, lopage, hipage - lopage,
- PROT_READ | PROT_WRITE, 0);
- if (ret) {
- errno = ret;
- return SUBSTITUTE_ERR_VM;
- }
- /* volatile to avoid compiler transformation to call to memcpy */
- volatile uint8_t *d8 = dest;
- const uint8_t *s8 = src;
- while (len--)
- *d8++ = *s8++;
-
- int oldprot = info.protection & (PROT_READ | PROT_WRITE | PROT_EXEC);
- ret = manual_syscall(SYS_mprotect, lopage, hipage - lopage,
- oldprot, 0);
- if (ret) {
- errno = ret;
- return SUBSTITUTE_ERR_VM;
- }
- return SUBSTITUTE_OK;
-}
+/* This should only run on the main thread, so just use globals. */
+static HTAB_STORAGE(mach_port_set) g_suspended_ports;
+static struct sigaction old_segv, old_bus;
+static execmem_pc_patch_callback g_pc_patch_callback;
+static void *g_pc_patch_callback_ctx;
int execmem_alloc_unsealed(uintptr_t hint, void **page_p, size_t *size_p) {
*size_p = PAGE_SIZE;
@@ -74,3 +53,368 @@ int execmem_seal(void *page) {
void execmem_free(void *page) {
munmap(page, PAGE_SIZE);
}
+
+#if defined(__x86_64__)
+ typedef struct __darwin_x86_thread_state64 native_thread_state;
+ #define NATIVE_THREAD_STATE_FLAVOR x86_THREAD_STATE64
+#elif defined(__i386__)
+ typedef struct __darwin_i386_thread_state native_thread_state;
+ #define NATIVE_THREAD_STATE_FLAVOR x86_THREAD_STATE32
+#elif defined(__arm__)
+ typedef struct __darwin_arm_thread_state native_thread_state;
+ #define NATIVE_THREAD_STATE_FLAVOR ARM_THREAD_STATE
+#elif defined(__arm64__)
+ typedef struct __darwin_arm_thread_state64 native_thread_state;
+ #define NATIVE_THREAD_STATE_FLAVOR ARM_THREAD_STATE64
+#else
+ #error ?
+#endif
+
+/* returns whether it changed */
+static bool apply_one_pcp_with_state(native_thread_state *state,
+ execmem_pc_patch_callback callback,
+ void *ctx) {
+
+ uintptr_t *pcp;
+#if defined(__x86_64__)
+ pcp = (uintptr_t *) &state->__rip;
+#elif defined(__i386__)
+ pcp = (uintptr_t *) &state->__eip;
+#elif defined(__arm__) || defined(__arm64__)
+ pcp = (uintptr_t *) &state->__pc;
+#endif
+ uintptr_t old = *pcp;
+#ifdef __arm__
+ /* thumb */
+ if (state->__cpsr & 0x20)
+ old |= 1;
+#endif
+ uintptr_t new = callback(ctx, old);
+ bool changed = new != old;
+ *pcp = new;
+#ifdef __arm__
+ *pcp &= ~1;
+ state->__cpsr = (state->__cpsr & ~0x20) | ((new & 1) * 0x20);
+#endif
+ return changed;
+}
+
+static int apply_one_pcp(mach_port_t thread, execmem_pc_patch_callback callback,
+ void *ctx) {
+ native_thread_state state;
+ mach_msg_type_number_t real_cnt = sizeof(state) / sizeof(int);
+ mach_msg_type_number_t cnt = real_cnt;
+ kern_return_t kr = thread_get_state(thread, NATIVE_THREAD_STATE_FLAVOR,
+ (thread_state_t) &state, &cnt);
+ if (kr == KERN_TERMINATED)
+ return SUBSTITUTE_OK;
+ if (kr || cnt != real_cnt)
+ return SUBSTITUTE_ERR_ADJUSTING_THREADS;
+
+ if (apply_one_pcp_with_state(&state, callback, ctx)) {
+ kr = thread_set_state(thread, NATIVE_THREAD_STATE_FLAVOR,
+ (thread_state_t) &state, real_cnt);
+ if (kr)
+ return SUBSTITUTE_ERR_ADJUSTING_THREADS;
+ }
+ return SUBSTITUTE_OK;
+}
+
+static void resume_other_threads();
+
+static int stop_other_threads() {
+ /* pthread_main should have already been checked. */
+
+ int ret;
+ mach_port_t self = mach_thread_self();
+
+ /* The following shenanigans are for catching any new threads that are
+ * created while we're looping, without suspending anything twice. Keep
+ * looping until only threads we already suspended before this loop are
+ * there. */
+ HTAB_STORAGE_INIT(&g_suspended_ports, mach_port_set);
+ struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;
+
+ bool got_new;
+ do {
+ got_new = false;
+
+ thread_act_port_array_t ports;
+ mach_msg_type_number_t nports;
+
+ kern_return_t kr = task_threads(mach_task_self(), &ports, &nports);
+ if (kr) { /* ouch */
+ ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
+ goto fail;
+ }
+
+ for (mach_msg_type_number_t i = 0; i < nports; i++) {
+ mach_port_t port = ports[i];
+ struct htab_bucket_mach_port_set *bucket;
+ if (port == self ||
+ (bucket = htab_setbucket_mach_port_set(suspended_set, &port),
+ bucket->key)) {
+ /* already suspended, ignore */
+ mach_port_deallocate(mach_task_self(), port);
+ } else {
+ got_new = true;
+ kr = thread_suspend(port);
+ if (kr == KERN_TERMINATED) {
+ /* too late */
+ mach_port_deallocate(mach_task_self(), port);
+ } else if (kr) {
+ ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
+ for (; i < nports; i++)
+ mach_port_deallocate(mach_task_self(), ports[i]);
+ vm_deallocate(mach_task_self(), (vm_address_t) ports,
+ nports * sizeof(*ports));
+ goto fail;
+ }
+ bucket->key = port;
+ }
+ }
+ vm_deallocate(mach_task_self(), (vm_address_t) ports,
+ nports * sizeof(*ports));
+ } while(got_new);
+
+ /* Success - keep the set around for when we're done. */
+ return SUBSTITUTE_OK;
+
+fail:
+ resume_other_threads();
+ return ret;
+}
+
+static void resume_other_threads() {
+ struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;
+ HTAB_FOREACH(suspended_set, mach_port_t *threadp,
+ UNUSED struct empty *_,
+ mach_port_set) {
+ thread_resume(*threadp);
+ mach_port_deallocate(mach_task_self(), *threadp);
+ }
+ htab_free_storage_mach_port_set(suspended_set);
+}
+
+static void segfault_handler(UNUSED int sig, UNUSED siginfo_t *info,
+ void *uap_) {
+ /* We didn't catch it before it segfaulted so have to fix it up here. */
+ ucontext_t *uap = uap_;
+ apply_one_pcp_with_state(&uap->uc_mcontext->__ss, g_pc_patch_callback,
+ g_pc_patch_callback_ctx);
+ /* just let it continue, whatever */
+}
+
+static int init_pc_patch(execmem_pc_patch_callback callback, void *ctx) {
+ g_pc_patch_callback = callback;
+ g_pc_patch_callback_ctx = ctx;
+ int ret;
+ if ((ret = stop_other_threads()))
+ return ret;
+
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = segfault_handler;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART | SA_NODEFER | SA_SIGINFO;
+
+ if (sigaction(SIGSEGV, &sa, &old_segv))
+ return SUBSTITUTE_ERR_ADJUSTING_THREADS;
+ if (sigaction(SIGBUS, &sa, &old_bus)) {
+ sigaction(SIGSEGV, &old_segv, NULL);
+ return SUBSTITUTE_ERR_ADJUSTING_THREADS;
+ }
+ return SUBSTITUTE_OK;
+}
+
+static int run_pc_patch() {
+ int ret;
+
+ struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;
+ HTAB_FOREACH(suspended_set, mach_port_t *threadp,
+ UNUSED struct empty *_,
+ mach_port_set) {
+ if ((ret = apply_one_pcp(*threadp, g_pc_patch_callback,
+ g_pc_patch_callback_ctx)))
+ return ret;
+ }
+
+ return SUBSTITUTE_OK;
+}
+
+static int finish_pc_patch() {
+ if (sigaction(SIGBUS, &old_bus, NULL) ||
+ sigaction(SIGSEGV, &old_segv, NULL))
+ return SUBSTITUTE_ERR_ADJUSTING_THREADS;
+
+ resume_other_threads();
+ return SUBSTITUTE_OK;
+}
+
+static int compare_dsts(const void *a, const void *b) {
+ void *dst_a = ((struct execmem_foreign_write *) a)->dst;
+ void *dst_b = ((struct execmem_foreign_write *) b)->dst;
+ return dst_a < dst_b ? -1 : dst_a > dst_b ? 1 : 0;
+}
+
+static kern_return_t get_page_prot(uintptr_t ptr, vm_prot_t *prot,
+ vm_inherit_t *inherit) {
+
+ vm_address_t region = (vm_address_t) ptr;
+ vm_size_t region_len = 0;
+ struct vm_region_submap_short_info_64 info;
+ mach_msg_type_number_t info_count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
+ natural_t max_depth = 99999;
+ kern_return_t kr = vm_region_recurse_64(mach_task_self(), &region, &region_len,
+ &max_depth,
+ (vm_region_recurse_info_t) &info,
+ &info_count);
+ *prot = info.protection & (PROT_READ | PROT_WRITE | PROT_EXEC);
+ *inherit = info.inheritance;
+ return kr;
+}
+
+static void manual_memcpy(void *restrict dest, const void *src, size_t len) {
+ /* volatile to avoid compiler transformation to call to memcpy */
+ volatile uint8_t *d8 = dest;
+ const uint8_t *s8 = src;
+ while (len--)
+ *d8++ = *s8++;
+}
+
+int execmem_foreign_write_with_pc_patch(struct execmem_foreign_write *writes,
+ size_t nwrites,
+ execmem_pc_patch_callback callback,
+ void *callback_ctx) {
+ int ret;
+
+ qsort(writes, nwrites, sizeof(*writes), compare_dsts);
+
+ size_t last;
+ for (size_t first = 0; first < nwrites; first = last + 1) {
+ const struct execmem_foreign_write *first_write = &writes[first];
+ uintptr_t page_start = (uintptr_t) first_write->dst & ~PAGE_MASK;
+ uintptr_t page_end = ((uintptr_t) first_write->dst +
+ first_write->len - 1) & ~PAGE_MASK;
+
+ last = first;
+ while (last + 1 < nwrites) {
+ const struct execmem_foreign_write *write = &writes[last + 1];
+ uintptr_t this_start = (uintptr_t) write->dst & ~PAGE_MASK;
+ uintptr_t this_end = ((uintptr_t) write->dst +
+ write->len - 1) & ~PAGE_MASK;
+ if (page_start <= this_start && this_start <= page_end) {
+ if (this_end > page_end)
+ page_end = this_end;
+ } else if (page_start <= this_end && this_end <= page_end) {
+ if (this_start < page_start)
+ page_start = this_start;
+ } else {
+ break;
+ }
+ last++;
+ }
+ size_t len = page_end - page_start + PAGE_SIZE;
+
+ vm_prot_t prot;
+ vm_inherit_t inherit;
+ /* Assume that a single patch region will be pages of all the same
+ * protection, since the alternative is probably someone doing
+ * something wrong. */
+ kern_return_t kr = get_page_prot(page_start, &prot, &inherit);
+ if (kr) {
+ /* Weird; this probably means the region doesn't exist, but we should
+ * have already read from the memory in order to generate the patch. */
+ return SUBSTITUTE_ERR_VM;
+ }
+ /* Instead of trying to set the existing region to write, which may
+ * fail due to max_protection, we make a fresh copy and remap it over
+ * the original. */
+ void *new = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_SHARED, -1, 0);
+ if (new == MAP_FAILED)
+ return SUBSTITUTE_ERR_VM;
+ /* Ideally, if the original page wasn't mapped anywhere else, no actual
+ * copy will take place: new will be CoW, then we unmap the original so
+ * new becomes the sole owner before actually writing. Though, for all
+ * I know, these trips through the VM system could be slower than just
+ * memcpying a page or two... */
+ kr = vm_copy(mach_task_self(), page_start, len, (vm_address_t) new);
+ if (kr) {
+ ret = SUBSTITUTE_ERR_VM;
+ goto fail_unmap;
+ }
+ if (callback) {
+ /* Set the segfault handler - stopping all other threads before
+ * doing so in case they were using it for something (this
+ * happens). One might think the latter makes segfaults
+ * impossible, but we can't prevent injectors from making new
+ * threads that might run during this process. Hopefully no
+ * *injected* threads try to use segfault handlers for something!
+ */
+ if ((ret = init_pc_patch(callback, callback_ctx)))
+ goto fail_unmap;
+ }
+ /* Disable access to the page so anyone trying to execute there
+ * will segfault. */
+ if (mmap((void *) page_start, len, PROT_NONE,
+ MAP_ANON | MAP_SHARED | MAP_FIXED, -1, 0) == MAP_FAILED) {
+ ret = SUBSTITUTE_ERR_VM;
+ goto fail_unmap;
+ }
+ /* Write patches to the copy. */
+ for (size_t i = first; i <= last; i++) {
+ struct execmem_foreign_write *write = &writes[i];
+ ptrdiff_t off = (uintptr_t) write->dst - page_start;
+ manual_memcpy(new + off, write->src, write->len);
+ }
+ if (callback) {
+ /* Actually run the callback for any threads which are paused at an
+ * affected PC, or are running and don't get scheduled by the
+ * kernel in time to segfault. Any thread which moves to an
+ * affected PC *after* run_pc_patch() is assumed to do so by
+ * calling the function in question, so they can't get past the
+ * first instruction and it doesn't matter whether or not they're
+ * patched. (A call instruction within the affected region would
+ * break this assumption, as then a thread could move to an
+ * affected PC by returning.) */
+ if ((ret = run_pc_patch()))
+ goto fail_unmap;
+ }
+
+ /* Protect new like the original, and move it into place. */
+ vm_address_t target = page_start;
+ if (mprotect(new, len, prot)) {
+ ret = SUBSTITUTE_ERR_VM;
+ goto fail_unmap;
+ }
+ vm_prot_t c, m;
+ kr = vm_remap(mach_task_self(), &target, len, 0, VM_FLAGS_OVERWRITE,
+ mach_task_self(), (vm_address_t) new, /*copy*/ FALSE,
+ &c, &m, inherit);
+ if (kr) {
+ ret = SUBSTITUTE_ERR_VM;
+ goto fail_unmap;
+ }
+ /* ignore errors... */
+ munmap(new, len);
+ if (callback) {
+ /* Other threads are no longer in danger of segfaulting, so put
+ * back the old segfault handler. */
+ if ((ret = finish_pc_patch()))
+ return ret;
+ }
+
+ continue;
+
+ fail_unmap:
+ /* This is probably useless, since the original page is gone
+ * forever (intentionally, see above). May as well arrange the
+ * deck chairs, though. */
+ munmap(new, len);
+ return ret;
+ }
+
+ /* Shockingly, we made it out! */
+ return SUBSTITUTE_OK;
+}
+
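A minimal standalone sketch of the copy-and-remap trick used by the new execmem_foreign_write_with_pc_patch above: duplicate the target page into a fresh writable mapping, edit the copy, then vm_remap() it over the original with VM_FLAGS_OVERWRITE so the original mapping's max_protection never has to allow writes. This is not part of the patch; the helper name and the fixed r-x protection are assumptions for illustration, and error handling is reduced to the minimum.

    #include <mach/mach.h>
    #include <sys/mman.h>
    #include <string.h>

    /* Patch `patch_len` bytes at offset `off` inside the page at `page`. */
    static kern_return_t overwrite_ro_page(void *page, size_t off,
                                           const void *patch, size_t patch_len) {
        /* fresh anonymous page we are allowed to write to */
        void *copy = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED, -1, 0);
        if (copy == MAP_FAILED)
            return KERN_FAILURE;
        /* duplicate the original contents into the copy (CoW where possible) */
        kern_return_t kr = vm_copy(mach_task_self(), (vm_address_t) page,
                                   PAGE_SIZE, (vm_address_t) copy);
        if (kr) {
            munmap(copy, PAGE_SIZE);
            return kr;
        }
        memcpy((char *) copy + off, patch, patch_len);      /* edit the copy */
        mprotect(copy, PAGE_SIZE, PROT_READ | PROT_EXEC);   /* match the original */
        /* move the copy into place over the original address */
        vm_address_t target = (vm_address_t) page;
        vm_prot_t cur, max;
        kr = vm_remap(mach_task_self(), &target, PAGE_SIZE, 0, VM_FLAGS_OVERWRITE,
                      mach_task_self(), (vm_address_t) copy, /*copy*/ FALSE,
                      &cur, &max, VM_INHERIT_DEFAULT);
        munmap(copy, PAGE_SIZE);
        return kr;
    }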
diff --git a/lib/darwin/inject.c b/lib/darwin/inject.c
index 6ec07fa..6e0c50e 100644
--- a/lib/darwin/inject.c
+++ b/lib/darwin/inject.c
@@ -646,10 +646,14 @@ int substitute_dlopen_in_pid(int pid, const char *filename, int options,
goto fail;
union {
- struct _x86_thread_state_32 x32;
- struct _x86_thread_state_64 x64;
- struct _arm_thread_state_32 a32;
- struct _arm_thread_state_64 a64;
+#if defined(__x86_64__) || defined(__i386__)
+ struct __darwin_i386_thread_state x32;
+ struct __darwin_x86_thread_state64 x64;
+#endif
+#if defined(__arm__) || defined(__arm64__)
+ struct __darwin_arm_thread_state a32;
+ struct __darwin_arm_thread_state64 a64;
+#endif
} u;
size_t state_size;
thread_state_flavor_t flavor;
@@ -658,34 +662,34 @@ int substitute_dlopen_in_pid(int pid, const char *filename, int options,
switch (cputype) {
#if defined(__x86_64__) || defined(__i386__)
case CPU_TYPE_X86_64:
- u.x64.rsp = target_stack_top;
- u.x64.rdi = target_stack_top;
- u.x64.rip = target_code_page + (inject_start_x86_64 - inject_page_start);
+ u.x64.__rsp = target_stack_top;
+ u.x64.__rdi = target_stack_top;
+ u.x64.__rip = target_code_page + (inject_start_x86_64 - inject_page_start);
state_size = sizeof(u.x64);
- flavor = _x86_thread_state_64_flavor;
+ flavor = x86_THREAD_STATE64;
break;
case CPU_TYPE_I386:
- u.x32.esp = target_stack_top;
- u.x32.ecx = target_stack_top;
- u.x32.eip = target_code_page + (inject_start_i386 - inject_page_start);
+ u.x32.__esp = target_stack_top;
+ u.x32.__ecx = target_stack_top;
+ u.x32.__eip = target_code_page + (inject_start_i386 - inject_page_start);
state_size = sizeof(u.x32);
- flavor = _x86_thread_state_32_flavor;
+ flavor = x86_THREAD_STATE32;
break;
#endif
#if defined(__arm__) || defined(__arm64__)
case CPU_TYPE_ARM:
- u.a32.sp = target_stack_top;
- u.a32.r[0] = target_stack_top;
- u.a32.pc = target_code_page + (inject_start_arm - inject_page_start);
+ u.a32.__sp = target_stack_top;
+ u.a32.__r[0] = target_stack_top;
+ u.a32.__pc = target_code_page + (inject_start_arm - inject_page_start);
state_size = sizeof(u.a32);
- flavor = _arm_thread_state_32_flavor;
+ flavor = ARM_THREAD_STATE;
break;
case CPU_TYPE_ARM64:
- u.a64.sp = target_stack_top;
- u.a64.x[0] = target_stack_top;
- u.a64.pc = target_code_page + (inject_start_arm64 - inject_page_start);
+ u.a64.__sp = target_stack_top;
+ u.a64.__x[0] = target_stack_top;
+ u.a64.__pc = target_code_page + (inject_start_arm64 - inject_page_start);
state_size = sizeof(u.a64);
- flavor = _arm_thread_state_64_flavor;
+ flavor = ARM_THREAD_STATE64;
break;
#endif
default:
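For reference, a tiny sketch (not from the patch) of the same official flavors and double-underscore field names used in the read direction, fetching a thread's PC on x86_64; errors are collapsed to returning 0.

    #include <mach/mach.h>
    #include <stdint.h>

    #if defined(__x86_64__)
    static uint64_t thread_pc(thread_act_t thread) {
        x86_thread_state64_t state;
        mach_msg_type_number_t cnt = x86_THREAD_STATE64_COUNT;
        if (thread_get_state(thread, x86_THREAD_STATE64,
                             (thread_state_t) &state, &cnt) != KERN_SUCCESS)
            return 0;
        return state.__rip;   /* __DARWIN_UNIX03 spelling, as in the hunk above */
    }
    #endif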
diff --git a/lib/darwin/mach-decls.h b/lib/darwin/mach-decls.h
index 29ea908..b1c7af6 100644
--- a/lib/darwin/mach-decls.h
+++ b/lib/darwin/mach-decls.h
@@ -1,26 +1,6 @@
#pragma once
#include <stdint.h>
-
-struct _x86_thread_state_32 {
- uint32_t eax, ebx, ecx, edx, edi, esi, ebp, esp;
- uint32_t ss, eflags, eip, cs, ds, es, fs, gs;
-};
-#define _x86_thread_state_32_flavor 1
-struct _x86_thread_state_64 {
- uint64_t rax, rbx, rcx, rdx, rdi, rsi, rbp, rsp;
- uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
- uint64_t rip, rflags, cs, fs, gs;
-};
-#define _x86_thread_state_64_flavor 4
-struct _arm_thread_state_32 {
- uint32_t r[13], sp, lr, pc, cpsr;
-};
-#define _arm_thread_state_32_flavor 9
-struct _arm_thread_state_64 {
- uint64_t x[29], fp, lr, sp, pc;
- uint32_t cpsr, pad;
-};
-#define _arm_thread_state_64_flavor 6
+#include <mach/mach.h>
kern_return_t mach_vm_read_overwrite(vm_map_t, mach_vm_address_t, mach_vm_size_t, mach_vm_address_t, mach_vm_size_t *);
kern_return_t mach_vm_remap(vm_map_t, mach_vm_address_t *, mach_vm_size_t, mach_vm_offset_t, int, vm_map_t, mach_vm_address_t, boolean_t, vm_prot_t *, vm_prot_t *, vm_inherit_t);
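The two prototypes kept in mach-decls.h are the mach_vm_* variants with 64-bit-clean address types. A hypothetical helper (illustration only; the task port is assumed to come from task_for_pid() or similar elsewhere) might use the first one like this:

    #include "darwin/mach-decls.h"
    #include <mach/mach.h>
    #include <stdint.h>

    static kern_return_t read_remote_u64(mach_port_t task, mach_vm_address_t addr,
                                         uint64_t *out) {
        mach_vm_size_t nread = 0;
        kern_return_t kr = mach_vm_read_overwrite(task, addr, sizeof(*out),
                                                  (mach_vm_address_t) (uintptr_t) out,
                                                  &nread);
        if (kr)
            return kr;
        return nread == sizeof(*out) ? KERN_SUCCESS : KERN_FAILURE;
    }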
diff --git a/lib/darwin/stop-other-threads.c b/lib/darwin/stop-other-threads.c
deleted file mode 100644
index ff239f3..0000000
--- a/lib/darwin/stop-other-threads.c
+++ /dev/null
@@ -1,163 +0,0 @@
-#include "substitute.h"
-#include "substitute-internal.h"
-#include "darwin/mach-decls.h"
-#include "stop-other-threads.h"
-#include "cbit/htab.h"
-#include <pthread.h>
-#include <mach/mach.h>
-
-#define port_hash(portp) (*(portp))
-#define port_eq(port1p, port2p) (*(port1p) == *(port2p))
-#define port_null(portp) (*(portp) == MACH_PORT_NULL)
-DECL_STATIC_HTAB_KEY(mach_port_t, mach_port_t, port_hash, port_eq, port_null, 0);
-struct empty {};
-DECL_HTAB(mach_port_set, mach_port_t, struct empty);
-
-static bool apply_one_pcp(mach_port_t thread,
- uintptr_t (*callback)(void *ctx, uintptr_t pc),
- void *ctx) {
- int flavor;
-#if defined(__x86_64__)
- struct _x86_thread_state_64 state;
- flavor = _x86_thread_state_64_flavor;
-#elif defined(__i386__)
- struct _x86_thread_state_32 state;
- flavor = _x86_thread_state_32_flavor;
-#elif defined(__arm__)
- struct _arm_thread_state_32 state;
- flavor = _arm_thread_state_32_flavor;
-#elif defined(__arm64__)
- struct _arm_thread_state_64 state;
- flavor = _arm_thread_state_64_flavor;
-#else
- #error ?
-#endif
-
- mach_msg_type_number_t real_cnt = sizeof(state) / sizeof(int);
- mach_msg_type_number_t cnt = real_cnt;
- kern_return_t kr = thread_get_state(thread, flavor, (thread_state_t) &state, &cnt);
- if (kr || cnt != real_cnt)
- return false;
-
- uintptr_t *pcp;
-#if defined(__x86_64__)
- pcp = (uintptr_t *) &state.rip;
-#elif defined(__i386__)
- pcp = (uintptr_t *) &state.eip;
-#elif defined(__arm__) || defined(__arm64__)
- pcp = (uintptr_t *) &state.pc;
-#endif
- uintptr_t old = *pcp;
-#ifdef __arm__
- /* thumb */
- if (state.cpsr & 0x20)
- old |= 1;
-#endif
- uintptr_t new = callback(ctx, *pcp);
- if (new != old) {
- *pcp = new;
-#ifdef __arm__
- *pcp &= ~1;
- state.cpsr = (state.cpsr & ~0x20) | ((new & 1) * 0x20);
-#endif
- kr = thread_set_state(thread, flavor, (thread_state_t) &state, real_cnt);
- if (kr)
- return false;
- }
- return true;
-}
-
-int stop_other_threads(void **token_ptr) {
- if (!pthread_main_np())
- return SUBSTITUTE_ERR_NOT_ON_MAIN_THREAD;
-
- int ret;
- mach_port_t self = mach_thread_self();
-
- /* The following shenanigans are for catching any new threads that are
- * created while we're looping, without suspending anything twice. Keep
- * looping until only threads we already suspended before this loop are
- * there. */
- HTAB_STORAGE(mach_port_set) *hs = malloc(sizeof(*hs));
- HTAB_STORAGE_INIT(hs, mach_port_set);
- struct htab_mach_port_set *suspended_set = &hs->h;
-
- thread_act_array_t ports = 0;
- mach_msg_type_number_t nports = 0;
-
- bool got_new = true;
- while (got_new) {
- got_new = false;
-
- kern_return_t kr = task_threads(mach_task_self(), &ports, &nports);
- if (kr) { /* ouch */
- ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
- goto fail;
- }
-
- for (mach_msg_type_number_t i = 0; i < nports; i++) {
- mach_port_t port = ports[i];
- struct htab_bucket_mach_port_set *bucket;
- if (port == self ||
- (bucket = htab_setbucket_mach_port_set(suspended_set, &port),
- bucket->key)) {
- /* already suspended, ignore */
- mach_port_deallocate(mach_task_self(), port);
- } else {
- got_new = true;
- kr = thread_suspend(port);
- if (kr == KERN_TERMINATED) {
- /* too late */
- mach_port_deallocate(mach_task_self(), port);
- } else if (kr) {
- ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
- for (; i < nports; i++)
- mach_port_deallocate(mach_task_self(), ports[i]);
- vm_deallocate(mach_task_self(), (vm_address_t) ports,
- nports * sizeof(*ports));
- goto fail;
- }
- bucket->key = port;
- }
- }
- vm_deallocate(mach_task_self(), (vm_address_t) ports,
- nports * sizeof(*ports));
- }
-
- /* Success - keep the set around for when we're done. */
- *token_ptr = suspended_set;
- return SUBSTITUTE_OK;
-
-fail:
- resume_other_threads(suspended_set);
- return ret;
-}
-
-int apply_pc_patch_callback(void *token,
- uintptr_t (*pc_patch_callback)(void *ctx, uintptr_t pc),
- void *ctx) {
- struct htab_mach_port_set *suspended_set = token;
- int ret = SUBSTITUTE_OK;
- HTAB_FOREACH(suspended_set, mach_port_t *threadp,
- UNUSED struct empty *_,
- mach_port_set) {
- if (!apply_one_pcp(*threadp, pc_patch_callback, ctx)) {
- ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
- break;
- }
- }
- return ret;
-}
-
-int resume_other_threads(void *token) {
- struct htab_mach_port_set *suspended_set = token;
- HTAB_FOREACH(suspended_set, mach_port_t *threadp,
- UNUSED struct empty *_,
- mach_port_set) {
- thread_resume(*threadp);
- mach_port_deallocate(mach_task_self(), *threadp);
- }
- htab_free_storage_mach_port_set(suspended_set);
- free(suspended_set);
- return SUBSTITUTE_OK; /* eh */
-}
diff --git a/lib/darwin/substrate-compat.c b/lib/darwin/substrate-compat.c
index 2746795..2cdcf6f 100644
--- a/lib/darwin/substrate-compat.c
+++ b/lib/darwin/substrate-compat.c
@@ -43,7 +43,7 @@ EXPORT
void SubHookFunction(void *symbol, void *replace, void **result) __asm__("SubHookFunction");
void SubHookFunction(void *symbol, void *replace, void **result) {
struct substitute_function_hook hook = {symbol, replace, result};
- int ret = substitute_hook_functions(&hook, 1, SUBSTITUTE_DONT_STOP_THREADS);
+ int ret = substitute_hook_functions(&hook, 1, SUBSTITUTE_NO_THREAD_SAFETY);
if (ret) {
panic("SubHookFunction: substitute_hook_functions returned %s\n",
substitute_strerror(ret));
diff --git a/lib/execmem.h b/lib/execmem.h
index b4860e9..895769d 100644
--- a/lib/execmem.h
+++ b/lib/execmem.h
@@ -1,9 +1,22 @@
#pragma once
-#include <stdlib.h>
-/* Write to a foreign page which is already RX / with unknown permissions. */
-int execmem_write(void *dest, const void *src, size_t len);
-
+#include <sys/types.h>
/* For allocating trampolines - this is just a mmap wrapper. */
int execmem_alloc_unsealed(uintptr_t hint, void **page_p, size_t *size_p);
int execmem_seal(void *page);
void execmem_free(void *page);
+
+/* Write to foreign pages which are already RX or have unknown permissions.
+ * If callback is not NULL, run it on all other threads 'atomically', in the
+ * sense that it will be called on any thread which executed any of the old
+ * instructions in the write region.
+ * Oh, and it might mutate writes (to sort it). */
+struct execmem_foreign_write {
+ void *dst;
+ const void *src;
+ size_t len;
+};
+typedef uintptr_t (*execmem_pc_patch_callback)(void *ctx, uintptr_t pc);
+int execmem_foreign_write_with_pc_patch(struct execmem_foreign_write *writes,
+ size_t nwrites,
+ execmem_pc_patch_callback callback,
+ void *callback_ctx);
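A hypothetical caller of the new interface (illustration only; patch_pc() and the byte buffers are made up): batch two patches in one call and let the callback move any thread whose PC is parked inside the old bytes.

    #include "execmem.h"
    #include <stdint.h>
    #include <stddef.h>

    /* Assumed to map an old PC inside a patched region to a safe PC, e.g. the
     * equivalent offset inside a trampoline. */
    extern uintptr_t patch_pc(void *ctx, uintptr_t pc);

    static int patch_two_sites(void *site_a, const void *bytes_a, size_t len_a,
                               void *site_b, const void *bytes_b, size_t len_b) {
        struct execmem_foreign_write writes[] = {
            { .dst = site_a, .src = bytes_a, .len = len_a },
            { .dst = site_b, .src = bytes_b, .len = len_b },
        };
        /* note: the array may be reordered in place (it is sorted by dst) */
        return execmem_foreign_write_with_pc_patch(writes, 2, patch_pc, NULL);
    }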
diff --git a/lib/hook-functions.c b/lib/hook-functions.c
index 7db06d4..5d1f1d5 100644
--- a/lib/hook-functions.c
+++ b/lib/hook-functions.c
@@ -4,8 +4,8 @@
#include "jump-dis.h"
#include "transform-dis.h"
#include "execmem.h"
-#include "stop-other-threads.h"
#include stringify(TARGET_DIR/jump-patch.h)
+#include <pthread.h>
struct hook_internal {
int offset_by_pcdiff[MAX_JUMP_PATCH_SIZE + 1];
@@ -16,6 +16,7 @@ struct hook_internal {
/* page allocated with execmem_alloc_unsealed - only if we had to allocate
* one when processing this hook */
void *trampoline_page;
+ struct arch_dis_ctx arch_dis_ctx;
};
struct pc_callback_info {
@@ -125,22 +126,21 @@ skip_after:;
EXPORT
int substitute_hook_functions(const struct substitute_function_hook *hooks,
size_t nhooks, int options) {
- struct hook_internal *his = malloc(nhooks * sizeof(*his));
+ bool thread_safe = !(options & SUBSTITUTE_NO_THREAD_SAFETY);
+ if (thread_safe && !pthread_main_np())
+ return SUBSTITUTE_ERR_NOT_ON_MAIN_THREAD;
+
+ struct execmem_foreign_write *fws;
+ struct hook_internal *his = malloc(nhooks * sizeof(*his) +
+ nhooks * sizeof(*fws));
if (!his)
return SUBSTITUTE_ERR_OOM;
+ fws = (void *) (his + nhooks);
for (size_t i = 0; i < nhooks; i++)
his[i].trampoline_page = NULL;
int ret = SUBSTITUTE_OK;
- ssize_t emw_finished_i = -1;
- bool stopped = false;
- void *stop_token;
- if (!(options & SUBSTITUTE_DONT_STOP_THREADS)) {
- if ((ret = stop_other_threads(&stop_token)))
- goto end;
- stopped = true;
- }
void *trampoline_ptr = NULL;
size_t trampoline_size_left = 0;
@@ -160,6 +160,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
}
#endif
hi->code = code;
+ hi->arch_dis_ctx = arch;
uintptr_t pc_patch_start = (uintptr_t) code;
int patch_size;
bool need_intro_trampoline;
@@ -201,6 +202,7 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
}
hi->outro_trampoline = trampoline_ptr;
+ *(void **) hook->old_ptr = hi->outro_trampoline;
uintptr_t dpc = pc_patch_end;
#ifdef __arm__
if (arch.pc_low_bit) {
@@ -229,49 +231,34 @@ int substitute_hook_functions(const struct substitute_function_hook *hooks,
/* Now commit. */
for (size_t i = 0; i < nhooks; i++) {
- const struct substitute_function_hook *hook = &hooks[i];
struct hook_internal *hi = &his[i];
- emw_finished_i = (ssize_t) i;
- if ((ret = execmem_write(hi->code, hi->jump_patch, hi->jump_patch_size))) {
- /* User is probably screwed, since this probably means a failure to
- * re-protect exec, thanks to code signing, so now the function is
- * permanently inaccessible. */
- goto end;
- }
- if (hook->old_ptr)
- *(void **) hook->old_ptr = hi->outro_trampoline;
+ void *page = hi->trampoline_page;
+ if (page)
+ execmem_seal(page);
+ fws[i].dst = hi->code;
+ fws[i].src = hi->jump_patch;
+ fws[i].len = hi->jump_patch_size;
}
- /* *sigh of relief* now we can rewrite the PCs. */
- if (stopped) {
- struct pc_callback_info info = {his, nhooks, false};
- if ((ret = apply_pc_patch_callback(stop_token, pc_callback, &info)))
- goto end;
- if (info.encountered_bad_pc) {
- ret = SUBSTITUTE_ERR_UNEXPECTED_PC_ON_OTHER_THREAD;
- goto end;
- }
+ struct pc_callback_info info = {his, nhooks, false};
+ if ((ret = execmem_foreign_write_with_pc_patch(
+ fws, nhooks, thread_safe ? pc_callback : NULL, &info))) {
+ /* Too late to free the trampolines. Chances are this is fatal anyway. */
+ goto end_dont_free;
+ }
+ if (info.encountered_bad_pc) {
+ ret = SUBSTITUTE_ERR_UNEXPECTED_PC_ON_OTHER_THREAD;
+ goto end_dont_free;
}
end:
+ /* if we failed, get rid of the trampolines. */
for (size_t i = 0; i < nhooks; i++) {
void *page = his[i].trampoline_page;
- if (page) {
- /* if we failed, get rid of the trampolines. if we succeeded, make
- * them executable */
- if (ret && (ssize_t) i >= emw_finished_i) {
- execmem_free(page);
- } else {
- /* we already patched them all, too late to go back.. */
- ret = execmem_seal(page);
- }
- }
- }
- if (stopped) {
- int r2 = resume_other_threads(stop_token);
- if (!ret)
- ret = r2;
+ if (page)
+ execmem_free(page);
}
+end_dont_free:
free(his);
return ret;
}
diff --git a/lib/stop-other-threads.h b/lib/stop-other-threads.h
deleted file mode 100644
index 1f6e639..0000000
--- a/lib/stop-other-threads.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma once
-#include <stdint.h>
-
-/* Stop the world; return token to be used for applying PC patches and resuming. */
-int stop_other_threads(void **token_ptr);
-int apply_pc_patch_callback(void *token,
- uintptr_t (*pc_patch_callback)(void *ctx, uintptr_t pc),
- void *ctx);
-int resume_other_threads(void *token);
diff --git a/lib/substitute.h b/lib/substitute.h
index 8764bcf..2045c3d 100644
--- a/lib/substitute.h
+++ b/lib/substitute.h
@@ -37,9 +37,8 @@ enum {
/* out of memory */
SUBSTITUTE_ERR_OOM,
- /* substitute_hook_functions: mmap or mprotect failure other than ENOMEM
- * (preserved in errno on return)
- * substitute_hook_functions: vm_region failure (errno = 0)
+ /* substitute_hook_functions: mmap, mprotect, vm_copy, or
+ * vm_remap failure
* substitute_hook_objc_message: vm_remap failure
* Most likely to come up with substitute_hook_functions if the kernel is
* preventing pages from being marked executable. */
@@ -48,7 +47,7 @@ enum {
/* substitute_hook_functions: not on the main thread (so stopping all other
* threads would be unsafe, as concurrent attempts to do the same from
* other threads would result in deadlock), and you did not pass
- * SUBSTITUTE_DONT_STOP_THREADS */
+ * SUBSTITUTE_NO_THREAD_SAFETY */
SUBSTITUTE_ERR_NOT_ON_MAIN_THREAD,
/* substitute_hook_functions: when trying to patch the PC of other threads
@@ -86,7 +85,7 @@ const char *substitute_strerror(int err);
/* substitute_hook_functions options */
enum {
- SUBSTITUTE_DONT_STOP_THREADS = 1,
+ SUBSTITUTE_NO_THREAD_SAFETY = 1,
};
/* TODO doc */
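A usage sketch of the renamed flag (not from the patch): hook getpid() from the main thread using the default, thread-safe path; SUBSTITUTE_NO_THREAD_SAFETY would only be passed when the caller knows no other thread can be executing the patched bytes.

    #include "substitute.h"
    #include <stdio.h>
    #include <unistd.h>

    static pid_t (*orig_getpid)(void);

    static pid_t my_getpid(void) {
        return orig_getpid();   /* call through the generated trampoline */
    }

    static void install_hook(void) {
        struct substitute_function_hook hook = {
            (void *) getpid, (void *) my_getpid, (void **) &orig_getpid
        };
        /* options = 0: stop/patch other threads; must run on the main thread.
         * Pass SUBSTITUTE_NO_THREAD_SAFETY to skip that requirement. */
        int ret = substitute_hook_functions(&hook, 1, 0);
        if (ret)
            fprintf(stderr, "hook failed: %s\n", substitute_strerror(ret));
    }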