aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile1
-rw-r--r--lib/dis-arm.inc.h2
-rw-r--r--lib/dis-thumb2.inc.h2
-rw-r--r--lib/dis.h3
-rw-r--r--lib/find-syms.c22
-rw-r--r--lib/interpose.c188
-rw-r--r--lib/substitute-internal.h20
-rw-r--r--lib/substitute.h112
8 files changed, 317 insertions, 33 deletions
diff --git a/Makefile b/Makefile
index d97b130..7d29f12 100644
--- a/Makefile
+++ b/Makefile
@@ -32,6 +32,7 @@ out/jump-dis-arm-multi.o: generated/generic-dis-arm.inc.h generated/generic-dis-
LIB_OBJS := \
out/find-syms.o \
+ out/interpose.o \
out/substrate-compat.o \
out/jump-dis-arm-multi.o
out/libsubstitute.dylib: $(LIB_OBJS)
diff --git a/lib/dis-arm.inc.h b/lib/dis-arm.inc.h
index 4d5f134..b537442 100644
--- a/lib/dis-arm.inc.h
+++ b/lib/dis-arm.inc.h
@@ -19,6 +19,8 @@
LDR: 11111 00 0 U 10 1 1111
*/
+/* TODO: bx lr, and handle conditionals */
+
static inline enum pcrel_load_mode get_arm_load_mode(unsigned op) {
if ((op & 0x7000090) == 0x90) {
return ((op >> 22) & 1) ? PLM_U8 : PLM_U32;
diff --git a/lib/dis-thumb2.inc.h b/lib/dis-thumb2.inc.h
index 30053aa..81b293d 100644
--- a/lib/dis-thumb2.inc.h
+++ b/lib/dis-thumb2.inc.h
@@ -1,5 +1,7 @@
#include "dis.h"
+/* TODO: handle 'it' for conditional br/ret!! */
+
static inline enum pcrel_load_mode get_thumb_load_mode(unsigned op) {
bool sign = (op >> 8) & 1;
switch ((op >> 5) & 3) {
diff --git a/lib/dis.h b/lib/dis.h
index 7e89cfa..5a26d84 100644
--- a/lib/dis.h
+++ b/lib/dis.h
@@ -1,10 +1,11 @@
#pragma once
+#include "substitute-internal.h"
+
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
-#define UNUSED __attribute__((unused))
#define INLINE __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
diff --git a/lib/find-syms.c b/lib/find-syms.c
index 78421c5..84df9b4 100644
--- a/lib/find-syms.c
+++ b/lib/find-syms.c
@@ -1,9 +1,6 @@
#ifdef __APPLE__
#include <stdbool.h>
-#include <mach-o/loader.h>
-#include <mach-o/dyld.h>
-#include <mach-o/dyld_images.h>
#include <dlfcn.h>
#include <pthread.h>
@@ -16,17 +13,6 @@ static pthread_once_t dyld_inspect_once = PTHREAD_ONCE_INIT;
/* and its fruits: */
static uintptr_t (*ImageLoaderMachO_getSlide)(void *);
static const struct mach_header *(*ImageLoaderMachO_machHeader)(void *);
-#ifdef __LP64__
-typedef struct mach_header_64 mach_header_x;
-typedef struct segment_command_64 segment_command_x;
-typedef struct section_64 section_x;
-#define LC_SEGMENT_X LC_SEGMENT_64
-#else
-typedef struct mach_header mach_header_x;
-typedef struct segment_command segment_command_x;
-typedef struct section section_x;
-#define LC_SEGMENT_X LC_SEGMENT
-#endif
static void *sym_to_ptr(substitute_sym *sym, intptr_t slide) {
uintptr_t addr = sym->n_value;
@@ -109,7 +95,7 @@ static void inspect_dyld() {
intptr_t dyld_slide = -1;
find_syms_raw(dyld_hdr, &dyld_slide, names, syms, 2);
if (!syms[0] || !syms[1])
- panic("couldn't find ImageLoader methods\n");
+ substitute_panic("couldn't find ImageLoader methods\n");
ImageLoaderMachO_getSlide = sym_to_ptr(syms[0], dyld_slide);
ImageLoaderMachO_machHeader = sym_to_ptr(syms[1], dyld_slide);
}
@@ -143,8 +129,8 @@ void substitute_close_image(struct substitute_image *im) {
EXPORT
int substitute_find_private_syms(struct substitute_image *im, const char **names,
- substitute_sym **syms, size_t count) {
- find_syms_raw(im->image_header, &im->slide, names, syms, count);
+ substitute_sym **syms, size_t nsyms) {
+ find_syms_raw(im->image_header, &im->slide, names, syms, nsyms);
return SUBSTITUTE_OK;
}
@@ -153,4 +139,4 @@ void *substitute_sym_to_ptr(struct substitute_image *handle, substitute_sym *sym
return sym_to_ptr(sym, handle->slide);
}
-#endif
+#endif /* __APPLE__ */
diff --git a/lib/interpose.c b/lib/interpose.c
new file mode 100644
index 0000000..cb2b870
--- /dev/null
+++ b/lib/interpose.c
@@ -0,0 +1,188 @@
+#ifdef __APPLE__
+
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+//#include <stdlib.h>
+//#include <stdio.h>
+
+
+#include "substitute.h"
+#include "substitute-internal.h"
+
+
+enum { MAX_SEGMENTS = 32 }; /* capacity of interpose_state.segments */
+struct interpose_state { /* per-call context shared by the bind-stream helpers in this file */
+ int nsegments; /* number of valid entries in segments */
+ segment_command_x *segments[MAX_SEGMENTS]; /* segment load commands, in the order they were found */
+ uintptr_t slide; /* added to a segment's vmaddr to form the address that gets patched */
+ const struct substitute_import_hook *hooks; /* hooks to apply */
+ size_t nhooks; /* number of entries in hooks */
+};
+
+/* Decode one LEB128-encoded integer from a byte stream.
+ *
+ * @ptr       in/out cursor; on return it points just past the encoded value
+ * @end       one past the last readable byte of the stream
+ * @is_signed if true, sign-extend the result (SLEB128); otherwise treat the
+ *            value as unsigned (ULEB128)
+ * @return    the decoded value; payload bits beyond the width of uintptr_t
+ *            are discarded.  If the stream ends in the middle of the value,
+ *            0 is returned and *ptr is set to end, so a caller that loops
+ *            until end stops instead of reinterpreting the truncated payload
+ *            bytes as something else.
+ */
+static uintptr_t read_leb128(void **ptr, void *end, bool is_signed) {
+    uintptr_t result = 0;
+    uint8_t *p = *ptr;
+    uint8_t bit;
+    unsigned int shift = 0;
+    do {
+        if (p >= (uint8_t *) end) {
+            /* truncated value: consume the rest of the stream */
+            *ptr = end;
+            return 0;
+        }
+        bit = *p++;
+        uintptr_t k = bit & 0x7f;
+        if (shift < sizeof(uintptr_t) * 8)
+            result |= k << shift;
+        shift += 7;
+    } while (bit & 0x80);
+    /* Sign-extend only while the shift is in range: shifting by >= the type
+     * width is undefined behaviour, and at that point every bit of the
+     * result has already been supplied by the payload. */
+    if (is_signed && (bit & 0x40) && shift < sizeof(uintptr_t) * 8)
+        result |= ~((uintptr_t) 0) << shift;
+    *ptr = p;
+    return result;
+}
+
+/* Read a NUL-terminated string embedded in a byte stream.
+ *
+ * @ptr    in/out cursor pointing at the first character of the string
+ * @end    one past the last readable byte of the stream
+ * @return the string, or NULL if no terminating NUL exists before end (so
+ *         the result is always safe to hand to strcmp).
+ *
+ * On success the cursor is left ON the terminating NUL, not after it; the
+ * byte is then consumed by whatever reads the stream next.  When the string
+ * is unterminated the cursor is moved to end.
+ */
+static inline char *read_cstring(void **ptr, void *end) {
+    char *s = *ptr;
+    char *nul = memchr(s, '\0', (size_t) ((char *) end - s));
+    if (!nul) {
+        *ptr = end;
+        return NULL;
+    }
+    *ptr = nul;
+    return s;
+}
+
+
+/* Walk one bind opcode stream and redirect every bound pointer whose symbol
+ * matches one of the hooks in st.
+ *
+ * @bind  start of the opcode stream, or NULL if it could not be located
+ * @size  length of the stream in bytes
+ * @st    segments and slide of the image, plus the hooks to apply
+ * @lazy  selects the initial relocation type: BIND_TYPE_POINTER when true,
+ *        0 otherwise
+ * @return SUBSTITUTE_OK, or SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE if a
+ *         hooked symbol is bound with a type other than BIND_TYPE_POINTER
+ *
+ * For each matching bind the slot is atomically exchanged with
+ * replacement + addend, and the previous value (minus addend) is stored
+ * through the hook's old_ptr when that is non-NULL.
+ */
+static int try_bind_section(void *bind, size_t size, const struct interpose_state *st, bool lazy) {
+    /* An absent or empty stream has nothing to patch; bailing out here also
+     * keeps us from reading through a NULL stream pointer. */
+    if (!bind || !size)
+        return SUBSTITUTE_OK;
+
+    void *ptr = bind, *end = bind + size;
+    const char *sym = NULL;
+    uint8_t type = lazy ? BIND_TYPE_POINTER : 0;
+    intptr_t addend = 0;
+    size_t offset = 0;
+    void *segment = NULL;
+    while (ptr < end) {
+        uint8_t byte = *(uint8_t *) ptr;
+        ptr++;
+        uint8_t immediate = byte & BIND_IMMEDIATE_MASK;
+        uint8_t opcode = byte & BIND_OPCODE_MASK;
+
+        uintptr_t count, stride;
+
+        switch(opcode) {
+        case BIND_OPCODE_DONE:
+        case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
+        case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
+            break;
+        case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
+            /* value is not needed, but it must still be skipped */
+            read_leb128(&ptr, end, false);
+            break;
+        case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
+            sym = read_cstring(&ptr, end);
+            /* ignoring flags for now */
+            break;
+        case BIND_OPCODE_SET_TYPE_IMM:
+            type = immediate;
+            break;
+        case BIND_OPCODE_SET_ADDEND_SLEB:
+            addend = read_leb128(&ptr, end, true);
+            break;
+        case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+            /* An index we did not record clears the segment, so that later
+             * binds are skipped instead of being applied relative to a
+             * stale segment base. */
+            if (immediate < st->nsegments)
+                segment = (void *) (st->segments[immediate]->vmaddr + st->slide);
+            else
+                segment = NULL;
+            offset = read_leb128(&ptr, end, false);
+            break;
+        case BIND_OPCODE_ADD_ADDR_ULEB:
+            offset += read_leb128(&ptr, end, false);
+            break;
+        case BIND_OPCODE_DO_BIND:
+            count = 1;
+            stride = sizeof(void *);
+            goto bind;
+        case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
+            count = 1;
+            stride = read_leb128(&ptr, end, false) + sizeof(void *);
+            goto bind;
+        case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
+            count = 1;
+            stride = immediate * sizeof(void *) + sizeof(void *);
+            goto bind;
+        case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
+            count = read_leb128(&ptr, end, false);
+            stride = read_leb128(&ptr, end, false) + sizeof(void *);
+            goto bind;
+        bind:
+            if (segment && sym) {
+                const struct substitute_import_hook *h;
+                size_t i;
+                for (i = 0; i < st->nhooks; i++) {
+                    h = &st->hooks[i];
+                    // TODO abs/pcrel32? used on arm?
+                    if (!strcmp(sym, h->name)) {
+                        if (type != BIND_TYPE_POINTER)
+                            return SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE;
+                        break;
+                    }
+                }
+                if (i != st->nhooks) {
+                    /* matched hook i: patch each of the count slots */
+                    while (count--) {
+                        uintptr_t new = (uintptr_t) h->replacement + addend;
+                        uintptr_t *p = (void *) (segment + offset);
+                        uintptr_t old = __atomic_exchange_n(p, new, __ATOMIC_RELAXED);
+                        if (h->old_ptr)
+                            *(void **) h->old_ptr = (void *) (old - addend);
+                        offset += stride;
+                    }
+                    break;
+                }
+            }
+            /* no hook applied: just step over the slots */
+            offset += count * stride;
+            break;
+        default:
+            /* unrecognised opcode: ignored */
+            break;
+        }
+    }
+    return SUBSTITUTE_OK;
+}
+
+/* Map a file offset to the address it occupies in the loaded image: pick the
+ * first recorded segment whose [fileoff, fileoff + filesize) range contains
+ * off and rebase the offset onto that segment's vmaddr plus the slide.
+ * Returns NULL if no recorded segment contains the offset. */
+static void *off_to_addr(const struct interpose_state *st, uint32_t off) {
+    int idx = 0;
+    while (idx < st->nsegments) {
+        const segment_command_x *seg = st->segments[idx++];
+        /* NOTE(review): presumably relies on unsigned wrap-around so that
+         * offsets below fileoff also fail the range test - confirm the
+         * field types */
+        if ((off - seg->fileoff) >= seg->filesize)
+            continue;
+        return (void *) (seg->vmaddr + st->slide + off - seg->fileoff);
+    }
+    return NULL;
+}
+
+/* Apply import hooks to one loaded image (see substitute.h for the public
+ * contract).  The load commands are scanned twice: first to record up to
+ * MAX_SEGMENTS segment commands, then to find the first LC_DYLD_INFO /
+ * LC_DYLD_INFO_ONLY command, whose bind, weak-bind and lazy-bind streams are
+ * processed in that order.  The first non-OK result from a stream is
+ * returned; options is currently unused. */
+EXPORT
+int substitute_interpose_imports(const struct substitute_image *image,
+                                 const struct substitute_import_hook *hooks,
+                                 size_t nhooks, UNUSED int options) {
+    const mach_header_x *header = image->image_header;
+    /* load commands start right after the header */
+    const struct load_command *first_cmd = (void *) (header + 1);
+
+    struct interpose_state st;
+    st.hooks = hooks;
+    st.nhooks = nhooks;
+    st.slide = image->slide;
+    st.nsegments = 0;
+
+    /* Pass 1: collect segment commands (extras beyond MAX_SEGMENTS are dropped). */
+    const struct load_command *cmd = first_cmd;
+    for (uint32_t left = header->ncmds; left != 0; left--) {
+        if (cmd->cmd == LC_SEGMENT_X && st.nsegments < MAX_SEGMENTS)
+            st.segments[st.nsegments++] = (void *) cmd;
+        cmd = (void *) cmd + cmd->cmdsize;
+    }
+
+    /* Pass 2: locate the dyld info command and process its bind streams. */
+    cmd = first_cmd;
+    for (uint32_t left = header->ncmds; left != 0; left--) {
+        if (cmd->cmd != LC_DYLD_INFO && cmd->cmd != LC_DYLD_INFO_ONLY) {
+            cmd = (void *) cmd + cmd->cmdsize;
+            continue;
+        }
+        struct dyld_info_command *info = (void *) cmd;
+        int ret = try_bind_section(off_to_addr(&st, info->bind_off),
+                                   info->bind_size, &st, false);
+        if (ret == SUBSTITUTE_OK)
+            ret = try_bind_section(off_to_addr(&st, info->weak_bind_off),
+                                   info->weak_bind_size, &st, false);
+        if (ret == SUBSTITUTE_OK)
+            ret = try_bind_section(off_to_addr(&st, info->lazy_bind_off),
+                                   info->lazy_bind_size, &st, true);
+        /* only the first dyld info command is considered */
+        return ret;
+    }
+    return SUBSTITUTE_OK;
+}
+
+#endif /* __APPLE__ */
diff --git a/lib/substitute-internal.h b/lib/substitute-internal.h
index cbb7462..fb64714 100644
--- a/lib/substitute-internal.h
+++ b/lib/substitute-internal.h
@@ -1,10 +1,28 @@
#pragma once
#include <stdio.h>
-#define panic(...) do { \
+#define substitute_panic(...) do { \
fprintf(stderr, __VA_ARGS__); \
abort(); \
__builtin_unreachable(); \
} while(0)
#define EXPORT __attribute__ ((visibility("default")))
+#define UNUSED __attribute__((unused))
+
+#ifdef __APPLE__
+#include <mach-o/loader.h>
+#include <mach-o/dyld.h>
+#include <mach-o/dyld_images.h>
+#ifdef __LP64__
+typedef struct mach_header_64 mach_header_x;
+typedef struct segment_command_64 segment_command_x;
+typedef struct section_64 section_x;
+#define LC_SEGMENT_X LC_SEGMENT_64
+#else
+typedef struct mach_header mach_header_x;
+typedef struct segment_command segment_command_x;
+typedef struct section section_x;
+#define LC_SEGMENT_X LC_SEGMENT
+#endif
+#endif
diff --git a/lib/substitute.h b/lib/substitute.h
index 444c5df..11141bf 100644
--- a/lib/substitute.h
+++ b/lib/substitute.h
@@ -1,7 +1,7 @@
/*
libsubstitute - https://github.com/comex/substitute
- This header file is in the public domain (or in any jusrisdiction where the
- former is ineffective, CC0 1.0).
+ This header file itself is in the public domain (or in any jurisdiction
+ where the former is ineffective, CC0 1.0).
*/
#pragma once
@@ -13,14 +13,50 @@
extern "C" {
#endif
-/* TODO add numbers */
+/* Error codes */
enum {
+ /* TODO add numbers */
SUBSTITUTE_OK = 0,
+
+ /* substitute_hook_functions: can't patch a function because it's too short-
+ * i.e. there's an unconditional return instruction inside the patch region
+ * (and not at its end) */
+ SUBSTITUTE_ERR_FUNC_TOO_SHORT,
+
+ /* substitute_hook_functions: can't patch a function because one of the
+ * instructions within the patch region is one of a few special problematic
+ * cases - if you get this on real code, the library should probably be
+ * updated to handle that case properly */
+ SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START,
+
+ /* substitute_hook_functions: can't patch a function because the (somewhat
+ * cursory) jump analysis found a jump later in the function to within the
+ * patch region at the beginning */
+ SUBSTITUTE_ERR_FUNC_JUMPS_TO_START,
+
+ /* mmap or mprotect failure other than ENOMEM (preserved in errno on return
+ * from the substitute_* function). Most likely to come up with
+ * substitute_hook_functions, if the kernel is preventing pages from being
+ * marked executable. */
+ SUBSTITUTE_ERR_VM,
+
+ /* substitute_interpose_imports: couldn't redo relocation for an import
+ * because the type was unknown */
+ SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE,
};
-int substitute_hook_function(void *function, void *replacement, int options, void *result);
+struct substitute_function_hook {
+ void *function;
+ void *replacement;
+ void *old_ptr; /* optional: out pointer to function pointer to call old impl */
+};
+
+/* TODO doc */
+int substitute_hook_functions(const struct substitute_function_hook *hooks,
+ size_t nhooks,
+ int options);
-#if 1 /* declare substitute_find_syms? */
+#if 1 /* declare dynamic linker-related stuff? */
#ifdef __APPLE__
#include <mach-o/nlist.h>
@@ -45,7 +81,7 @@ struct substitute_image {
/* Look up an image currently loaded into the process.
*
* @filename the executable/library path (c.f. dyld(3) on Darwin)
- * @return a handle, or NULL if the image wasn't found
+ * @return a handle, or NULL if the image wasn't found
*/
struct substitute_image *substitute_open_image(const char *filename);
@@ -58,27 +94,77 @@ void substitute_close_image(struct substitute_image *handle);
/* Look up private symbols in an image currently loaded into the process.
*
* @handle handle opened with substitute_open_image
- * @names an array of symbol names to search for
- * @nlist an array of substitute_sym *, one per name; on return, each entry
- * will be a pointer into the symbol table for that image, or NULL if the
- * symbol wasn't found
- * @count number of names
+ * @names an array of symbol names to search for
+ * @syms an array of substitute_sym *, one per name; on return, each entry
+ * will be a pointer into the symbol table for that image, or NULL if
+ * the symbol wasn't found
+ * @nsyms number of names
*
* @return SUBSTITUTE_OK (maybe errors in the future)
*/
int substitute_find_private_syms(struct substitute_image *handle,
const char **names,
substitute_sym **syms,
- size_t count);
+ size_t nsyms);
/* Get a pointer corresponding to a loaded symbol table entry.
* @handle handle containing the symbol
- * @sym symbol
+ * @sym symbol
* @return the pointer - on ARM, this can be | 1 for Thumb, like everything
* else
*/
void *substitute_sym_to_ptr(struct substitute_image *handle, substitute_sym *sym);
+struct substitute_import_hook {
+ /* The symbol name - this is raw, so C++ symbols are mangled, and on OS X
+ * most symbols have '_' prepended. */
+ const char *name;
+ /* The new import address. */
+ void *replacement;
+ /* Optional: out pointer to the old value.  If there are multiple imports
+ * for the same symbol, only one address is returned (hopefully they are
+ * all equal). */
+ void *old_ptr;
+};
+
+/* Directly modify the GOT/PLT entries from a specified image corresponding to
+ * specified symbols.
+ *
+ * This can be used to 'hook' functions or even exported variables. Compared
+ * to substitute_hook_functions, it has the following advantages:
+ *
+ * - It does not require the ability to patch executable code;
+ * accordingly, it can (from a technical rather than policy perspective) be
+ * used in sandboxed environments like iOS or PaX MPROTECT.
+ * - On platforms without RELRO or similar, it is thread safe, as the patches
+ * are done using atomic instructions.
+ * - It does not require architecture specific code.
+ * - It can be used to modify a single library's view of the world without
+ * affecting the rest of the program.
+ *
+ * ...and the following disadvantages:
+ *
+ * - It only works for exported functions, and even then will not catch calls
+ * from a library to its own exported functions.
+ * - At present, it *only* works for a single importing library at a time.
+ * Although it is not difficult on most platforms to iterate loaded libraries
+ * in order to hook all of them, substitute does not currently provide this
+ * functionality, traversing all libraries' symbol tables may be slow, and in
+ * any case there is the matter of new importers being loaded after the fact.
+ *
+ * @handle handle of the importing library
+ * @hooks see struct substitute_import_hook
+ * @nhooks number of hooks
+ * @options options - pass 0.
+ * @return SUBSTITUTE_OK
+ * SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE
+ * SUBSTITUTE_ERR_VM - in the future with RELRO on Linux
+ */
+int substitute_interpose_imports(const struct substitute_image *handle,
+ const struct substitute_import_hook *hooks,
+ size_t nhooks, int options);
+
+
#endif /* 1 */
#ifdef __cplusplus