diff options
author | comex | 2015-01-16 04:22:58 -0500 |
---|---|---|
committer | comex | 2015-01-16 04:33:39 -0500 |
commit | d2963f46218c6dc73c6207a1bdcf7042b0af3418 (patch) | |
tree | 61e55419011a865ced995bbb2ba67f9998978b4b /lib | |
parent | jump dis - seemingly working(!) (diff) | |
download | substitute-d2963f46218c6dc73c6207a1bdcf7042b0af3418.tar.gz |
interpose (based on %c based on data) compiles...
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dis-arm.inc.h | 2 | ||||
-rw-r--r-- | lib/dis-thumb2.inc.h | 2 | ||||
-rw-r--r-- | lib/dis.h | 3 | ||||
-rw-r--r-- | lib/find-syms.c | 22 | ||||
-rw-r--r-- | lib/interpose.c | 188 | ||||
-rw-r--r-- | lib/substitute-internal.h | 20 | ||||
-rw-r--r-- | lib/substitute.h | 112 |
7 files changed, 316 insertions, 33 deletions
diff --git a/lib/dis-arm.inc.h b/lib/dis-arm.inc.h index 4d5f134..b537442 100644 --- a/lib/dis-arm.inc.h +++ b/lib/dis-arm.inc.h @@ -19,6 +19,8 @@ LDR: 11111 00 0 U 10 1 1111 */ +/* TODO: bx lr, and handle conditionals */ + static inline enum pcrel_load_mode get_arm_load_mode(unsigned op) { if ((op & 0x7000090) == 0x90) { return ((op >> 22) & 1) ? PLM_U8 : PLM_U32; diff --git a/lib/dis-thumb2.inc.h b/lib/dis-thumb2.inc.h index 30053aa..81b293d 100644 --- a/lib/dis-thumb2.inc.h +++ b/lib/dis-thumb2.inc.h @@ -1,5 +1,7 @@ #include "dis.h" +/* TODO: handle 'it' for conditional br/ret!! */ + static inline enum pcrel_load_mode get_thumb_load_mode(unsigned op) { bool sign = (op >> 8) & 1; switch ((op >> 5) & 3) { @@ -1,10 +1,11 @@ #pragma once +#include "substitute-internal.h" + #include <stdbool.h> #include <stdint.h> #include <stdlib.h> -#define UNUSED __attribute__((unused)) #define INLINE __attribute__((always_inline)) #define NOINLINE __attribute__((noinline)) diff --git a/lib/find-syms.c b/lib/find-syms.c index 78421c5..84df9b4 100644 --- a/lib/find-syms.c +++ b/lib/find-syms.c @@ -1,9 +1,6 @@ #ifdef __APPLE__ #include <stdbool.h> -#include <mach-o/loader.h> -#include <mach-o/dyld.h> -#include <mach-o/dyld_images.h> #include <dlfcn.h> #include <pthread.h> @@ -16,17 +13,6 @@ static pthread_once_t dyld_inspect_once = PTHREAD_ONCE_INIT; /* and its fruits: */ static uintptr_t (*ImageLoaderMachO_getSlide)(void *); static const struct mach_header *(*ImageLoaderMachO_machHeader)(void *); -#ifdef __LP64__ -typedef struct mach_header_64 mach_header_x; -typedef struct segment_command_64 segment_command_x; -typedef struct section_64 section_x; -#define LC_SEGMENT_X LC_SEGMENT_64 -#else -typedef struct mach_header mach_header_x; -typedef struct segment_command segment_command_x; -typedef struct section section_x; -#define LC_SEGMENT_X LC_SEGMENT -#endif static void *sym_to_ptr(substitute_sym *sym, intptr_t slide) { uintptr_t addr = sym->n_value; @@ -109,7 +95,7 @@ static void 
inspect_dyld() { intptr_t dyld_slide = -1; find_syms_raw(dyld_hdr, &dyld_slide, names, syms, 2); if (!syms[0] || !syms[1]) - panic("couldn't find ImageLoader methods\n"); + substitute_panic("couldn't find ImageLoader methods\n"); ImageLoaderMachO_getSlide = sym_to_ptr(syms[0], dyld_slide); ImageLoaderMachO_machHeader = sym_to_ptr(syms[1], dyld_slide); } @@ -143,8 +129,8 @@ void substitute_close_image(struct substitute_image *im) { EXPORT int substitute_find_private_syms(struct substitute_image *im, const char **names, - substitute_sym **syms, size_t count) { - find_syms_raw(im->image_header, &im->slide, names, syms, count); + substitute_sym **syms, size_t nsyms) { + find_syms_raw(im->image_header, &im->slide, names, syms, nsyms); return SUBSTITUTE_OK; } @@ -153,4 +139,4 @@ void *substitute_sym_to_ptr(struct substitute_image *handle, substitute_sym *sym return sym_to_ptr(sym, handle->slide); } -#endif +#endif /* __APPLE__ */ diff --git a/lib/interpose.c b/lib/interpose.c new file mode 100644 index 0000000..cb2b870 --- /dev/null +++ b/lib/interpose.c @@ -0,0 +1,188 @@ +#ifdef __APPLE__ + + +#include <stdint.h> +#include <stdbool.h> +//#include <stdlib.h> +//#include <stdio.h> +//#include <string.h> + + +#include "substitute.h" +#include "substitute-internal.h" + + +enum { MAX_SEGMENTS = 32 }; +struct interpose_state { + int nsegments; + segment_command_x *segments[MAX_SEGMENTS]; + uintptr_t slide; + const struct substitute_import_hook *hooks; + size_t nhooks; +}; + +static uintptr_t read_leb128(void **ptr, void *end, bool is_signed) { + uintptr_t result = 0; + uint8_t *p = *ptr; + uint8_t bit; + unsigned int shift = 0; + do { + if (p >= (uint8_t *) end) + return 0; + bit = *p++; + uintptr_t k = bit & 0x7f; + if (shift < sizeof(uintptr_t) * 8) + result |= k << shift; + shift += 7; + } while (bit & 0x80); + if (is_signed && (bit & 0x40)) + result |= ~((uintptr_t) 0) << shift; + *ptr = p; + return result; +} + +static inline char *read_cstring(void **ptr, void *end) { + 
char *s = *ptr; + *ptr = s + strnlen(s, (char *) end - s); + return s; +} + + +static int try_bind_section(void *bind, size_t size, const struct interpose_state *st, bool lazy) { + void *ptr = bind, *end = bind + size; + const char *sym = NULL; + uint8_t type = lazy ? BIND_TYPE_POINTER : 0; + intptr_t addend = 0; + size_t offset = 0; + int n = 0; + void *segment = NULL; + while (ptr < end) { + uint8_t byte = *(uint8_t *) ptr; + ptr++; + uint8_t immediate = byte & BIND_IMMEDIATE_MASK; + uint8_t opcode = byte & BIND_OPCODE_MASK; + + uintptr_t count, stride; + + switch(opcode) { + case BIND_OPCODE_DONE: + case BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: + case BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: + break; + case BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: + read_leb128(&ptr, end, false); + break; + case BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: + sym = read_cstring(&ptr, end); + /* ignoring flags for now */ + break; + case BIND_OPCODE_SET_TYPE_IMM: + type = immediate; + break; + case BIND_OPCODE_SET_ADDEND_SLEB: + addend = read_leb128(&ptr, end, true); + break; + case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + if (immediate < st->nsegments) + segment = (void *) (st->segments[immediate]->vmaddr + st->slide); + offset = read_leb128(&ptr, end, false); + break; + case BIND_OPCODE_ADD_ADDR_ULEB: + offset += read_leb128(&ptr, end, false); + break; + case BIND_OPCODE_DO_BIND: + count = 1; + stride = sizeof(void *); + goto bind; + case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: + count = 1; + stride = read_leb128(&ptr, end, false) + sizeof(void *); + goto bind; + case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: + count = 1; + stride = immediate * sizeof(void *) + sizeof(void *); + goto bind; + case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: + count = read_leb128(&ptr, end, false); + stride = read_leb128(&ptr, end, false) + sizeof(void *); + goto bind; + bind: + if (segment && sym) { + const struct substitute_import_hook *h; + size_t i; + for (i = 0; i < st->nhooks; i++) { + h = &st->hooks[i]; + // 
TODO abs/pcrel32? used on arm? + if (!strcmp(sym, h->name)) { + if (type != BIND_TYPE_POINTER) + return SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE; + break; + } + } + if (i != st->nhooks) { + while (count--) { + uintptr_t new = (uintptr_t) h->replacement + addend; + uintptr_t *p = (void *) (segment + offset); + uintptr_t old = __atomic_exchange_n(p, new, __ATOMIC_RELAXED); + if (h->old_ptr) + *(void **) h->old_ptr = (void *) (old - addend); + offset += stride; + } + break; + } + } + offset += count * stride; + break; + } + } + return n; +} + +static void *off_to_addr(const struct interpose_state *st, uint32_t off) { + for (int i = 0; i < st->nsegments; i++) { + const segment_command_x *sc = st->segments[i]; + if ((off - sc->fileoff) < sc->filesize) + return (void *) (sc->vmaddr + st->slide + off - sc->fileoff); + } + return NULL; +} + +EXPORT +int substitute_interpose_imports(const struct substitute_image *image, + const struct substitute_import_hook *hooks, + size_t nhooks, UNUSED int options) { + struct interpose_state st; + st.slide = image->slide; + st.nsegments = 0; + st.hooks = hooks; + st.nhooks = nhooks; + + const mach_header_x *mh = image->image_header; + const struct load_command *lc = (void *) (mh + 1); + for (uint32_t i = 0; i < mh->ncmds; i++) { + if (lc->cmd == LC_SEGMENT_X) { + segment_command_x *sc = (void *) lc; + if (st.nsegments < MAX_SEGMENTS) + st.segments[st.nsegments++] = sc; + } + lc = (void *) lc + lc->cmdsize; + } + + lc = (void *) (mh + 1); + for (uint32_t i = 0; i < mh->ncmds; i++) { + if (lc->cmd == LC_DYLD_INFO || lc->cmd == LC_DYLD_INFO_ONLY) { + struct dyld_info_command *dc = (void *) lc; + int ret; + if ((ret = try_bind_section(off_to_addr(&st, dc->bind_off), dc->bind_size, &st, false)) || + (ret = try_bind_section(off_to_addr(&st, dc->weak_bind_off), dc->weak_bind_size, &st, false)) || + (ret = try_bind_section(off_to_addr(&st, dc->lazy_bind_off), dc->lazy_bind_size, &st, true))) + return ret; + + break; + } + lc = (void *) lc + 
lc->cmdsize; + } + return SUBSTITUTE_OK; +} + +#endif /* __APPLE__ */ diff --git a/lib/substitute-internal.h b/lib/substitute-internal.h index cbb7462..fb64714 100644 --- a/lib/substitute-internal.h +++ b/lib/substitute-internal.h @@ -1,10 +1,28 @@ #pragma once #include <stdio.h> -#define panic(...) do { \ +#define substitute_panic(...) do { \ fprintf(stderr, __VA_ARGS__); \ abort(); \ __builtin_unreachable(); \ } while(0) #define EXPORT __attribute__ ((visibility("default"))) +#define UNUSED __attribute__((unused)) + +#ifdef __APPLE__ +#include <mach-o/loader.h> +#include <mach-o/dyld.h> +#include <mach-o/dyld_images.h> +#ifdef __LP64__ +typedef struct mach_header_64 mach_header_x; +typedef struct segment_command_64 segment_command_x; +typedef struct section_64 section_x; +#define LC_SEGMENT_X LC_SEGMENT_64 +#else +typedef struct mach_header mach_header_x; +typedef struct segment_command segment_command_x; +typedef struct section section_x; +#define LC_SEGMENT_X LC_SEGMENT +#endif +#endif diff --git a/lib/substitute.h b/lib/substitute.h index 444c5df..11141bf 100644 --- a/lib/substitute.h +++ b/lib/substitute.h @@ -1,7 +1,7 @@ /* libsubstitute - https://github.com/comex/substitute - This header file is in the public domain (or in any jusrisdiction where the - former is ineffective, CC0 1.0). + This header file itself is in the public domain (or in any jusrisdiction + where the former is ineffective, CC0 1.0). */ #pragma once @@ -13,14 +13,50 @@ extern "C" { #endif -/* TODO add numbers */ +/* Error codes */ enum { + /* TODO add numbers */ SUBSTITUTE_OK = 0, + + /* substitute_hook_functions: can't patch a function because it's too short- + * i.e. 
there's an unconditional return instruction inside the patch region + * (and not at its end) */ + SUBSTITUTE_ERR_FUNC_TOO_SHORT, + + /* substitute_hook_functions: can't patch a function because one of the + * instructions within the patch region is one of a few special problematic + * cases - if you get this on real code, the library should probably be + * updated to handle that case properly */ + SUBSTITUTE_ERR_FUNC_BAD_INSN_AT_START, + + /* substitute_hook_functions: can't patch a function because the (somewhat + * cursory) jump analysis found a jump later in the function to within the + * patch region at the beginning */ + SUBSTITUTE_ERR_FUNC_JUMPS_TO_START, + + /* mmap or mprotect failure other than ENOMEM (preserved in errno on return + * from the substitute_* function). Most likely to come up with + * substitute_hook_functions, if the kernel is preventing pages from being + * marked executable. */ + SUBSTITUTE_ERR_VM, + + /* substitute_interpose_imports: couldn't redo relocation for an import + * because the type was unknown */ + SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE, }; -int substitute_hook_function(void *function, void *replacement, int options, void *result); +struct substitute_function_hook { + void *function; + void *replacement; + void *old_ptr; /* optional: out pointer to function pointer to call old impl */ +}; + +/* TODO doc */ +int substitute_hook_functions(const struct substitute_function_hook *hooks, + size_t nhooks, + int options); -#if 1 /* declare substitute_find_syms? */ +#if 1 /* declare dynamic linker-related stuff? */ #ifdef __APPLE__ #include <mach-o/nlist.h> @@ -45,7 +81,7 @@ struct substitute_image { /* Look up an image currently loaded into the process. * * @filename the executable/library path (c.f. 
dyld(3) on Darwin) - * @return a handle, or NULL if the image wasn't found + * @return a handle, or NULL if the image wasn't found */ struct substitute_image *substitute_open_image(const char *filename); @@ -58,27 +94,77 @@ void substitute_close_image(struct substitute_image *handle); /* Look up private symbols in an image currently loaded into the process. * * @handle handle opened with substitute_open_image - * @names an array of symbol names to search for - * @nlist an array of substitute_sym *, one per name; on return, each entry - * will be a pointer into the symbol table for that image, or NULL if the - * symbol wasn't found - * @count number of names + * @names an array of symbol names to search for + * @syms an array of substitute_sym *, one per name; on return, each entry + * will be a pointer into the symbol table for that image, or NULL if + * the symbol wasn't found + * @nsyms number of names * * @return SUBSTITUTE_OK (maybe errors in the future) */ int substitute_find_private_syms(struct substitute_image *handle, const char **names, substitute_sym **syms, - size_t count); + size_t nsyms); /* Get a pointer corresponding to a loaded symbol table entry. * @handle handle containing the symbol - * @sym symbol + * @sym symbol * @return the pointer - on ARM, this can be | 1 for Thumb, like everything * else */ void *substitute_sym_to_ptr(struct substitute_image *handle, substitute_sym *sym); +struct substitute_import_hook { + /* The symbol name - this is raw, so C++ symbols are mangled, and on OS X + * most symbols have '_' prepended. */ + const char *name; + /* The new import address. */ + void *replacement; + /* Optional: out pointer to old value. if there are multiple imports for + * the same symbol, only one address is returned (hopefully they are all + * equal) */ + void *old_ptr; +}; + +/* Directly modify the GOT/PLT entries from a specified image corresponding to + * specified symbols. 
+ * + * This can be used to 'hook' functions or even exported variables. Compared + * to substitute_hook_functions, it has the following advantages: + * + * - It does not require the ability to patch executable code; + * accordingly, it can (from a technical rather than policy perspective) be + * used in sandboxed environments like iOS or PaX MPROTECT. + * - On platforms without RELRO or similar, it is thread-safe, as the patches + * are done using atomic instructions. + * - It does not require architecture-specific code. + * - It can be used to modify a single library's view of the world without + * affecting the rest of the program. + * + * ...and the following disadvantages: + * + * - It only works for exported functions, and even then will not catch calls + * from a library to its own exported functions. + * - At present, it *only* works for a single importing library at a time. + * Although it is not difficult on most platforms to iterate loaded libraries + * in order to hook all of them, substitute does not currently provide this + * functionality, traversing all libraries' symbol tables may be slow, and in + * any case there is the matter of new importers being loaded after the fact. + * + * @handle handle of the importing library + * @hooks see struct substitute_import_hook + * @nhooks number of hooks + * @options options - pass 0. + * @return SUBSTITUTE_OK + * SUBSTITUTE_ERR_UNKNOWN_RELOCATION_TYPE + * SUBSTITUTE_ERR_VM - in the future with RELRO on Linux + */ +int substitute_interpose_imports(const struct substitute_image *handle, + const struct substitute_import_hook *hooks, + size_t nhooks, int options); + + #endif /* 1 */ #ifdef __cplusplus |