lib/darwin/execmem.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435

/* define to avoid error that ucontext is "deprecated" (it's unavoidable with
 * sigaction!) */
#define _XOPEN_SOURCE 700
#define _DARWIN_C_SOURCE
#include "cbit/htab.h"
#include "execmem.h"
/* #include "darwin/manual-syscall.h" */
#include "darwin/mach-decls.h"
#include "substitute.h"
#include "substitute-internal.h"
#include <mach/mach.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>
#include <signal.h>
#include <pthread.h>

#define port_hash(portp) (*(portp))
#define port_eq(port1p, port2p) (*(port1p) == *(port2p))
#define port_null(portp) (*(portp) == MACH_PORT_NULL)
DECL_STATIC_HTAB_KEY(mach_port_t, mach_port_t, port_hash, port_eq, port_null, 0);
struct empty {};
DECL_HTAB(mach_port_set, mach_port_t, struct empty);

    /* ORPHAN: We do the syscall manually just in case the user is trying to write to
     * the mprotect syscall stub itself, or one of the functions it calls.
     * (Obviously, it will still break if the user targets some libsubstitute
     * function within the same page as this one, though.) */

/* This should only run on the main thread, so just use globals. */
static HTAB_STORAGE(mach_port_set) g_suspended_ports;
static struct sigaction old_segv, old_bus;
static execmem_pc_patch_callback g_pc_patch_callback;
static void *g_pc_patch_callback_ctx;

int execmem_alloc_unsealed(uintptr_t hint, void **page_p, size_t *size_p) {
    *size_p = PAGE_SIZE;
    *page_p = mmap((void *) hint, *size_p, PROT_READ | PROT_WRITE,
                   MAP_ANON | MAP_SHARED, -1, 0);
    if (*page_p == MAP_FAILED)
        return SUBSTITUTE_ERR_VM;
    return SUBSTITUTE_OK;
}

int execmem_seal(void *page) {
    if (mprotect(page, PAGE_SIZE, PROT_READ | PROT_EXEC))
        return SUBSTITUTE_ERR_VM;
    return SUBSTITUTE_OK;
}

void execmem_free(void *page) {
    munmap(page, PAGE_SIZE);
}

#if defined(__x86_64__)
    typedef struct __darwin_x86_thread_state64 native_thread_state;
    #define NATIVE_THREAD_STATE_FLAVOR x86_THREAD_STATE64
#elif defined(__i386__)
    typedef struct __darwin_i386_thread_state native_thread_state;
    #define NATIVE_THREAD_STATE_FLAVOR x86_THREAD_STATE32
#elif defined(__arm__)
    typedef struct __darwin_arm_thread_state native_thread_state;
    #define NATIVE_THREAD_STATE_FLAVOR ARM_THREAD_STATE
#elif defined(__arm64__)
    typedef struct __darwin_arm_thread_state64 native_thread_state;
    #define NATIVE_THREAD_STATE_FLAVOR ARM_THREAD_STATE64
#else
    #error ?
#endif

/* returns whether it changed */
static bool apply_one_pcp_with_state(native_thread_state *state,
                                     execmem_pc_patch_callback callback,
                                     void *ctx) {

    uintptr_t *pcp;
#if defined(__x86_64__)
    pcp = (uintptr_t *) &state->__rip;
#elif defined(__i386__)
    pcp = (uintptr_t *) &state->__eip;
#elif defined(__arm__) || defined(__arm64__)
    pcp = (uintptr_t *) &state->__pc;
#endif
    uintptr_t old = *pcp;
#ifdef __arm__
    /* thumb */
    if (state.cpsr & 0x20)
        old |= 1;
#endif
    uintptr_t new = callback(ctx, *pcp);
    bool changed = new != old;
    *pcp = new;
#ifdef __arm__
    *pcp &= ~1;
    state.cpsr = (state.cpsr & ~0x20) | ((new & 1) * 0x20);
#endif
    return changed;
}

static int apply_one_pcp(mach_port_t thread, execmem_pc_patch_callback callback,
                         void *ctx) {
    native_thread_state state;
    mach_msg_type_number_t real_cnt = sizeof(state) / sizeof(int);
    mach_msg_type_number_t cnt = real_cnt;
    kern_return_t kr = thread_get_state(thread, NATIVE_THREAD_STATE_FLAVOR,
                                        (thread_state_t) &state, &cnt);
    if (kr == KERN_TERMINATED)
        return SUBSTITUTE_OK;
    if (kr || cnt != real_cnt)
        return SUBSTITUTE_ERR_ADJUSTING_THREADS;;

    if (apply_one_pcp_with_state(&state, callback, ctx)) {
        kr = thread_set_state(thread, NATIVE_THREAD_STATE_FLAVOR,
                              (thread_state_t) &state, real_cnt);
        if (kr)
            return SUBSTITUTE_ERR_ADJUSTING_THREADS;
    }
    return SUBSTITUTE_OK;
}

static void resume_other_threads();

static int stop_other_threads() {
    /* pthread_main should have already been checked. */

    int ret;
    mach_port_t self = mach_thread_self();

    /* The following shenanigans are for catching any new threads that are
     * created while we're looping, without suspending anything twice.  Keep
     * looping until only threads we already suspended before this loop are
     * there. */
    HTAB_STORAGE_INIT(&g_suspended_ports, mach_port_set);
    struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;

    bool got_new;
    do {
        got_new = false;

        thread_act_port_array_t ports;
        mach_msg_type_number_t nports;

        kern_return_t kr = task_threads(mach_task_self(), &ports, &nports);
        if (kr) { /* ouch */
            ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
            goto fail;
        }

        for (mach_msg_type_number_t i = 0; i < nports; i++) {
            mach_port_t port = ports[i];
            struct htab_bucket_mach_port_set *bucket;
            if (port == self ||
                (bucket = htab_setbucket_mach_port_set(suspended_set, &port),
                 bucket->key)) {
                /* already suspended, ignore */
                mach_port_deallocate(mach_task_self(), port);
            } else {
                got_new = true;
                kr = thread_suspend(port);
                if (kr == KERN_TERMINATED) {
                    /* too late */
                    mach_port_deallocate(mach_task_self(), port);
                } else if (kr) {
                    ret = SUBSTITUTE_ERR_ADJUSTING_THREADS;
                    for (; i < nports; i++)
                        mach_port_deallocate(mach_task_self(), ports[i]);
                    vm_deallocate(mach_task_self(), (vm_address_t) ports,
                                  nports * sizeof(*ports));
                    goto fail;
                }
                bucket->key = port;
            }
        }
        vm_deallocate(mach_task_self(), (vm_address_t) ports,
                      nports * sizeof(*ports));
    } while(got_new);

    /* Success - keep the set around for when we're done. */
    return SUBSTITUTE_OK;

fail:
    resume_other_threads();
    return ret;
}

static void resume_other_threads() {
    struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;
    HTAB_FOREACH(suspended_set, mach_port_t *threadp,
                 UNUSED struct empty *_,
                 mach_port_set) {
        thread_resume(*threadp);
        mach_port_deallocate(mach_task_self(), *threadp);
    }
    htab_free_storage_mach_port_set(suspended_set);
}

static void segfault_handler(UNUSED int sig, UNUSED siginfo_t *info,
                             void *uap_) {
    if (pthread_main_np()) {
        /* The patcher itself segfaulted.  Oops.  Reset the signal so the
         * process exits rather than going into an infinite loop. */
        signal(sig, SIG_DFL);
        return;
    }
    /* We didn't catch it before it segfaulted so have to fix it up here. */
    ucontext_t *uap = uap_;
    apply_one_pcp_with_state(&uap->uc_mcontext->__ss, g_pc_patch_callback,
                             g_pc_patch_callback_ctx);
    /* just let it continue, whatever */
}

static int init_pc_patch(execmem_pc_patch_callback callback, void *ctx) {
    g_pc_patch_callback = callback;
    g_pc_patch_callback_ctx = ctx;
    int ret;
    if ((ret = stop_other_threads()))
        return ret;

    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = segfault_handler;
    sigfillset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART | SA_NODEFER | SA_SIGINFO;

    if (sigaction(SIGSEGV, &sa, &old_segv))
        return SUBSTITUTE_ERR_ADJUSTING_THREADS;
    if (sigaction(SIGBUS, &sa, &old_bus)) {
        sigaction(SIGSEGV, &old_segv, NULL);
        return SUBSTITUTE_ERR_ADJUSTING_THREADS;
    }
    return SUBSTITUTE_OK;
}

static int run_pc_patch() {
    int ret;

    struct htab_mach_port_set *suspended_set = &g_suspended_ports.h;
    HTAB_FOREACH(suspended_set, mach_port_t *threadp,
                 UNUSED struct empty *_,
                 mach_port_set) {
        if ((ret = apply_one_pcp(*threadp, g_pc_patch_callback,
                                 g_pc_patch_callback_ctx)))
            return ret;
    }

    return SUBSTITUTE_OK;
}

static int finish_pc_patch() {
    if (sigaction(SIGBUS, &old_bus, NULL) ||
        sigaction(SIGSEGV, &old_segv, NULL))
        return SUBSTITUTE_ERR_ADJUSTING_THREADS;

    resume_other_threads();
    return SUBSTITUTE_OK;
}

static int compare_dsts(const void *a, const void *b) {
    void *dst_a = ((struct execmem_foreign_write *) a)->dst;
    void *dst_b = ((struct execmem_foreign_write *) b)->dst;
    return dst_a < dst_b ? -1 : dst_a > dst_b ? 1 : 0;
}

static kern_return_t get_page_prot(uintptr_t ptr, vm_prot_t *prot,
                                   vm_inherit_t *inherit) {

    vm_address_t region = (vm_address_t) ptr;
    vm_size_t region_len = 0;
    struct vm_region_submap_short_info_64 info;
    mach_msg_type_number_t info_count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
    natural_t max_depth = 99999;
    kern_return_t kr = vm_region_recurse_64(mach_task_self(), &region, &region_len,
                                            &max_depth,
                                            (vm_region_recurse_info_t) &info,
                                            &info_count);
    *prot = info.protection & (PROT_READ | PROT_WRITE | PROT_EXEC);
    *inherit = info.inheritance;
    return kr;
}

static void manual_memcpy(void *restrict dest, const void *src, size_t len) {
    /* volatile to avoid compiler transformation to call to memcpy */
    volatile uint8_t *d8 = dest;
    const uint8_t *s8 = src;
    while (len--)
        *d8++ = *s8++;
}

int execmem_foreign_write_with_pc_patch(struct execmem_foreign_write *writes,
                                        size_t nwrites,
                                        execmem_pc_patch_callback callback,
                                        void *callback_ctx) {
    int ret;

    qsort(writes, nwrites, sizeof(*writes), compare_dsts);

    if (callback) {
        /* Set the segfault handler - stopping all other threads before
         * doing so in case they were using it for something (this
         * happens).  One might think the latter makes segfaults
         * impossible, but we can't prevent injectors from making new
         * threads that might run during this process.  Hopefully no
         * *injected* threads try to use segfault handlers for something!
         */
        if ((ret = init_pc_patch(callback, callback_ctx)))
            return ret;
    }

    size_t last;
    for (size_t first = 0; first < nwrites; first = last + 1) {
        const struct execmem_foreign_write *first_write = &writes[first];
        uintptr_t page_start = (uintptr_t) first_write->dst & ~PAGE_MASK;
        uintptr_t page_end = ((uintptr_t) first_write->dst +
                              first_write->len - 1) & ~PAGE_MASK;

        last = first;
        while (last + 1 < nwrites) {
            const struct execmem_foreign_write *write = &writes[last + 1];
            uintptr_t this_start = (uintptr_t) write->dst & ~PAGE_MASK;
            uintptr_t this_end = ((uintptr_t) write->dst +
                                  first_write->len - 1) & ~PAGE_MASK;
            if (page_start <= this_start && this_start <= page_end) {
                if (this_end > page_end)
                    page_end = this_end;
            } else if (page_start <= this_end && this_end <= page_end) {
                if (this_start < page_start)
                    page_start = this_start;
            } else {
                break;
            }
            last++;
        }
        size_t len = page_end - page_start + PAGE_SIZE;

        vm_prot_t prot;
        vm_inherit_t inherit;
        /* Assume that a single patch region will be pages of all the same
         * protection, since the alternative is probably someone doing
         * something wrong. */
        kern_return_t kr = get_page_prot(page_start, &prot, &inherit);
        if (kr) {
            /* Weird; this probably means the region doesn't exist, but we should
             * have already read from the memory in order to generate the patch. */
            ret = SUBSTITUTE_ERR_VM;
            goto fail;
        }
        /* Instead of trying to set the existing region to write, which may
         * fail due to max_protection, we make a fresh copy and remap it over
         * the original. */
        void *new = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_ANON | MAP_SHARED, -1, 0);
        if (new == MAP_FAILED) {
            ret = SUBSTITUTE_ERR_VM;
            goto fail;
        }
        /* Ideally, if the original page wasn't mapped anywhere else, no actual
         * copy will take place: new will be CoW, then we unmap the original so
         * new becomes the sole owner before actually writing.  Though, for all
         * I know, these trips through the VM system could be slower than just
         * memcpying a page or two... */
        kr = vm_copy(mach_task_self(), page_start, len, (vm_address_t) new);
        if (kr) {
            ret = SUBSTITUTE_ERR_VM;
            goto fail_unmap;
        }
        /* Disable access to the page so anyone trying to execute there
         * will segfault. */
        if (mmap(NULL, len, PROT_NONE, MAP_ANON | MAP_SHARED, -1, 0)
            == MAP_FAILED) {
            ret = SUBSTITUTE_ERR_VM;
            goto fail_unmap;
        }
        /* Write patches to the copy. */
        for (size_t i = first; i <= last; i++) {
            struct execmem_foreign_write *write = &writes[i];
            ptrdiff_t off = (uintptr_t) write->dst - page_start;
            manual_memcpy(new + off, write->src, write->len);
        }
        if (callback) {
            /* Actually run the callback for any threads which are paused at an
             * affected PC, or are running and don't get scheduled by the
             * kernel in time to segfault.  Any thread which moves to an
             * affected PC *after* run_pc_patch() is assumed to do so by
             * calling the function in question, so they can't get past the
             * first instruction and it doesn't matter whether or not they're
             * patched.  (A call instruction within the affected region would
             * break this assumption, as then a thread could move to an
             * affected PC by returning. */
            if ((ret = run_pc_patch()))
                goto fail_unmap;
        }

        /* Protect new like the original, and move it into place. */
        vm_address_t target = page_start;
        if (mprotect(new, len, prot)) {
            ret = SUBSTITUTE_ERR_VM;
            goto fail_unmap;
        }
        vm_prot_t c, m;
        kr = vm_remap(mach_task_self(), &target, len, 0, VM_FLAGS_OVERWRITE,
                      mach_task_self(), (vm_address_t) new, /*copy*/ FALSE,
                      &c, &m, inherit);
        if (kr) {
            ret = SUBSTITUTE_ERR_VM;
            goto fail_unmap;
        }
        /* ignore errors... */
        munmap(new, len);

        continue;

    fail_unmap:
        /* This is probably useless, since the original page is gone
         * forever (intentionally, see above).  May as well arrange the
         * deck chairs, though. */
        munmap(new, len);
        goto fail;
    }

    ret = 0;

fail:
    if (callback) {
        /* Other threads are no longer in danger of segfaulting, so put
         * back the old segfault handler. */
        if ((ret = finish_pc_patch()))
            return ret;
    }

    return ret;
}