]> granicus.if.org Git - strace/blobdiff - syscall.c
tests: move F_OFD_SETLK* checks from fcntl64.c to fcntl-common.c
[strace] / syscall.c
index e696b25f1c4d65b379756f3b28ff0f660cd29a82..9eb0a9ec4e7c3f5ea47ffce7bbcf37e765433e87 100644 (file)
--- a/syscall.c
+++ b/syscall.c
@@ -6,6 +6,7 @@
  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *                     Linux for s390 port by D.J. Barrow
  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
+ * Copyright (c) 1999-2018 The strace developers.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  */
 
 #include "defs.h"
+#include "mmap_notify.h"
 #include "native_defs.h"
-#include <sys/param.h>
-#include <signal.h>
+#include "ptrace.h"
+#include "nsig.h"
+#include "number_set.h"
+#include "delay.h"
+#include "retval.h"
+#include <limits.h>
 
 /* for struct iovec */
 #include <sys/uio.h>
 
+/* for __X32_SYSCALL_BIT */
+#include <asm/unistd.h>
+
 #include "regs.h"
-#include "ptrace.h"
 
 #if defined(SPARC64)
 # undef PTRACE_GETREGS
 # define PTRACE_SETREGS PTRACE_SETREGS64
 #endif
 
-#if defined SPARC64
-# include <asm/psrcompat.h>
-#elif defined SPARC
-# include <asm/psr.h>
-#endif
-
-#ifdef IA64
-# include <asm/rse.h>
-#endif
-
-#ifndef NT_PRSTATUS
-# define NT_PRSTATUS 1
-#endif
-
-#ifndef NSIG
-# warning: NSIG is not defined, using 32
-# define NSIG 32
-#endif
-
 #include "syscall.h"
+#include "xstring.h"
 
 /* Define these shorthand notations to simplify the syscallent files. */
-#define TD TRACE_DESC
-#define TF TRACE_FILE
-#define TI TRACE_IPC
-#define TN TRACE_NETWORK
-#define TP TRACE_PROCESS
-#define TS TRACE_SIGNAL
-#define TM TRACE_MEMORY
-#define NF SYSCALL_NEVER_FAILS
-#define MA MAX_ARGS
-#define SI STACKTRACE_INVALIDATE_CACHE
-#define SE STACKTRACE_CAPTURE_ON_ENTER
-#define CST COMPAT_SYSCALL_TYPES
+#include "sysent_shorthand_defs.h"
 
 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
 
@@ -105,20 +84,15 @@ static const struct_sysent sysent2[] = {
 #endif
 
 /* Now undef them since short defines cause wicked namespace pollution. */
-#undef SEN
-#undef TD
-#undef TF
-#undef TI
-#undef TN
-#undef TP
-#undef TS
-#undef TM
-#undef NF
-#undef MA
-#undef SI
-#undef SE
-#undef CST
+#include "sysent_shorthand_undefs.h"
+
+const char *const errnoent[] = {
+#include "errnoent.h"
+};
+const char *const signalent[] = {
+#include "signalent.h"
 
+};
 /*
  * `ioctlent[012].h' files are automatically generated by the auxiliary
  * program `ioctlsort', such that the list is sorted by the `code' field.
@@ -127,23 +101,11 @@ static const struct_sysent sysent2[] = {
  * in "/usr/include".
  */
 
-const char *const errnoent0[] = {
-#include "errnoent.h"
-};
-const char *const signalent0[] = {
-#include "signalent.h"
-};
 const struct_ioctlent ioctlent0[] = {
 #include "ioctlent0.h"
 };
 
 #if SUPPORTED_PERSONALITIES > 1
-static const char *const errnoent1[] = {
-# include "errnoent1.h"
-};
-static const char *const signalent1[] = {
-# include "signalent1.h"
-};
 static const struct_ioctlent ioctlent1[] = {
 # include "ioctlent1.h"
 };
@@ -158,12 +120,6 @@ static const struct_printers printers1 = {
 #endif
 
 #if SUPPORTED_PERSONALITIES > 2
-static const char *const errnoent2[] = {
-# include "errnoent2.h"
-};
-static const char *const signalent2[] = {
-# include "signalent2.h"
-};
 static const struct_ioctlent ioctlent2[] = {
 # include "ioctlent2.h"
 };
@@ -183,26 +139,6 @@ enum {
 #endif
 };
 
-enum {
-       nerrnos0 = ARRAY_SIZE(errnoent0)
-#if SUPPORTED_PERSONALITIES > 1
-       , nerrnos1 = ARRAY_SIZE(errnoent1)
-# if SUPPORTED_PERSONALITIES > 2
-       , nerrnos2 = ARRAY_SIZE(errnoent2)
-# endif
-#endif
-};
-
-enum {
-       nsignals0 = ARRAY_SIZE(signalent0)
-#if SUPPORTED_PERSONALITIES > 1
-       , nsignals1 = ARRAY_SIZE(signalent1)
-# if SUPPORTED_PERSONALITIES > 2
-       , nsignals2 = ARRAY_SIZE(signalent2)
-# endif
-#endif
-};
-
 enum {
        nioctlents0 = ARRAY_SIZE(ioctlent0)
 #if SUPPORTED_PERSONALITIES > 1
@@ -215,15 +151,13 @@ enum {
 
 #if SUPPORTED_PERSONALITIES > 1
 const struct_sysent *sysent = sysent0;
-const char *const *errnoent = errnoent0;
-const char *const *signalent = signalent0;
 const struct_ioctlent *ioctlent = ioctlent0;
 const struct_printers *printers = &printers0;
 #endif
 
+const unsigned int nerrnos = ARRAY_SIZE(errnoent);
+const unsigned int nsignals = ARRAY_SIZE(signalent);
 unsigned nsyscalls = nsyscalls0;
-unsigned nerrnos = nerrnos0;
-unsigned nsignals = nsignals0;
 unsigned nioctlents = nioctlents0;
 
 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
@@ -245,11 +179,24 @@ const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
 #endif
 };
 
+const char *const personality_names[] =
+# if defined X86_64
+       {"64 bit", "32 bit", "x32"}
+# elif defined X32
+       {"x32", "32 bit"}
+# elif SUPPORTED_PERSONALITIES == 2
+       {"64 bit", "32 bit"}
+# else
+       {STRINGIFY_VAL(__WORDSIZE) " bit"}
+# endif
+       ;
+
 #if SUPPORTED_PERSONALITIES > 1
+
 unsigned current_personality;
 
 # ifndef current_wordsize
-unsigned current_wordsize;
+unsigned current_wordsize = PERSONALITY0_WORDSIZE;
 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
        PERSONALITY0_WORDSIZE,
        PERSONALITY1_WORDSIZE,
@@ -260,7 +207,7 @@ static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
 # endif
 
 # ifndef current_klongsize
-unsigned current_klongsize;
+unsigned current_klongsize = PERSONALITY0_KLONGSIZE;
 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
        PERSONALITY0_KLONGSIZE,
        PERSONALITY1_KLONGSIZE,
@@ -271,40 +218,35 @@ static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
 # endif
 
 void
-set_personality(int personality)
+set_personality(unsigned int personality)
 {
+       if (personality == current_personality)
+               return;
+
+       if (personality >= SUPPORTED_PERSONALITIES)
+               error_msg_and_die("Requested switch to unsupported personality "
+                                 "%u", personality);
+
        nsyscalls = nsyscall_vec[personality];
        sysent = sysent_vec[personality];
 
        switch (personality) {
        case 0:
-               errnoent = errnoent0;
-               nerrnos = nerrnos0;
                ioctlent = ioctlent0;
                nioctlents = nioctlents0;
-               signalent = signalent0;
-               nsignals = nsignals0;
                printers = &printers0;
                break;
 
        case 1:
-               errnoent = errnoent1;
-               nerrnos = nerrnos1;
                ioctlent = ioctlent1;
                nioctlents = nioctlents1;
-               signalent = signalent1;
-               nsignals = nsignals1;
                printers = &printers1;
                break;
 
 # if SUPPORTED_PERSONALITIES > 2
        case 2:
-               errnoent = errnoent2;
-               nerrnos = nerrnos2;
                ioctlent = ioctlent2;
                nioctlents = nioctlents2;
-               signalent = signalent2;
-               nsignals = nsignals2;
                printers = &printers2;
                break;
 # endif
@@ -322,33 +264,26 @@ set_personality(int personality)
 static void
 update_personality(struct tcb *tcp, unsigned int personality)
 {
-       if (personality == current_personality)
-               return;
+       static bool need_mpers_warning[] =
+               { false, !HAVE_PERSONALITY_1_MPERS, !HAVE_PERSONALITY_2_MPERS };
+
        set_personality(personality);
 
        if (personality == tcp->currpers)
                return;
        tcp->currpers = personality;
 
-# undef PERSONALITY_NAMES
-# if defined POWERPC64
-#  define PERSONALITY_NAMES {"64 bit", "32 bit"}
-# elif defined X86_64
-#  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
-# elif defined X32
-#  define PERSONALITY_NAMES {"x32", "32 bit"}
-# elif defined AARCH64
-#  define PERSONALITY_NAMES {"64 bit", "32 bit"}
-# elif defined TILE
-#  define PERSONALITY_NAMES {"64-bit", "32-bit"}
-# endif
-# ifdef PERSONALITY_NAMES
        if (!qflag) {
-               static const char *const names[] = PERSONALITY_NAMES;
                error_msg("[ Process PID=%d runs in %s mode. ]",
-                         tcp->pid, names[personality]);
+                         tcp->pid, personality_names[personality]);
+       }
+
+       if (need_mpers_warning[personality]) {
+               error_msg("WARNING: Proper structure decoding for this "
+                         "personality is not supported, please consider "
+                         "building strace with mpers support enabled.");
+               need_mpers_warning[personality] = false;
        }
-# endif
 }
 #endif
 
@@ -377,7 +312,7 @@ decode_socket_subcall(struct tcb *tcp)
                tcp->u_arg[i] = (sizeof(uint32_t) == current_wordsize)
                                ? ((uint32_t *) (void *) buf)[i] : buf[i];
 }
-#endif
+#endif /* SYS_socket_subcall */
 
 #ifdef SYS_ipc_subcall
 static void
@@ -417,11 +352,11 @@ decode_ipc_subcall(struct tcb *tcp)
        for (i = 0; i < n; i++)
                tcp->u_arg[i] = tcp->u_arg[i + 1];
 }
-#endif
+#endif /* SYS_ipc_subcall */
 
-#ifdef LINUX_MIPSO32
+#ifdef SYS_syscall_subcall
 static void
-decode_mips_subcall(struct tcb *tcp)
+decode_syscall_subcall(struct tcb *tcp)
 {
        if (!scno_is_valid(tcp->u_arg[0]))
                return;
@@ -430,6 +365,7 @@ decode_mips_subcall(struct tcb *tcp)
        tcp->s_ent = &sysent[tcp->scno];
        memmove(&tcp->u_arg[0], &tcp->u_arg[1],
                sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));
+# ifdef LINUX_MIPSO32
        /*
         * Fetching the last arg of 7-arg syscalls (fadvise64_64
         * and sync_file_range) requires additional code,
@@ -442,43 +378,18 @@ decode_mips_subcall(struct tcb *tcp)
                           &tcp->u_arg[MAX_ARGS - 1]) < 0)
                tcp->u_arg[MAX_ARGS - 1] = 0;
        }
+# endif /* LINUX_MIPSO32 */
 }
-#endif /* LINUX_MIPSO32 */
+#endif /* SYS_syscall_subcall */
 
 static void
 dumpio(struct tcb *tcp)
 {
-       if (syserror(tcp))
-               return;
-
        int fd = tcp->u_arg[0];
        if (fd < 0)
                return;
 
-       if (is_number_in_set(fd, &read_set)) {
-               switch (tcp->s_ent->sen) {
-               case SEN_read:
-               case SEN_pread:
-               case SEN_recv:
-               case SEN_recvfrom:
-               case SEN_mq_timedreceive:
-                       dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
-                       return;
-               case SEN_readv:
-               case SEN_preadv:
-               case SEN_preadv2:
-                       dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
-                                    tcp->u_rval);
-                       return;
-               case SEN_recvmsg:
-                       dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
-                       return;
-               case SEN_recvmmsg:
-                       dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
-                       return;
-               }
-       }
-       if (is_number_in_set(fd, &write_set)) {
+       if (is_number_in_set(fd, write_set)) {
                switch (tcp->s_ent->sen) {
                case SEN_write:
                case SEN_pwrite:
@@ -501,41 +412,33 @@ dumpio(struct tcb *tcp)
                        break;
                }
        }
-}
-
-/*
- * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
- * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
- */
-static kernel_ulong_t
-shuffle_scno(kernel_ulong_t scno)
-{
-#if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
-       if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
-               return scno;
 
-       /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
-       if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
-               return 0x000ffff0;
-       if (scno == 0x000ffff0)
-               return ARM_FIRST_SHUFFLED_SYSCALL;
+       if (syserror(tcp))
+               return;
 
-# define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
-       /*
-        * Is it ARM specific syscall?
-        * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
-        * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
-        */
-       if (scno >= 0x000f0000 &&
-           scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
-               return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
-       }
-       if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
-               return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
+       if (is_number_in_set(fd, read_set)) {
+               switch (tcp->s_ent->sen) {
+               case SEN_read:
+               case SEN_pread:
+               case SEN_recv:
+               case SEN_recvfrom:
+               case SEN_mq_timedreceive:
+                       dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
+                       return;
+               case SEN_readv:
+               case SEN_preadv:
+               case SEN_preadv2:
+                       dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
+                                    tcp->u_rval);
+                       return;
+               case SEN_recvmsg:
+                       dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
+                       return;
+               case SEN_recvmmsg:
+                       dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
+                       return;
+               }
        }
-#endif /* ARM || AARCH64 */
-
-       return scno;
 }
 
 const char *
@@ -547,43 +450,49 @@ err_name(unsigned long err)
        return NULL;
 }
 
-static long get_regs_error;
-
-void
-clear_regs(void)
+static void
+print_err_ret(kernel_ulong_t ret, unsigned long u_error)
 {
-       get_regs_error = -1;
+       const char *u_error_str = err_name(u_error);
+
+       if (u_error_str)
+               tprintf("= %" PRI_kld " %s (%s)",
+                       ret, u_error_str, strerror(u_error));
+       else
+               tprintf("= %" PRI_kld " (errno %lu)", ret, u_error);
 }
 
+static long get_regs(struct tcb *);
 static int get_syscall_args(struct tcb *);
 static int get_syscall_result(struct tcb *);
 static int arch_get_scno(struct tcb *tcp);
 static int arch_set_scno(struct tcb *, kernel_ulong_t);
 static void get_error(struct tcb *, const bool);
 static int arch_set_error(struct tcb *);
+static int arch_set_success(struct tcb *);
 
-struct fault_opts *fault_vec[SUPPORTED_PERSONALITIES];
+struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
 
-static struct fault_opts *
-tcb_fault_opts(struct tcb *tcp)
+static struct inject_opts *
+tcb_inject_opts(struct tcb *tcp)
 {
-       return (scno_in_range(tcp->scno) && tcp->fault_vec[current_personality])
-              ? &tcp->fault_vec[current_personality][tcp->scno] : NULL;
+       return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
+              ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
 }
 
 
 static long
-inject_syscall_fault_entering(struct tcb *tcp)
+tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
 {
-       if (!tcp->fault_vec[current_personality]) {
-               tcp->fault_vec[current_personality] =
-                       xcalloc(nsyscalls, sizeof(**fault_vec));
-               memcpy(tcp->fault_vec[current_personality],
-                      fault_vec[current_personality],
-                      nsyscalls * sizeof(**fault_vec));
+       if (!tcp->inject_vec[current_personality]) {
+               tcp->inject_vec[current_personality] =
+                       xcalloc(nsyscalls, sizeof(**inject_vec));
+               memcpy(tcp->inject_vec[current_personality],
+                      inject_vec[current_personality],
+                      nsyscalls * sizeof(**inject_vec));
        }
 
-       struct fault_opts *opts = tcb_fault_opts(tcp);
+       struct inject_opts *opts = tcb_inject_opts(tcp);
 
        if (!opts || opts->first == 0)
                return 0;
@@ -595,71 +504,140 @@ inject_syscall_fault_entering(struct tcb *tcp)
 
        opts->first = opts->step;
 
-       if (!arch_set_scno(tcp, -1))
-               tcp->flags |= TCB_FAULT_INJ;
+       if (!recovering(tcp)) {
+               if (opts->data.flags & INJECT_F_SIGNAL)
+                       *signo = opts->data.signo;
+               if (opts->data.flags & (INJECT_F_ERROR | INJECT_F_RETVAL) &&
+                   !arch_set_scno(tcp, -1))
+                       tcp->flags |= TCB_TAMPERED;
+               if (opts->data.flags & INJECT_F_DELAY_ENTER)
+                       delay_tcb(tcp, opts->data.delay_idx, true);
+               if (opts->data.flags & INJECT_F_DELAY_EXIT)
+                       tcp->flags |= TCB_INJECT_DELAY_EXIT;
+       }
 
        return 0;
 }
 
 static long
-update_syscall_fault_exiting(struct tcb *tcp)
+tamper_with_syscall_exiting(struct tcb *tcp)
 {
-       struct fault_opts *opts = tcb_fault_opts(tcp);
+       struct inject_opts *opts = tcb_inject_opts(tcp);
+       if (!opts)
+               return 0;
+
+       if (inject_delay_exit(tcp))
+               delay_tcb(tcp, opts->data.delay_idx, false);
+
+       if (!syscall_tampered(tcp))
+               return 0;
+
+       if (!syserror(tcp)) {
+               error_msg("Failed to tamper with process %d: got no error "
+                         "(return value %#" PRI_klx ")",
+                         tcp->pid, tcp->u_rval);
+
+               return 1;
+       }
+
+       bool update_tcb = false;
 
-       if (opts && opts->err && tcp->u_error != opts->err) {
-               unsigned long u_error = tcp->u_error;
-               tcp->u_error = opts->err;
-               if (arch_set_error(tcp))
-                       tcp->u_error = u_error;
+       if (opts->data.flags & INJECT_F_RETVAL) {
+               kernel_long_t inject_rval =
+                       retval_get(opts->data.rval_idx);
+               kernel_long_t u_rval = tcp->u_rval;
+
+               tcp->u_rval = inject_rval;
+               if (arch_set_success(tcp)) {
+                       tcp->u_rval = u_rval;
+               } else {
+                       update_tcb = true;
+                       tcp->u_error = 0;
+               }
+       } else {
+               unsigned long new_error = retval_get(opts->data.rval_idx);
+
+               if (new_error != tcp->u_error && new_error <= MAX_ERRNO_VALUE) {
+                       unsigned long u_error = tcp->u_error;
+
+                       tcp->u_error = new_error;
+                       if (arch_set_error(tcp)) {
+                               tcp->u_error = u_error;
+                       } else {
+                               update_tcb = true;
+                       }
+               }
+       }
+
+       if (update_tcb) {
+               tcp->u_error = 0;
+               get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
        }
 
        return 0;
 }
 
-static int
-trace_syscall_entering(struct tcb *tcp)
+/*
+ * Returns:
+ * 0: "ignore this ptrace stop", bail out silently.
+ * 1: ok, decoded; call
+ *    syscall_entering_finish(tcp, syscall_entering_trace(tcp, ...)).
+ * other: error; call syscall_entering_finish(tcp, res), where res is the value
+ *    returned.
+ */
+int
+syscall_entering_decode(struct tcb *tcp)
 {
-       int res, scno_good;
-
-       scno_good = res = get_scno(tcp);
+       int res = get_scno(tcp);
        if (res == 0)
                return res;
-       if (res == 1)
-               res = get_syscall_args(tcp);
-
-       if (res != 1) {
+       int scno_good = res;
+       if (res != 1 || (res = get_syscall_args(tcp)) != 1) {
                printleader(tcp);
                tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
                /*
                 * " <unavailable>" will be added later by the code which
                 * detects ptrace errors.
                 */
-               goto ret;
+               return res;
        }
 
-#ifdef LINUX_MIPSO32
-       if (SEN_syscall == tcp->s_ent->sen)
-               decode_mips_subcall(tcp);
-#endif
-
-#if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
-       switch (tcp->s_ent->sen) {
+#if defined SYS_ipc_subcall    \
+ || defined SYS_socket_subcall \
+ || defined SYS_syscall_subcall
+       for (;;) {
+               switch (tcp->s_ent->sen) {
+# ifdef SYS_ipc_subcall
+               case SEN_ipc:
+                       decode_ipc_subcall(tcp);
+                       break;
+# endif
 # ifdef SYS_socket_subcall
                case SEN_socketcall:
                        decode_socket_subcall(tcp);
                        break;
 # endif
-# ifdef SYS_ipc_subcall
-               case SEN_ipc:
-                       decode_ipc_subcall(tcp);
+# ifdef SYS_syscall_subcall
+               case SEN_syscall:
+                       decode_syscall_subcall(tcp);
+                       if (tcp->s_ent->sen != SEN_syscall)
+                               continue;
                        break;
 # endif
+               }
+               break;
        }
 #endif
 
+       return 1;
+}
+
+int
+syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
+{
        /* Restrain from fault injection while the trace executes strace code. */
        if (hide_log(tcp)) {
-               tcp->qual_flg &= ~QUAL_FAULT;
+               tcp->qual_flg &= ~QUAL_INJECT;
        }
 
        switch (tcp->s_ent->sen) {
@@ -672,93 +650,86 @@ trace_syscall_entering(struct tcb *tcp)
                        break;
        }
 
-       if (!(tcp->qual_flg & QUAL_TRACE)
-        || (tracing_paths && !pathtrace_match(tcp))
-       ) {
-               tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
-               tcp->sys_func_rval = 0;
+       if (!traced(tcp) || (tracing_paths && !pathtrace_match(tcp))) {
+               tcp->flags |= TCB_FILTERED;
                return 0;
        }
 
        tcp->flags &= ~TCB_FILTERED;
 
        if (hide_log(tcp)) {
-               res = 0;
-               goto ret;
+               return 0;
        }
 
-       if (tcp->qual_flg & QUAL_FAULT)
-               inject_syscall_fault_entering(tcp);
+       if (inject(tcp))
+               tamper_with_syscall_entering(tcp, sig);
 
        if (cflag == CFLAG_ONLY_STATS) {
-               res = 0;
-               goto ret;
+               return 0;
        }
 
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
        if (stack_trace_enabled) {
                if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
-                       unwind_capture_stacktrace(tcp);
+                       unwind_tcb_capture(tcp);
        }
 #endif
 
        printleader(tcp);
        tprintf("%s(", tcp->s_ent->sys_name);
-       if (tcp->qual_flg & QUAL_RAW)
-               res = printargs(tcp);
-       else
-               res = tcp->s_ent->sys_func(tcp);
-
+       int res = raw(tcp) ? printargs(tcp) : tcp->s_ent->sys_func(tcp);
        fflush(tcp->outf);
- ret:
-       tcp->flags |= TCB_INSYSCALL;
-       tcp->sys_func_rval = res;
-       /* Measure the entrance time as late as possible to avoid errors. */
-       if (Tflag || cflag)
-               gettimeofday(&tcp->etime, NULL);
        return res;
 }
 
-static bool
-syscall_fault_injected(struct tcb *tcp)
+void
+syscall_entering_finish(struct tcb *tcp, int res)
 {
-       return tcp->flags & TCB_FAULT_INJ;
+       tcp->flags |= TCB_INSYSCALL;
+       tcp->sys_func_rval = res;
+       /* Measure the entrance time as late as possible to avoid errors. */
+       if ((Tflag || cflag) && !filtered(tcp))
+               clock_gettime(CLOCK_MONOTONIC, &tcp->etime);
 }
 
-static int
-trace_syscall_exiting(struct tcb *tcp)
+/* Returns:
+ * 0: "bail out".
+ * 1: ok.
+ * -1: error in one of ptrace ops.
+ *
+ * If not 0, call syscall_exiting_trace(tcp, pts, res), where res is the return
+ *    value. In either case, call syscall_exiting_finish(tcp) afterwards.
+ */
+int
+syscall_exiting_decode(struct tcb *tcp, struct timespec *pts)
 {
-       int sys_res;
-       struct timeval tv;
-       int res;
-       unsigned long u_error;
-       const char *u_error_str;
-
        /* Measure the exit time as early as possible to avoid errors. */
-       if (Tflag || cflag)
-               gettimeofday(&tv, NULL);
+       if ((Tflag || cflag) && !(filtered(tcp) || hide_log(tcp)))
+               clock_gettime(CLOCK_MONOTONIC, pts);
 
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
-               if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
-                       unwind_cache_invalidate(tcp);
-       }
-#endif
+       if (tcp->s_ent->sys_flags & MEMORY_MAPPING_CHANGE)
+               mmap_notify_report(tcp);
+
+       if (filtered(tcp) || hide_log(tcp))
+               return 0;
 
 #if SUPPORTED_PERSONALITIES > 1
        update_personality(tcp, tcp->currpers);
 #endif
-       res = (get_regs_error ? -1 : get_syscall_result(tcp));
-       if (filtered(tcp) || hide_log(tcp))
-               goto ret;
 
-       if (syserror(tcp) && syscall_fault_injected(tcp))
-               update_syscall_fault_exiting(tcp);
+       return get_syscall_result(tcp);
+}
+
+int
+syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res)
+{
+       if (syscall_tampered(tcp) || inject_delay_exit(tcp))
+               tamper_with_syscall_exiting(tcp);
 
        if (cflag) {
-               count_syscall(tcp, &tv);
+               count_syscall(tcp, ts);
                if (cflag == CFLAG_ONLY_STATS) {
-                       goto ret;
+                       return 0;
                }
        }
 
@@ -785,27 +756,24 @@ trace_syscall_exiting(struct tcb *tcp)
                tabto();
                tprints("= ? <unavailable>\n");
                line_ended();
-               tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
-               tcp->sys_func_rval = 0;
-               free_tcb_priv_data(tcp);
                return res;
        }
        tcp->s_prev_ent = tcp->s_ent;
 
-       sys_res = 0;
-       if (tcp->qual_flg & QUAL_RAW) {
+       int sys_res = 0;
+       if (raw(tcp)) {
                /* sys_res = printargs(tcp); - but it's nop on sysexit */
        } else {
        /* FIXME: not_failing_only (IOW, option -z) is broken:
         * failure of syscall is known only after syscall return.
         * Thus we end up with something like this on, say, ENOENT:
-        *     open("doesnt_exist", O_RDONLY <unfinished ...>
+        *     open("does_not_exist", O_RDONLY <unfinished ...>
         *     {next syscall decode}
         * whereas the intended result is that open(...) line
         * is not shown at all.
         */
                if (not_failing_only && tcp->u_error)
-                       goto ret;       /* ignore failed syscalls */
+                       return 0;       /* ignore failed syscalls */
                if (tcp->sys_func_rval & RVAL_DECODED)
                        sys_res = tcp->sys_func_rval;
                else
@@ -814,19 +782,17 @@ trace_syscall_exiting(struct tcb *tcp)
 
        tprints(") ");
        tabto();
-       u_error = tcp->u_error;
 
-       if (tcp->qual_flg & QUAL_RAW) {
-               if (u_error) {
-                       tprintf("= -1 (errno %lu)", u_error);
-                       if (syscall_fault_injected(tcp))
-                               tprints(" (INJECTED)");
-               } else {
+       if (raw(tcp)) {
+               if (tcp->u_error)
+                       print_err_ret(tcp->u_rval, tcp->u_error);
+               else
                        tprintf("= %#" PRI_klx, tcp->u_rval);
-               }
-       }
-       else if (!(sys_res & RVAL_NONE) && u_error) {
-               switch (u_error) {
+
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
+       } else if (!(sys_res & RVAL_NONE) && tcp->u_error) {
+               switch (tcp->u_error) {
                /* Blocked signals do not interrupt any syscalls.
                 * In this case syscalls don't return ERESTARTfoo codes.
                 *
@@ -881,28 +847,21 @@ trace_syscall_exiting(struct tcb *tcp)
                        tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
                        break;
                default:
-                       u_error_str = err_name(u_error);
-                       if (u_error_str)
-                               tprintf("= -1 %s (%s)",
-                                       u_error_str, strerror(u_error));
-                       else
-                               tprintf("= -1 %lu (%s)",
-                                       u_error, strerror(u_error));
+                       print_err_ret(tcp->u_rval, tcp->u_error);
                        break;
                }
-               if (syscall_fault_injected(tcp))
-                       tprintf(" (INJECTED)");
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
                if ((sys_res & RVAL_STR) && tcp->auxstr)
                        tprintf(" (%s)", tcp->auxstr);
-       }
-       else {
+       } else {
                if (sys_res & RVAL_NONE)
                        tprints("= ?");
                else {
                        switch (sys_res & RVAL_MASK) {
                        case RVAL_HEX:
 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
-                               if (current_wordsize < sizeof(tcp->u_rval)) {
+                               if (current_klongsize < sizeof(tcp->u_rval)) {
                                        tprintf("= %#x",
                                                (unsigned int) tcp->u_rval);
                                } else
@@ -917,7 +876,7 @@ trace_syscall_exiting(struct tcb *tcp)
                                break;
                        case RVAL_UDECIMAL:
 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
-                               if (current_wordsize < sizeof(tcp->u_rval)) {
+                               if (current_klongsize < sizeof(tcp->u_rval)) {
                                        tprintf("= %u",
                                                (unsigned int) tcp->u_rval);
                                } else
@@ -926,15 +885,11 @@ trace_syscall_exiting(struct tcb *tcp)
                                        tprintf("= %" PRI_klu, tcp->u_rval);
                                }
                                break;
-                       case RVAL_DECIMAL:
-                               tprintf("= %" PRI_kld, tcp->u_rval);
-                               break;
                        case RVAL_FD:
                                if (show_fd_path) {
                                        tprints("= ");
                                        printfd(tcp, tcp->u_rval);
-                               }
-                               else
+                               } else
                                        tprintf("= %" PRI_kld, tcp->u_rval);
                                break;
                        default:
@@ -944,33 +899,31 @@ trace_syscall_exiting(struct tcb *tcp)
                }
                if ((sys_res & RVAL_STR) && tcp->auxstr)
                        tprintf(" (%s)", tcp->auxstr);
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
        }
        if (Tflag) {
-               tv_sub(&tv, &tv, &tcp->etime);
+               ts_sub(ts, ts, &tcp->etime);
                tprintf(" <%ld.%06ld>",
-                       (long) tv.tv_sec, (long) tv.tv_usec);
+                       (long) ts->tv_sec, (long) ts->tv_nsec / 1000);
        }
        tprints("\n");
        dumpio(tcp);
        line_ended();
 
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
        if (stack_trace_enabled)
-               unwind_print_stacktrace(tcp);
+               unwind_tcb_print(tcp);
 #endif
-
- ret:
-       tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
-       tcp->sys_func_rval = 0;
-       free_tcb_priv_data(tcp);
        return 0;
 }
 
-int
-trace_syscall(struct tcb *tcp)
+void
+syscall_exiting_finish(struct tcb *tcp)
 {
-       return exiting(tcp) ?
-               trace_syscall_exiting(tcp) : trace_syscall_entering(tcp);
+       tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED | TCB_INJECT_DELAY_EXIT);
+       tcp->sys_func_rval = 0;
+       free_tcb_priv_data(tcp);
 }
 
 bool
@@ -1002,29 +955,13 @@ restore_cleared_syserror(struct tcb *tcp)
        tcp->u_error = saved_u_error;
 }
 
-/*
- * Check the syscall return value register value for whether it is
- * a negated errno code indicating an error, or a success return value.
- */
-static inline bool
-is_negated_errno(kernel_ulong_t val)
-{
-       /* Linux kernel defines MAX_ERRNO to 4095. */
-       kernel_ulong_t max = -(kernel_long_t) 4095;
-
-#ifndef current_klongsize
-       if (current_klongsize < sizeof(val)) {
-               val = (uint32_t) val;
-               max = (uint32_t) max;
-       }
-#endif /* !current_klongsize */
-
-       return val >= max;
-}
+#define XLAT_MACROS_ONLY
+# include "xlat/nt_descriptor_types.h"
+#undef XLAT_MACROS_ONLY
 
 #include "arch_regs.c"
 
-#ifdef HAVE_GETRVAL2
+#if HAVE_ARCH_GETRVAL2
 # include "arch_getrval2.c"
 #endif
 
@@ -1036,11 +973,11 @@ print_pc(struct tcb *tcp)
 #elif defined ARCH_PC_PEEK_ADDR
        kernel_ulong_t pc;
 # define ARCH_PC_REG pc
-# define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
+# define ARCH_GET_PC upeek(tcp, ARCH_PC_PEEK_ADDR, &pc)
 #else
 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
 #endif
-       if (get_regs_error || ARCH_GET_PC)
+       if (get_regs(tcp) < 0 || ARCH_GET_PC)
                tprints(current_wordsize == 4 ? "[????????] "
                                              : "[????????????????] ");
        else
@@ -1125,11 +1062,25 @@ ptrace_setregs(pid_t pid)
 
 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
 
+#ifdef ptrace_getregset_or_getregs
+static long get_regs_error;
+#endif
+
 void
-get_regs(pid_t pid)
+clear_regs(struct tcb *tcp)
 {
-#undef USE_GET_SYSCALL_RESULT_REGS
 #ifdef ptrace_getregset_or_getregs
+       get_regs_error = -1;
+#endif
+}
+
+static long
+get_regs(struct tcb *const tcp)
+{
+#ifdef ptrace_getregset_or_getregs
+
+       if (get_regs_error != -1)
+               return get_regs_error;
 
 # ifdef HAVE_GETREGS_OLD
        /*
@@ -1138,29 +1089,57 @@ get_regs(pid_t pid)
         */
        static int use_getregs_old;
        if (use_getregs_old < 0) {
-               get_regs_error = ptrace_getregset_or_getregs(pid);
-               return;
+               return get_regs_error = ptrace_getregset_or_getregs(tcp->pid);
        } else if (use_getregs_old == 0) {
-               get_regs_error = ptrace_getregset_or_getregs(pid);
+               get_regs_error = ptrace_getregset_or_getregs(tcp->pid);
                if (get_regs_error >= 0) {
                        use_getregs_old = -1;
-                       return;
+                       return get_regs_error;
                }
                if (errno == EPERM || errno == ESRCH)
-                       return;
+                       return get_regs_error;
                use_getregs_old = 1;
        }
-       get_regs_error = getregs_old(pid);
+       return get_regs_error = getregs_old(tcp);
 # else /* !HAVE_GETREGS_OLD */
        /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
-       get_regs_error = ptrace_getregset_or_getregs(pid);
+       get_regs_error = ptrace_getregset_or_getregs(tcp->pid);
+
+#  if defined ARCH_PERSONALITY_0_IOV_SIZE
+       if (get_regs_error)
+               return get_regs_error;
+
+       switch (ARCH_IOVEC_FOR_GETREGSET.iov_len) {
+       case ARCH_PERSONALITY_0_IOV_SIZE:
+               update_personality(tcp, 0);
+               break;
+       case ARCH_PERSONALITY_1_IOV_SIZE:
+               update_personality(tcp, 1);
+               break;
+       default: {
+               static bool printed = false;
+
+               if (!printed) {
+                       error_msg("Unsupported regset size returned by "
+                                 "PTRACE_GETREGSET: %zu",
+                                 ARCH_IOVEC_FOR_GETREGSET.iov_len);
+
+                       printed = true;
+               }
+
+               update_personality(tcp, 0);
+       }
+       }
+#  endif /* ARCH_PERSONALITY_0_IOV_SIZE */
+
+       return get_regs_error;
+
 # endif /* !HAVE_GETREGS_OLD */
 
 #else /* !ptrace_getregset_or_getregs */
 
-# define USE_GET_SYSCALL_RESULT_REGS 1
 # warning get_regs is not implemented for this architecture yet
-       get_regs_error = 0;
+       return 0;
 
 #endif /* !ptrace_getregset_or_getregs */
 }
@@ -1176,7 +1155,7 @@ set_regs(pid_t pid)
 struct sysent_buf {
        struct tcb *tcp;
        struct_sysent ent;
-       char buf[sizeof("syscall_%lu") + sizeof(kernel_ulong_t) * 3];
+       char buf[sizeof("syscall_0x") + sizeof(kernel_ulong_t) * 2];
 };
 
 static void
@@ -1189,21 +1168,24 @@ free_sysent_buf(void *ptr)
 
 /*
  * Returns:
- * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
- * 1: ok, continue in trace_syscall_entering().
- * other: error, trace_syscall_entering() should print error indicator
- *    ("????" etc) and bail out.
+ * 0: "ignore this ptrace stop", syscall_entering_decode() should return a "bail
+ *    out silently" code.
+ * 1: ok, continue in syscall_entering_decode().
+ * other: error, syscall_entering_decode() should print error indicator
+ *    ("????" etc) and return an appropriate code.
  */
 int
 get_scno(struct tcb *tcp)
 {
-       if (get_regs_error)
+       if (get_regs(tcp) < 0)
                return -1;
 
        int rc = arch_get_scno(tcp);
        if (rc != 1)
                return rc;
 
+       tcp->scno = shuffle_scno(tcp->scno);
+
        if (scno_is_valid(tcp->scno)) {
                tcp->s_ent = &sysent[tcp->scno];
                tcp->qual_flg = qual_flags(tcp->scno);
@@ -1215,38 +1197,48 @@ get_scno(struct tcb *tcp)
                s->ent.sen = SEN_printargs;
                s->ent.sys_func = printargs;
                s->ent.sys_name = s->buf;
-               sprintf(s->buf, "syscall_%" PRI_klu, shuffle_scno(tcp->scno));
+               xsprintf(s->buf, "syscall_%#" PRI_klx, shuffle_scno(tcp->scno));
 
                tcp->s_ent = &s->ent;
                tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
 
                set_tcb_priv_data(tcp, s, free_sysent_buf);
 
-               if (debug_flag)
-                       error_msg("pid %d invalid syscall %" PRI_kld,
-                                 tcp->pid, tcp->scno);
+               debug_msg("pid %d invalid syscall %#" PRI_klx,
+                         tcp->pid, shuffle_scno(tcp->scno));
        }
+
+       /*
+        * We refrain from argument decoding while recovering,
+        * as tracee memory mappings have changed and the registers
+        * are very likely pointing to garbage already.
+        */
+       if (recovering(tcp))
+               tcp->qual_flg |= QUAL_RAW;
+
        return 1;
 }
 
-#ifdef USE_GET_SYSCALL_RESULT_REGS
+#ifdef ptrace_getregset_or_getregs
+# define get_syscall_result_regs get_regs
+#else
 static int get_syscall_result_regs(struct tcb *);
 #endif
 
 /* Returns:
- * 1: ok, continue in trace_syscall_exiting().
- * -1: error, trace_syscall_exiting() should print error indicator
+ * 1: ok, continue in syscall_exiting_trace().
+ * -1: error, syscall_exiting_trace() should print error indicator
  *    ("????" etc) and bail out.
  */
 static int
 get_syscall_result(struct tcb *tcp)
 {
-#ifdef USE_GET_SYSCALL_RESULT_REGS
-       if (get_syscall_result_regs(tcp))
+       if (get_syscall_result_regs(tcp) < 0)
                return -1;
-#endif
        tcp->u_error = 0;
-       get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
+       get_error(tcp,
+                 !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS)
+                       || syscall_tampered(tcp));
 
        return 1;
 }
@@ -1254,7 +1246,7 @@ get_syscall_result(struct tcb *tcp)
 #include "get_scno.c"
 #include "set_scno.c"
 #include "get_syscall_args.c"
-#ifdef USE_GET_SYSCALL_RESULT_REGS
+#ifndef ptrace_getregset_or_getregs
 # include "get_syscall_result.c"
 #endif
 #include "get_error.c"
@@ -1262,13 +1254,10 @@ get_syscall_result(struct tcb *tcp)
 #ifdef HAVE_GETREGS_OLD
 # include "getregs_old.c"
 #endif
+#include "shuffle_scno.c"
 
 const char *
 syscall_name(kernel_ulong_t scno)
 {
-#if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
-       if (current_personality == X32_PERSONALITY_NUMBER)
-               scno &= ~__X32_SYSCALL_BIT;
-#endif
-       return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
+       return scno_is_valid(scno) ? sysent[scno].sys_name : NULL;
 }