granicus.if.org Git - strace/blobdiff - syscall.c
mem: decode hugetlb page size in mmap flags
[strace] / syscall.c
index c821d7efdb01ebb7909c2c73a9111451c3c716e8..b1047feb67f0ec2cb43180f1e59b00f963b03ddf 100644 (file)
--- a/syscall.c
+++ b/syscall.c
@@ -6,6 +6,7 @@
  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *                     Linux for s390 port by D.J. Barrow
  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
+ * Copyright (c) 1999-2017 The strace developers.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "defs.h"
 #include "native_defs.h"
 #include "nsig.h"
+#include "number_set.h"
 #include <sys/param.h>
 
 /* for struct iovec */
 #include <sys/uio.h>
 
+/* for __X32_SYSCALL_BIT */
+#include <asm/unistd.h>
+
 #include "regs.h"
 #include "ptrace.h"
 
 #include "syscall.h"
 
 /* Define these shorthand notations to simplify the syscallent files. */
-#define TD TRACE_DESC
-#define TF TRACE_FILE
-#define TI TRACE_IPC
-#define TN TRACE_NETWORK
-#define TP TRACE_PROCESS
-#define TS TRACE_SIGNAL
-#define TM TRACE_MEMORY
-#define NF SYSCALL_NEVER_FAILS
-#define MA MAX_ARGS
-#define SI STACKTRACE_INVALIDATE_CACHE
-#define SE STACKTRACE_CAPTURE_ON_ENTER
-#define CST COMPAT_SYSCALL_TYPES
+#include "sysent_shorthand_defs.h"
 
 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
 
@@ -100,19 +94,7 @@ static const struct_sysent sysent2[] = {
 #endif
 
 /* Now undef them since short defines cause wicked namespace pollution. */
-#undef SEN
-#undef TD
-#undef TF
-#undef TI
-#undef TN
-#undef TP
-#undef TS
-#undef TM
-#undef NF
-#undef MA
-#undef SI
-#undef SE
-#undef CST
+#include "sysent_shorthand_undefs.h"
 
 /*
  * `ioctlent[012].h' files are automatically generated by the auxiliary
@@ -326,16 +308,12 @@ update_personality(struct tcb *tcp, unsigned int personality)
        tcp->currpers = personality;
 
 # undef PERSONALITY_NAMES
-# if defined POWERPC64
-#  define PERSONALITY_NAMES {"64 bit", "32 bit"}
-# elif defined X86_64
+# if defined X86_64
 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
 # elif defined X32
 #  define PERSONALITY_NAMES {"x32", "32 bit"}
-# elif defined AARCH64
+# elif SUPPORTED_PERSONALITIES == 2
 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
-# elif defined TILE
-#  define PERSONALITY_NAMES {"64-bit", "32-bit"}
 # endif
 # ifdef PERSONALITY_NAMES
        if (!qflag) {
@@ -450,7 +428,7 @@ dumpio(struct tcb *tcp)
        if (fd < 0)
                return;
 
-       if (is_number_in_set(fd, &read_set)) {
+       if (is_number_in_set(fd, read_set)) {
                switch (tcp->s_ent->sen) {
                case SEN_read:
                case SEN_pread:
@@ -473,7 +451,7 @@ dumpio(struct tcb *tcp)
                        return;
                }
        }
-       if (is_number_in_set(fd, &write_set)) {
+       if (is_number_in_set(fd, write_set)) {
                switch (tcp->s_ent->sen) {
                case SEN_write:
                case SEN_pwrite:
@@ -550,35 +528,37 @@ clear_regs(void)
        get_regs_error = -1;
 }
 
+static void get_regs(pid_t pid);
 static int get_syscall_args(struct tcb *);
 static int get_syscall_result(struct tcb *);
 static int arch_get_scno(struct tcb *tcp);
 static int arch_set_scno(struct tcb *, kernel_ulong_t);
 static void get_error(struct tcb *, const bool);
 static int arch_set_error(struct tcb *);
+static int arch_set_success(struct tcb *);
 
-struct fault_opts *fault_vec[SUPPORTED_PERSONALITIES];
+struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
 
-static struct fault_opts *
-tcb_fault_opts(struct tcb *tcp)
+static struct inject_opts *
+tcb_inject_opts(struct tcb *tcp)
 {
-       return (scno_in_range(tcp->scno) && tcp->fault_vec[current_personality])
-              ? &tcp->fault_vec[current_personality][tcp->scno] : NULL;
+       return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
+              ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
 }
 
 
 static long
-inject_syscall_fault_entering(struct tcb *tcp, unsigned int *signo)
+tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
 {
-       if (!tcp->fault_vec[current_personality]) {
-               tcp->fault_vec[current_personality] =
-                       xcalloc(nsyscalls, sizeof(**fault_vec));
-               memcpy(tcp->fault_vec[current_personality],
-                      fault_vec[current_personality],
-                      nsyscalls * sizeof(**fault_vec));
+       if (!tcp->inject_vec[current_personality]) {
+               tcp->inject_vec[current_personality] =
+                       xcalloc(nsyscalls, sizeof(**inject_vec));
+               memcpy(tcp->inject_vec[current_personality],
+                      inject_vec[current_personality],
+                      nsyscalls * sizeof(**inject_vec));
        }
 
-       struct fault_opts *opts = tcb_fault_opts(tcp);
+       struct inject_opts *opts = tcb_inject_opts(tcp);
 
        if (!opts || opts->first == 0)
                return 0;
@@ -590,48 +570,70 @@ inject_syscall_fault_entering(struct tcb *tcp, unsigned int *signo)
 
        opts->first = opts->step;
 
-       if (opts->signo > 0)
-               *signo = opts->signo;
-       if (opts->err != -1 && !arch_set_scno(tcp, -1))
-               tcp->flags |= TCB_FAULT_INJ;
+       if (opts->data.flags & INJECT_F_SIGNAL)
+               *signo = opts->data.signo;
+       if (opts->data.flags & INJECT_F_RETVAL && !arch_set_scno(tcp, -1))
+               tcp->flags |= TCB_TAMPERED;
 
        return 0;
 }
 
 static long
-update_syscall_fault_exiting(struct tcb *tcp)
+tamper_with_syscall_exiting(struct tcb *tcp)
 {
-       struct fault_opts *opts = tcb_fault_opts(tcp);
+       struct inject_opts *opts = tcb_inject_opts(tcp);
+
+       if (!opts)
+               return 0;
+
+       if (opts->data.rval >= 0) {
+               kernel_long_t u_rval = tcp->u_rval;
+
+               tcp->u_rval = opts->data.rval;
+               if (arch_set_success(tcp)) {
+                       tcp->u_rval = u_rval;
+               } else {
+                       tcp->u_error = 0;
+               }
+       } else {
+               unsigned long new_error = -opts->data.rval;
+
+               if (new_error != tcp->u_error && new_error <= MAX_ERRNO_VALUE) {
+                       unsigned long u_error = tcp->u_error;
 
-       if (opts && opts->err > 0 && tcp->u_error != (uint16_t) opts->err) {
-               unsigned long u_error = tcp->u_error;
-               tcp->u_error = opts->err;
-               if (arch_set_error(tcp))
-                       tcp->u_error = u_error;
+                       tcp->u_error = new_error;
+                       if (arch_set_error(tcp)) {
+                               tcp->u_error = u_error;
+                       }
+               }
        }
 
        return 0;
 }
 
-static int
-trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
+/*
+ * Returns:
+ * 0: "ignore this ptrace stop", bail out silently.
+ * 1: ok, decoded; call
+ *    syscall_entering_finish(tcp, syscall_entering_trace(tcp, ...)).
+ * other: error; call syscall_entering_finish(tcp, res), where res is the value
+ *    returned.
+ */
+int
+syscall_entering_decode(struct tcb *tcp)
 {
-       int res, scno_good;
-
-       scno_good = res = get_scno(tcp);
+       int res = get_scno(tcp);
        if (res == 0)
                return res;
-       if (res == 1)
-               res = get_syscall_args(tcp);
-
-       if (res != 1) {
+       int scno_good = res;
+       if (res != 1 || (res = get_syscall_args(tcp)) != 1) {
                printleader(tcp);
                tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
                /*
                 * " <unavailable>" will be added later by the code which
                 * detects ptrace errors.
                 */
-               goto ret;
+               return res;
        }
 
 #ifdef LINUX_MIPSO32
@@ -654,9 +656,15 @@ trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
        }
 #endif
 
+       return 1;
+}
+
+int
+syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
+{
        /* Restrain from fault injection while the tracee executes strace code. */
        if (hide_log(tcp)) {
-               tcp->qual_flg &= ~QUAL_FAULT;
+               tcp->qual_flg &= ~QUAL_INJECT;
        }
 
        switch (tcp->s_ent->sen) {
@@ -669,27 +677,22 @@ trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
                        break;
        }
 
-       if (!(tcp->qual_flg & QUAL_TRACE)
-        || (tracing_paths && !pathtrace_match(tcp))
-       ) {
-               tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
-               tcp->sys_func_rval = 0;
+       if (!traced(tcp) || (tracing_paths && !pathtrace_match(tcp))) {
+               tcp->flags |= TCB_FILTERED;
                return 0;
        }
 
        tcp->flags &= ~TCB_FILTERED;
 
        if (hide_log(tcp)) {
-               res = 0;
-               goto ret;
+               return 0;
        }
 
-       if (tcp->qual_flg & QUAL_FAULT)
-               inject_syscall_fault_entering(tcp, sig);
+       if (inject(tcp))
+               tamper_with_syscall_entering(tcp, sig);
 
        if (cflag == CFLAG_ONLY_STATS) {
-               res = 0;
-               goto ret;
+               return 0;
        }
 
 #ifdef USE_LIBUNWIND
@@ -701,39 +704,41 @@ trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
 
        printleader(tcp);
        tprintf("%s(", tcp->s_ent->sys_name);
-       if (tcp->qual_flg & QUAL_RAW)
-               res = printargs(tcp);
-       else
-               res = tcp->s_ent->sys_func(tcp);
-
+       int res = raw(tcp) ? printargs(tcp) : tcp->s_ent->sys_func(tcp);
        fflush(tcp->outf);
- ret:
+       return res;
+}
+
+void
+syscall_entering_finish(struct tcb *tcp, int res)
+{
        tcp->flags |= TCB_INSYSCALL;
        tcp->sys_func_rval = res;
        /* Measure the entrance time as late as possible to avoid errors. */
-       if (Tflag || cflag)
+       if ((Tflag || cflag) && !filtered(tcp))
                gettimeofday(&tcp->etime, NULL);
-       return res;
 }
 
 static bool
-syscall_fault_injected(struct tcb *tcp)
+syscall_tampered(struct tcb *tcp)
 {
-       return tcp->flags & TCB_FAULT_INJ;
+       return tcp->flags & TCB_TAMPERED;
 }
 
-static int
-trace_syscall_exiting(struct tcb *tcp)
+/* Returns:
+ * 0: "bail out".
+ * 1: ok.
+ * -1: error in one of ptrace ops.
+ *
+ * If not 0, call syscall_exiting_trace(tcp, *ptv, res), where res is the return
+ *    value. Anyway, call syscall_exiting_finish(tcp) then.
+ */
+int
+syscall_exiting_decode(struct tcb *tcp, struct timeval *ptv)
 {
-       int sys_res;
-       struct timeval tv;
-       int res;
-       unsigned long u_error;
-       const char *u_error_str;
-
        /* Measure the exit time as early as possible to avoid errors. */
-       if (Tflag || cflag)
-               gettimeofday(&tv, NULL);
+       if ((Tflag || cflag) && !(filtered(tcp) || hide_log(tcp)))
+               gettimeofday(ptv, NULL);
 
 #ifdef USE_LIBUNWIND
        if (stack_trace_enabled) {
@@ -742,20 +747,26 @@ trace_syscall_exiting(struct tcb *tcp)
        }
 #endif
 
+       if (filtered(tcp) || hide_log(tcp))
+               return 0;
+
+       get_regs(tcp->pid);
 #if SUPPORTED_PERSONALITIES > 1
        update_personality(tcp, tcp->currpers);
 #endif
-       res = (get_regs_error ? -1 : get_syscall_result(tcp));
-       if (filtered(tcp) || hide_log(tcp))
-               goto ret;
+       return get_regs_error ? -1 : get_syscall_result(tcp);
+}
 
-       if (syserror(tcp) && syscall_fault_injected(tcp))
-               update_syscall_fault_exiting(tcp);
+int
+syscall_exiting_trace(struct tcb *tcp, struct timeval tv, int res)
+{
+       if (syserror(tcp) && syscall_tampered(tcp))
+               tamper_with_syscall_exiting(tcp);
 
        if (cflag) {
                count_syscall(tcp, &tv);
                if (cflag == CFLAG_ONLY_STATS) {
-                       goto ret;
+                       return 0;
                }
        }
 
@@ -782,27 +793,24 @@ trace_syscall_exiting(struct tcb *tcp)
                tabto();
                tprints("= ? <unavailable>\n");
                line_ended();
-               tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
-               tcp->sys_func_rval = 0;
-               free_tcb_priv_data(tcp);
                return res;
        }
        tcp->s_prev_ent = tcp->s_ent;
 
-       sys_res = 0;
-       if (tcp->qual_flg & QUAL_RAW) {
+       int sys_res = 0;
+       if (raw(tcp)) {
                /* sys_res = printargs(tcp); - but it's nop on sysexit */
        } else {
        /* FIXME: not_failing_only (IOW, option -z) is broken:
         * failure of syscall is known only after syscall return.
         * Thus we end up with something like this on, say, ENOENT:
-        *     open("doesnt_exist", O_RDONLY <unfinished ...>
+        *     open("does_not_exist", O_RDONLY <unfinished ...>
         *     {next syscall decode}
         * whereas the intended result is that open(...) line
         * is not shown at all.
         */
                if (not_failing_only && tcp->u_error)
-                       goto ret;       /* ignore failed syscalls */
+                       return 0;       /* ignore failed syscalls */
                if (tcp->sys_func_rval & RVAL_DECODED)
                        sys_res = tcp->sys_func_rval;
                else
@@ -811,18 +819,19 @@ trace_syscall_exiting(struct tcb *tcp)
 
        tprints(") ");
        tabto();
-       u_error = tcp->u_error;
+       unsigned long u_error = tcp->u_error;
 
-       if (tcp->qual_flg & QUAL_RAW) {
+       if (raw(tcp)) {
                if (u_error) {
                        tprintf("= -1 (errno %lu)", u_error);
-                       if (syscall_fault_injected(tcp))
-                               tprints(" (INJECTED)");
                } else {
                        tprintf("= %#" PRI_klx, tcp->u_rval);
                }
-       }
-       else if (!(sys_res & RVAL_NONE) && u_error) {
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
+       } else if (!(sys_res & RVAL_NONE) && u_error) {
+               const char *u_error_str;
+
                switch (u_error) {
                /* Blocked signals do not interrupt any syscalls.
                 * In this case syscalls don't return ERESTARTfoo codes.
@@ -887,12 +896,11 @@ trace_syscall_exiting(struct tcb *tcp)
                                        u_error, strerror(u_error));
                        break;
                }
-               if (syscall_fault_injected(tcp))
-                       tprintf(" (INJECTED)");
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
                if ((sys_res & RVAL_STR) && tcp->auxstr)
                        tprintf(" (%s)", tcp->auxstr);
-       }
-       else {
+       } else {
                if (sys_res & RVAL_NONE)
                        tprints("= ?");
                else {
@@ -930,8 +938,7 @@ trace_syscall_exiting(struct tcb *tcp)
                                if (show_fd_path) {
                                        tprints("= ");
                                        printfd(tcp, tcp->u_rval);
-                               }
-                               else
+                               } else
                                        tprintf("= %" PRI_kld, tcp->u_rval);
                                break;
                        default:
@@ -941,6 +948,8 @@ trace_syscall_exiting(struct tcb *tcp)
                }
                if ((sys_res & RVAL_STR) && tcp->auxstr)
                        tprintf(" (%s)", tcp->auxstr);
+               if (syscall_tampered(tcp))
+                       tprints(" (INJECTED)");
        }
        if (Tflag) {
                tv_sub(&tv, &tv, &tcp->etime);
@@ -955,19 +964,15 @@ trace_syscall_exiting(struct tcb *tcp)
        if (stack_trace_enabled)
                unwind_print_stacktrace(tcp);
 #endif
-
- ret:
-       tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
-       tcp->sys_func_rval = 0;
-       free_tcb_priv_data(tcp);
        return 0;
 }
 
-int
-trace_syscall(struct tcb *tcp, unsigned int *signo)
+void
+syscall_exiting_finish(struct tcb *tcp)
 {
-       return exiting(tcp) ?
-               trace_syscall_exiting(tcp) : trace_syscall_entering(tcp, signo);
+       tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
+       tcp->sys_func_rval = 0;
+       free_tcb_priv_data(tcp);
 }
 
 bool
@@ -999,26 +1004,6 @@ restore_cleared_syserror(struct tcb *tcp)
        tcp->u_error = saved_u_error;
 }
 
-/*
- * Check the syscall return value register value for whether it is
- * a negated errno code indicating an error, or a success return value.
- */
-static inline bool
-is_negated_errno(kernel_ulong_t val)
-{
-       /* Linux kernel defines MAX_ERRNO to 4095. */
-       kernel_ulong_t max = -(kernel_long_t) 4095;
-
-#ifndef current_klongsize
-       if (current_klongsize < sizeof(val)) {
-               val = (uint32_t) val;
-               max = (uint32_t) max;
-       }
-#endif /* !current_klongsize */
-
-       return val >= max;
-}
-
 #include "arch_regs.c"
 
 #ifdef HAVE_GETRVAL2
@@ -1037,6 +1022,7 @@ print_pc(struct tcb *tcp)
 #else
 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
 #endif
+       get_regs(tcp->pid);
        if (get_regs_error || ARCH_GET_PC)
                tprints(current_wordsize == 4 ? "[????????] "
                                              : "[????????????????] ");
@@ -1122,12 +1108,15 @@ ptrace_setregs(pid_t pid)
 
 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
 
-void
+static void
 get_regs(pid_t pid)
 {
 #undef USE_GET_SYSCALL_RESULT_REGS
 #ifdef ptrace_getregset_or_getregs
 
+       if (get_regs_error != -1)
+               return;
+
 # ifdef HAVE_GETREGS_OLD
        /*
         * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
@@ -1186,14 +1175,17 @@ free_sysent_buf(void *ptr)
 
 /*
  * Returns:
- * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
- * 1: ok, continue in trace_syscall_entering().
- * other: error, trace_syscall_entering() should print error indicator
- *    ("????" etc) and bail out.
+ * 0: "ignore this ptrace stop", syscall_entering_decode() should return a "bail
+ *    out silently" code.
+ * 1: ok, continue in syscall_entering_decode().
+ * other: error, syscall_entering_decode() should print error indicator
+ *    ("????" etc) and return an appropriate code.
  */
 int
 get_scno(struct tcb *tcp)
 {
+       get_regs(tcp->pid);
+
        if (get_regs_error)
                return -1;
 
@@ -1231,8 +1223,8 @@ static int get_syscall_result_regs(struct tcb *);
 #endif
 
 /* Returns:
- * 1: ok, continue in trace_syscall_exiting().
- * -1: error, trace_syscall_exiting() should print error indicator
+ * 1: ok, continue in syscall_exiting_trace().
+ * -1: error, syscall_exiting_trace() should print error indicator
  *    ("????" etc) and bail out.
  */
 static int
@@ -1267,5 +1259,5 @@ syscall_name(kernel_ulong_t scno)
        if (current_personality == X32_PERSONALITY_NUMBER)
                scno &= ~__X32_SYSCALL_BIT;
 #endif
-       return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
+       return scno_is_valid(scno) ? sysent[scno].sys_name : NULL;
 }