Work around stray PTRACE_EVENT_EXEC
author Eugene Syromyatnikov <evgsyr@gmail.com>
Mon, 22 Jan 2018 02:53:43 +0000 (03:53 +0100)
committer Dmitry V. Levin <ldv@altlinux.org>
Thu, 25 Jan 2018 01:55:08 +0000 (01:55 +0000)
We (apparently) had a long-standing test failure inside strace-ff.test
whose symptom was a missed exit_group call.  As it turned out, the cause
was a PTRACE_EVENT_EXEC stop followed by an execve syscall-exit-stop
without a preceding syscall-enter-stop.  That sequence indeed screwed up
all the syscall state tracking for the tracee.
Let's try to patch it up by calling trace_syscall when we receive
PTRACE_EVENT_EXEC outside a syscall.

* defs.h (TCB_RECOVERING): New tcb flag.
* strace.c (dispatch_event) <case TE_STOP_BEFORE_EXECVE>: Invoke
trace_syscall with TCB_RECOVERING flag being set for the current_tcp
if the tracee is not on exiting syscall.
* syscall.c (get_scno): Set QUAL_RAW if we are recovering.
(tamper_with_syscall_entering): Do not perform actual tampering during
recovery as it's already too late.
* NEWS: Mention it.

Co-Authored-by: Dmitry V. Levin <ldv@altlinux.org>
NEWS
defs.h
strace.c
syscall.c

diff --git a/NEWS b/NEWS
index 148ac5b62319f3379dcf34f32bbe7eb0e3c540eb..37be7680a02abf013ad917448783229d483045a7 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -42,6 +42,9 @@ Noteworthy changes in release ?.?? (????-??-??)
     summary output.
   * Fixed call summary header for x32 personality.
   * Changes of tracee personalities are reported more reliably.
+  * Fixed the case when strace attaches to a tracee being inside exec and its
+    first syscall stop is syscall-exit-stop instead of syscall-enter-stop,
+    which messed up all the syscall state tracking.
 
 * Portability
   * A C compiler that supports "for" loop initial declarations is now required.
diff --git a/defs.h b/defs.h
index dddf8a693583e68490988c7ce9290405c61a863b..b62b88c952fcd6a05aa80e9b3e4151fa2626131f 100644 (file)
--- a/defs.h
+++ b/defs.h
@@ -248,8 +248,10 @@ struct tcb {
 #define TCB_TAMPERED   0x40    /* A syscall has been tampered with */
 #define TCB_HIDE_LOG   0x80    /* We should hide everything (until execve) */
 #define TCB_SKIP_DETACH_ON_FIRST_EXEC  0x100   /* -b execve should skip detach on first execve */
-#define TCB_GRABBED    0x200 /* We grab the process and can catch it
-                              * in the middle of a syscall */
+#define TCB_GRABBED    0x200   /* We grab the process and can catch it
+                                * in the middle of a syscall */
+#define TCB_RECOVERING 0x400   /* We try to recover after detecting incorrect
+                                * syscall entering/exiting state */
 
 /* qualifier flags */
 #define QUAL_TRACE     0x001   /* this system call should be traced */
@@ -271,6 +273,7 @@ struct tcb {
 #define filtered(tcp)  ((tcp)->flags & TCB_FILTERED)
 #define hide_log(tcp)  ((tcp)->flags & TCB_HIDE_LOG)
 #define syscall_tampered(tcp)  ((tcp)->flags & TCB_TAMPERED)
+#define recovering(tcp)        ((tcp)->flags & TCB_RECOVERING)
 
 #include "xlat.h"
 
diff --git a/strace.c b/strace.c
index 0a8195804232d548a720600846a0228c4c63032d..eebbaa07c90cada9842a824c2152455e81728b9d 100644 (file)
--- a/strace.c
+++ b/strace.c
@@ -2482,6 +2482,31 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                return true;
 
        case TE_STOP_BEFORE_EXECVE:
+               /*
+                * Check that we are inside a syscall now (the next event
+                * after PTRACE_EVENT_EXEC should be a syscall-exit-stop).
+                * If that is not the case, we might have attached to a
+                * process whose first reported event is PTRACE_EVENT_EXEC;
+                * unless we recover here, all the following syscall state
+                * tracking is screwed up.
+                */
+               if (entering(current_tcp)) {
+                       int ret;
+
+                       error_msg("Stray PTRACE_EVENT_EXEC from pid %d"
+                                 ", trying to recover...",
+                                 current_tcp->pid);
+
+                       current_tcp->flags |= TCB_RECOVERING;
+                       ret = trace_syscall(current_tcp, &restart_sig);
+                       current_tcp->flags &= ~TCB_RECOVERING;
+
+                       if (ret < 0) {
+                               /* The reason is described in TE_SYSCALL_STOP */
+                               return true;
+                       }
+               }
+
                /*
                 * Under Linux, execve changes pid to thread leader's pid,
                 * and we see this changed pid on EVENT_EXEC and later,
diff --git a/syscall.c b/syscall.c
index 6fb111e90f1a5efa52f17ce06ffafbc2ad52e094..ce804bf5f33e8371b8e31067b80850ad38c8c730 100644 (file)
--- a/syscall.c
+++ b/syscall.c
@@ -572,10 +572,13 @@ tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
 
        opts->first = opts->step;
 
-       if (opts->data.flags & INJECT_F_SIGNAL)
-               *signo = opts->data.signo;
-       if (opts->data.flags & INJECT_F_RETVAL && !arch_set_scno(tcp, -1))
-               tcp->flags |= TCB_TAMPERED;
+       if (!recovering(tcp)) {
+               if (opts->data.flags & INJECT_F_SIGNAL)
+                       *signo = opts->data.signo;
+               if (opts->data.flags & INJECT_F_RETVAL &&
+                   !arch_set_scno(tcp, -1))
+                       tcp->flags |= TCB_TAMPERED;
+       }
 
        return 0;
 }
@@ -1256,6 +1259,15 @@ get_scno(struct tcb *tcp)
                debug_msg("pid %d invalid syscall %" PRI_kld,
                          tcp->pid, tcp->scno);
        }
+
+       /*
+        * We refrain from argument decoding during recovering
+        * as tracee memory mappings have changed and the registers
+        * are very likely pointing to garbage already.
+        */
+       if (recovering(tcp))
+               tcp->qual_flg |= QUAL_RAW;
+
        return 1;
 }