granicus.if.org Git - strace/blobdiff - strace.c
Fix preprocessor indentation
[strace] / strace.c
index f5547f9293eeff63d009ac3d4f718577db463551..b52a3db3412a92d88a3df4f76a6344107968cb6f 100644 (file)
--- a/strace.c
+++ b/strace.c
@@ -3,61 +3,52 @@
  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
- * Copyright (c) 1999-2017 The strace developers.
+ * Copyright (c) 1999-2019 The strace developers.
  * All rights reserved.
  *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- *    derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
 #include "defs.h"
 #include <stdarg.h>
-#include <sys/param.h>
+#include <limits.h>
 #include <fcntl.h>
+#include "ptrace.h"
 #include <signal.h>
 #include <sys/resource.h>
-#include <sys/wait.h>
 #include <sys/stat.h>
+#ifdef HAVE_PATHS_H
+# include <paths.h>
+#endif
+#include <getopt.h>
 #include <pwd.h>
 #include <grp.h>
 #include <dirent.h>
+#include <locale.h>
 #include <sys/utsname.h>
 #ifdef HAVE_PRCTL
 # include <sys/prctl.h>
 #endif
-#include <asm/unistd.h>
 
+#include "kill_save_errno.h"
+#include "filter_seccomp.h"
+#include "largefile_wrappers.h"
+#include "mmap_cache.h"
 #include "number_set.h"
+#include "ptrace_syscall_info.h"
 #include "scno.h"
-#include "ptrace.h"
 #include "printsiginfo.h"
 #include "trace_event.h"
+#include "xstring.h"
+#include "delay.h"
+#include "wait.h"
 
 /* In some libc, these aren't declared. Do it ourself: */
 extern char **environ;
 extern int optind;
 extern char *optarg;
 
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 /* if this is true do the stack trace for every system call */
 bool stack_trace_enabled;
 #endif
@@ -95,7 +86,7 @@ enum {
        INTR_ANYWHERE       = 1, /* don't block/ignore any signals */
        INTR_WHILE_WAIT     = 2, /* block fatal signals while decoding syscall. default */
        INTR_NEVER          = 3, /* block fatal signals. default if '-o FILE PROG' */
-       INTR_BLOCK_TSTP_TOO = 4, /* block fatal signals and SIGTSTP (^Z) */
+       INTR_BLOCK_TSTP_TOO = 4, /* block fatal signals and SIGTSTP (^Z); default if -D */
        NUM_INTR_OPTS
 };
 static int opt_intr;
@@ -116,16 +107,8 @@ static int opt_intr;
  */
 static bool daemonized_tracer;
 
-#if USE_SEIZE
 static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
-# define use_seize (post_attach_sigstop == 0)
-#else
-# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
-# define use_seize 0
-#endif
-
-/* Sometimes we want to print only succeeding syscalls. */
-bool not_failing_only;
+#define use_seize (post_attach_sigstop == 0)
 
 /* Show path associated with fd arguments */
 unsigned int show_fd_path;
@@ -144,17 +127,32 @@ unsigned int max_strlen = DEFAULT_STRLEN;
 static int acolumn = DEFAULT_ACOLUMN;
 static char *acolumn_spaces;
 
+/* Default output style for xlat entities */
+enum xlat_style xlat_verbosity = XLAT_STYLE_ABBREV;
+
 static const char *outfname;
 /* If -ff, points to stderr. Else, it's our common output log */
 static FILE *shared_log;
+static bool open_append;
 
 struct tcb *printing_tcp;
 static struct tcb *current_tcp;
 
+struct tcb_wait_data {
+       enum trace_event te; /**< Event passed to dispatch_event() */
+       int status;          /**< status, returned by wait4() */
+       unsigned long msg;   /**< Value returned by PTRACE_GETEVENTMSG */
+       siginfo_t si;        /**< siginfo, returned by PTRACE_GETSIGINFO */
+};
+
 static struct tcb **tcbtab;
 static unsigned int nprocs;
 static size_t tcbtabsize;
 
+static struct tcb_wait_data *tcb_wait_tab;
+static size_t tcb_wait_tab_size;
+
+
 #ifndef HAVE_PROGRAM_INVOCATION_NAME
 char *program_invocation_name;
 #endif
@@ -162,22 +160,24 @@ char *program_invocation_name;
 unsigned os_release; /* generated from uname()'s u.release */
 
 static void detach(struct tcb *tcp);
-static void cleanup(void);
+static void cleanup(int sig);
 static void interrupt(int sig);
-static sigset_t start_set, blocked_set;
 
 #ifdef HAVE_SIG_ATOMIC_T
-static volatile sig_atomic_t interrupted;
+static volatile sig_atomic_t interrupted, restart_failed;
 #else
-static volatile int interrupted;
+static volatile int interrupted, restart_failed;
 #endif
 
+static sigset_t timer_set;
+static void timer_sighandler(int);
+
 #ifndef HAVE_STRERROR
 
-#if !HAVE_DECL_SYS_ERRLIST
+# if !HAVE_DECL_SYS_ERRLIST
 extern int sys_nerr;
 extern char *sys_errlist[];
-#endif
+# endif
 
 const char *
 strerror(int err_no)
@@ -185,7 +185,7 @@ strerror(int err_no)
        static char buf[sizeof("Unknown error %d") + sizeof(int)*3];
 
        if (err_no < 1 || err_no >= sys_nerr) {
-               sprintf(buf, "Unknown error %d", err_no);
+               xsprintf(buf, "Unknown error %d", err_no);
                return buf;
        }
        return sys_errlist[err_no];
@@ -197,9 +197,26 @@ static void
 print_version(void)
 {
        static const char features[] =
-#ifdef USE_LIBUNWIND
-               " stack-unwind"
-#endif /* USE_LIBUNWIND */
+#ifdef ENABLE_STACKTRACE
+               " stack-trace=" USE_UNWINDER
+#endif
+#ifdef USE_DEMANGLE
+               " stack-demangle"
+#endif
+#if SUPPORTED_PERSONALITIES > 1
+# if defined HAVE_M32_MPERS
+               " m32-mpers"
+# else
+               " no-m32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 1 */
+#if SUPPORTED_PERSONALITIES > 2
+# if defined HAVE_MX32_MPERS
+               " mx32-mpers"
+# else
+               " no-mx32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 2 */
                "";
 
        printf("%s -- version %s\n"
@@ -214,46 +231,64 @@ print_version(void)
 static void
 usage(void)
 {
+#ifdef ENABLE_STACKTRACE
+# define K_OPT "k"
+#else
+# define K_OPT ""
+#endif
+
        printf("\
-usage: strace [-CdffhiqrtttTvVwxxy] [-I n] [-e expr]...\n\
-              [-a column] [-o file] [-s strsize] [-P path]...\n\
-              -p pid... / [-D] [-E var=val]... [-u username] PROG [ARGS]\n\
-   or: strace -c[dfw] [-I n] [-e expr]... [-O overhead] [-S sortby]\n\
-              -p pid... / [-D] [-E var=val]... [-u username] PROG [ARGS]\n\
+usage: strace [-ACdffhi" K_OPT "qqrtttTvVwxxyyzZ] [-I n] [-b execve] [-e expr]...\n\
+              [-a column] [-o file] [-s strsize] [-X format] [-P path]...\n\
+              [-p pid]... [--seccomp-bpf]\n\
+             { -p pid | [-D] [-E var=val]... [-u username] PROG [ARGS] }\n\
+   or: strace -c[dfwzZ] [-I n] [-b execve] [-e expr]... [-O overhead]\n\
+              [-S sortby] [-P path]... [-p pid]... [--seccomp-bpf]\n\
+              { -p pid | [-D] [-E var=val]... [-u username] PROG [ARGS] }\n\
 \n\
 Output format:\n\
+  -A             open the file provided in the -o option in append mode\n\
   -a column      alignment COLUMN for printing syscall results (default %d)\n\
   -i             print instruction pointer at time of syscall\n\
 "
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 "\
-  -k             obtain stack trace between each syscall (experimental)\n\
+  -k             obtain stack trace between each syscall\n\
 "
 #endif
 "\
   -o file        send trace output to FILE instead of stderr\n\
   -q             suppress messages about attaching, detaching, etc.\n\
+  -qq            suppress messages about process exit status as well.\n\
   -r             print relative timestamp\n\
   -s strsize     limit length of print strings to STRSIZE chars (default %d)\n\
   -t             print absolute timestamp\n\
   -tt            print absolute timestamp with usecs\n\
   -T             print time spent in each syscall\n\
+  -v             verbose mode: print entities unabbreviated\n\
   -x             print non-ascii strings in hex\n\
   -xx            print all strings in hex\n\
+  -X format      set the format for printing of named constants and flags\n\
   -y             print paths associated with file descriptor arguments\n\
-  -yy            print protocol specific information associated with socket file descriptors\n\
+  -yy            print protocol specific information associated with socket\n\
+                 file descriptors\n\
 \n\
 Statistics:\n\
-  -c             count time, calls, and errors for each syscall and report summary\n\
+  -c             count time, calls, and errors for each syscall and report\n\
+                 summary\n\
   -C             like -c but also print regular output\n\
   -O overhead    set overhead for tracing syscalls to OVERHEAD usecs\n\
-  -S sortby      sort syscall counts by: time, calls, name, nothing (default %s)\n\
+  -S sortby      sort syscall counts by: time, calls, errors, name, nothing\n\
+                 (default %s)\n\
   -w             summarise syscall latency (default is system time)\n\
 \n\
 Filtering:\n\
   -e expr        a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
-     options:    trace, abbrev, verbose, raw, signal, read, write, fault\n\
+     options:    trace, abbrev, verbose, raw, signal, read, write, fault,\n\
+                 inject, status, kvm\n\
   -P path        trace accesses to path\n\
+  -z             print only syscalls that returned without an error code\n\
+  -Z             print only syscalls that returned with an error code\n\
 \n\
 Tracing:\n\
   -b execve      detach on execve syscall\n\
@@ -274,27 +309,25 @@ Startup:\n\
   -u username    run command as username handling setuid and/or setgid\n\
 \n\
 Miscellaneous:\n\
+  --seccomp-bpf  enable seccomp-bpf filtering\n\
   -d             enable debug output to stderr\n\
-  -v             verbose mode: print unabbreviated argv, stat, termios, etc. args\n\
-  -h             print help message\n\
-  -V             print version\n\
+  -h, --help     print help message\n\
+  -V, --version  print version\n\
 "
 /* ancient, no one should use it
 -F -- attempt to follow vforks (deprecated, use -f)\n\
  */
-/* this is broken, so don't document it
--z -- print only succeeding syscalls\n\
- */
 , DEFAULT_ACOLUMN, DEFAULT_STRLEN, DEFAULT_SORTBY);
        exit(0);
+
+#undef K_OPT
 }
 
 void ATTRIBUTE_NORETURN
 die(void)
 {
        if (strace_tracer_pid == getpid()) {
-               cflag = 0;
-               cleanup();
+               cleanup(0);
                exit(1);
        }
 
@@ -312,7 +345,6 @@ static const char *ptrace_attach_cmd;
 static int
 ptrace_attach_or_seize(int pid)
 {
-#if USE_SEIZE
        int r;
        if (!use_seize)
                return ptrace_attach_cmd = "PTRACE_ATTACH",
@@ -322,10 +354,18 @@ ptrace_attach_or_seize(int pid)
                return ptrace_attach_cmd = "PTRACE_SEIZE", r;
        r = ptrace(PTRACE_INTERRUPT, pid, 0L, 0L);
        return ptrace_attach_cmd = "PTRACE_INTERRUPT", r;
-#else
-               return ptrace_attach_cmd = "PTRACE_ATTACH",
-                      ptrace(PTRACE_ATTACH, pid, 0L, 0L);
-#endif
+}
+
+static const char *
+ptrace_op_str(unsigned int op)
+{
+       const char *str = xlookup(ptrace_cmds, op);
+       if (str)
+               return str;
+
+       static char buf[sizeof(op) * 3];
+       xsprintf(buf, "%u", op);
+       return buf;
 }
 
 /*
@@ -339,28 +379,13 @@ static int
 ptrace_restart(const unsigned int op, struct tcb *const tcp, unsigned int sig)
 {
        int err;
-       const char *msg;
 
        errno = 0;
        ptrace(op, tcp->pid, 0L, (unsigned long) sig);
        err = errno;
-       if (!err)
+       if (!err || err == ESRCH)
                return 0;
 
-       switch (op) {
-               case PTRACE_CONT:
-                       msg = "CONT";
-                       break;
-               case PTRACE_DETACH:
-                       msg = "DETACH";
-                       break;
-               case PTRACE_LISTEN:
-                       msg = "LISTEN";
-                       break;
-               default:
-                       msg = "SYSCALL";
-       }
-
        /*
         * Why curcol != 0? Otherwise sometimes we get this:
         *
@@ -371,13 +396,13 @@ ptrace_restart(const unsigned int op, struct tcb *const tcp, unsigned int sig)
         * but before we tried to restart it. Log looks ugly.
         */
        if (current_tcp && current_tcp->curcol != 0) {
-               tprintf(" <ptrace(%s):%s>\n", msg, strerror(err));
+               tprintf(" <Cannot restart pid %d with ptrace(%s): %s>\n",
+                       tcp->pid, ptrace_op_str(op), strerror(err));
                line_ended();
        }
-       if (err == ESRCH)
-               return 0;
        errno = err;
-       perror_msg("ptrace(PTRACE_%s,pid:%d,sig:%u)", msg, tcp->pid, sig);
+       perror_msg("ptrace(%s,pid:%d,sig:%u)",
+                  ptrace_op_str(op), tcp->pid, sig);
        return -1;
 }
 
@@ -400,16 +425,8 @@ set_cloexec_flag(int fd)
        if (flags == newflags)
                return;
 
-       fcntl(fd, F_SETFD, newflags); /* never fails */
-}
-
-static void
-kill_save_errno(pid_t pid, int sig)
-{
-       int saved_errno = errno;
-
-       (void) kill(pid, sig);
-       errno = saved_errno;
+       if (fcntl(fd, F_SETFD, newflags)) /* should never fail */
+               perror_msg_and_die("fcntl(%d, F_SETFD, %#x)", fd, newflags);
 }
 
 /*
@@ -426,35 +443,13 @@ swap_uid(void)
        }
 }
 
-#ifdef _LARGEFILE64_SOURCE
-# ifdef HAVE_FOPEN64
-#  define fopen_for_output fopen64
-# else
-#  define fopen_for_output fopen
-# endif
-# define struct_stat struct stat64
-# define stat_file stat64
-# define struct_dirent struct dirent64
-# define read_dir readdir64
-# define struct_rlimit struct rlimit64
-# define set_rlimit setrlimit64
-#else
-# define fopen_for_output fopen
-# define struct_stat struct stat
-# define stat_file stat
-# define struct_dirent struct dirent
-# define read_dir readdir
-# define struct_rlimit struct rlimit
-# define set_rlimit setrlimit
-#endif
-
 static FILE *
 strace_fopen(const char *path)
 {
        FILE *fp;
 
        swap_uid();
-       fp = fopen_for_output(path, "w");
+       fp = fopen_stream(path, open_append ? "a" : "w");
        if (!fp)
                perror_msg_and_die("Can't fopen '%s'", path);
        swap_uid();
@@ -512,6 +507,19 @@ strace_popen(const char *command)
        return fp;
 }
 
+static void
+outf_perror(const struct tcb * const tcp)
+{
+       if (tcp->outf == stderr)
+               return;
+
+       /* This is ugly, but we don't store separate file names */
+       if (followfork >= 2)
+               perror_msg("%s.%u", outfname, tcp->pid);
+       else
+               perror_msg("%s", outfname);
+}
+
 ATTRIBUTE_FORMAT((printf, 1, 0))
 static void
 tvprintf(const char *const fmt, va_list args)
@@ -520,8 +528,7 @@ tvprintf(const char *const fmt, va_list args)
                int n = vfprintf(current_tcp->outf, fmt, args);
                if (n < 0) {
                        /* very unlikely due to vfprintf buffering */
-                       if (current_tcp->outf != stderr)
-                               perror_msg("%s", outfname);
+                       outf_perror(current_tcp);
                } else
                        current_tcp->curcol += n;
        }
@@ -550,8 +557,7 @@ tprints(const char *str)
                        return;
                }
                /* very unlikely due to fputs_unlocked buffering */
-               if (current_tcp->outf != stderr)
-                       perror_msg("%s", outfname);
+               outf_perror(current_tcp);
        }
 }
 
@@ -579,8 +585,8 @@ tprintf_comment(const char *fmt, ...)
 static void
 flush_tcp_output(const struct tcb *const tcp)
 {
-       if (fflush(tcp->outf) && tcp->outf != stderr)
-               perror_msg("%s", outfname);
+       if (fflush(tcp->outf))
+               outf_perror(tcp);
 }
 
 void
@@ -596,6 +602,16 @@ line_ended(void)
        }
 }
 
+void
+set_current_tcp(const struct tcb *tcp)
+{
+       current_tcp = (struct tcb *) tcp;
+
+       /* Sync current_personality with the new current tcb */
+       if (current_tcp)
+               set_personality(current_tcp->currpers);
+}
+
 void
 printleader(struct tcb *tcp)
 {
@@ -606,8 +622,9 @@ printleader(struct tcb *tcp)
                printing_tcp = tcp;
 
        if (printing_tcp) {
-               current_tcp = printing_tcp;
-               if (printing_tcp->curcol != 0 && (followfork < 2 || printing_tcp == tcp)) {
+               set_current_tcp(printing_tcp);
+               if (!tcp->staged_output_data && printing_tcp->curcol != 0 &&
+                   (followfork < 2 || printing_tcp == tcp)) {
                        /*
                         * case 1: we have a shared log (i.e. not -ff), and last line
                         * wasn't finished (same or different tcb, doesn't matter).
@@ -620,7 +637,7 @@ printleader(struct tcb *tcp)
        }
 
        printing_tcp = tcp;
-       current_tcp = tcp;
+       set_current_tcp(tcp);
        current_tcp->curcol = 0;
 
        if (print_pid_pfx)
@@ -629,32 +646,49 @@ printleader(struct tcb *tcp)
                tprintf("[pid %5u] ", tcp->pid);
 
        if (tflag) {
-               char str[sizeof("HH:MM:SS")];
-               struct timeval tv, dtv;
-               static struct timeval otv;
-
-               gettimeofday(&tv, NULL);
-               if (rflag) {
-                       if (otv.tv_sec == 0)
-                               otv = tv;
-                       tv_sub(&dtv, &tv, &otv);
-                       tprintf("%6ld.%06ld ",
-                               (long) dtv.tv_sec, (long) dtv.tv_usec);
-                       otv = tv;
-               } else if (tflag > 2) {
-                       tprintf("%ld.%06ld ",
-                               (long) tv.tv_sec, (long) tv.tv_usec);
+               struct timespec ts;
+               clock_gettime(CLOCK_REALTIME, &ts);
+
+               if (tflag > 2) {
+                       tprintf("%lld.%06ld ",
+                               (long long) ts.tv_sec, (long) ts.tv_nsec / 1000);
                } else {
-                       time_t local = tv.tv_sec;
-                       strftime(str, sizeof(str), "%T", localtime(&local));
+                       time_t local = ts.tv_sec;
+                       char str[MAX(sizeof("HH:MM:SS"), sizeof(ts.tv_sec) * 3)];
+                       struct tm *tm = localtime(&local);
+
+                       if (tm)
+                               strftime(str, sizeof(str), "%T", tm);
+                       else
+                               xsprintf(str, "%lld", (long long) local);
                        if (tflag > 1)
-                               tprintf("%s.%06ld ", str, (long) tv.tv_usec);
+                               tprintf("%s.%06ld ",
+                                       str, (long) ts.tv_nsec / 1000);
                        else
                                tprintf("%s ", str);
                }
        }
+
+       if (rflag) {
+               struct timespec ts;
+               clock_gettime(CLOCK_MONOTONIC, &ts);
+
+               static struct timespec ots;
+               if (ots.tv_sec == 0)
+                       ots = ts;
+
+               struct timespec dts;
+               ts_sub(&dts, &ts, &ots);
+               ots = ts;
+
+               tprintf("%s%6ld.%06ld%s ",
+                       tflag ? "(+" : "",
+                       (long) dts.tv_sec, (long) dts.tv_nsec / 1000,
+                       tflag ? ")" : "");
+       }
+
        if (iflag)
-               print_pc(tcp);
+               print_instruction_pointer(tcp);
 }
 
 void
@@ -669,14 +703,20 @@ tabto(void)
  * may create bogus empty FILE.<nonexistant_pid>, and then die.
  */
 static void
-newoutf(struct tcb *tcp)
+after_successful_attach(struct tcb *tcp, const unsigned int flags)
 {
+       tcp->flags |= TCB_ATTACHED | TCB_STARTUP | flags;
        tcp->outf = shared_log; /* if not -ff mode, the same file is for all */
        if (followfork >= 2) {
                char name[PATH_MAX];
-               sprintf(name, "%s.%u", outfname, tcp->pid);
+               xsprintf(name, "%s.%u", outfname, tcp->pid);
                tcp->outf = strace_fopen(name);
        }
+
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
+               unwind_tcb_init(tcp);
+#endif
 }
 
 static void
@@ -714,16 +754,11 @@ alloctcb(int pid)
                tcp = tcbtab[i];
                if (!tcp->pid) {
                        memset(tcp, 0, sizeof(*tcp));
+                       list_init(&tcp->wait_list);
                        tcp->pid = pid;
 #if SUPPORTED_PERSONALITIES > 1
                        tcp->currpers = current_personality;
 #endif
-
-#ifdef USE_LIBUNWIND
-                       if (stack_trace_enabled)
-                               unwind_tcb_init(tcp);
-#endif
-
                        nprocs++;
                        debug_msg("new tcb for pid %d, active tcbs:%d",
                                  tcp->pid, nprocs);
@@ -770,38 +805,60 @@ droptcb(struct tcb *tcp)
        if (tcp->pid == 0)
                return;
 
+       if (cflag && debug_flag) {
+               struct timespec dt;
+
+               ts_sub(&dt, &tcp->stime, &tcp->atime);
+               debug_func_msg("pid %d: %.9f seconds of system time spent "
+                              "since attach", tcp->pid, ts_float(&dt));
+       }
+
        int p;
        for (p = 0; p < SUPPORTED_PERSONALITIES; ++p)
                free(tcp->inject_vec[p]);
 
        free_tcb_priv_data(tcp);
 
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_tcb_fin(tcp);
-       }
 #endif
 
+#ifdef HAVE_LINUX_KVM_H
+       kvm_vcpu_info_free(tcp);
+#endif
+
+       if (tcp->mmap_cache)
+               tcp->mmap_cache->free_fn(tcp, __func__);
+
        nprocs--;
        debug_msg("dropped tcb for pid %d, %d remain", tcp->pid, nprocs);
 
        if (tcp->outf) {
+               bool publish = true;
+               if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) {
+                       publish = is_number_in_set(STATUS_DETACHED, status_set);
+                       strace_close_memstream(tcp, publish);
+               }
+
                if (followfork >= 2) {
-                       if (tcp->curcol != 0)
+                       if (tcp->curcol != 0 && publish)
                                fprintf(tcp->outf, " <detached ...>\n");
                        fclose(tcp->outf);
                } else {
-                       if (printing_tcp == tcp && tcp->curcol != 0)
+                       if (printing_tcp == tcp && tcp->curcol != 0 && publish)
                                fprintf(tcp->outf, " <detached ...>\n");
                        flush_tcp_output(tcp);
                }
        }
 
        if (current_tcp == tcp)
-               current_tcp = NULL;
+               set_current_tcp(NULL);
        if (printing_tcp == tcp)
                printing_tcp = NULL;
 
+       list_remove(&tcp->wait_list);
+
        memset(tcp, 0, sizeof(*tcp));
 }
 
@@ -1003,17 +1060,16 @@ attach_tcb(struct tcb *const tcp)
                return;
        }
 
-       tcp->flags |= TCB_ATTACHED | TCB_GRABBED | TCB_STARTUP |
-                     post_attach_sigstop;
-       newoutf(tcp);
+       after_successful_attach(tcp, TCB_GRABBED | post_attach_sigstop);
        debug_msg("attach to pid %d (main) succeeded", tcp->pid);
 
-       char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
+       static const char task_path[] = "/proc/%d/task";
+       char procdir[sizeof(task_path) + sizeof(int) * 3];
        DIR *dir;
        unsigned int ntid = 0, nerr = 0;
 
        if (followfork && tcp->pid != strace_child &&
-           sprintf(procdir, "/proc/%d/task", tcp->pid) > 0 &&
+           xsprintf(procdir, task_path, tcp->pid) > 0 &&
            (dir = opendir(procdir)) != NULL) {
                struct_dirent *de;
 
@@ -1032,12 +1088,10 @@ attach_tcb(struct tcb *const tcp)
                                                 ptrace_attach_cmd, tid);
                                continue;
                        }
-                       debug_msg("attach to pid %d succeeded", tid);
 
-                       struct tcb *tid_tcp = alloctcb(tid);
-                       tid_tcp->flags |= TCB_ATTACHED | TCB_GRABBED |
-                                         TCB_STARTUP | post_attach_sigstop;
-                       newoutf(tid_tcp);
+                       after_successful_attach(alloctcb(tid),
+                                               TCB_GRABBED | post_attach_sigstop);
+                       debug_msg("attach to pid %d succeeded", tid);
                }
 
                closedir(dir);
@@ -1061,15 +1115,6 @@ startup_attach(void)
        unsigned int tcbi;
        struct tcb *tcp;
 
-       /*
-        * Block user interruptions as we would leave the traced
-        * process stopped (process state T) if we would terminate in
-        * between PTRACE_ATTACH and wait4() on SIGSTOP.
-        * We rely on cleanup() from this point on.
-        */
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-
        if (daemonized_tracer) {
                pid_t pid = fork();
                if (pid < 0)
@@ -1109,12 +1154,8 @@ startup_attach(void)
 
                attach_tcb(tcp);
 
-               if (interactive) {
-                       sigprocmask(SIG_SETMASK, &start_set, NULL);
-                       if (interrupted)
-                               goto ret;
-                       sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-               }
+               if (interrupted)
+                       return;
        } /* for each tcbtab[] */
 
        if (daemonized_tracer) {
@@ -1125,10 +1166,6 @@ startup_attach(void)
                kill(parent_pid, SIGKILL);
                strace_child = 0;
        }
-
- ret:
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
 }
 
 /* Stack-o-phobic exec helper, in the hope to work around
@@ -1197,6 +1234,10 @@ exec_or_die(void)
        if (params_for_tracee.child_sa.sa_handler != SIG_DFL)
                sigaction(SIGCHLD, &params_for_tracee.child_sa, NULL);
 
+       debug_msg("seccomp filter %s",
+                 seccomp_filtering ? "enabled" : "disabled");
+       if (seccomp_filtering)
+               init_seccomp_filter();
        execv(params->pathname, params->argv);
        perror_msg_and_die("exec");
 }
@@ -1268,7 +1309,7 @@ redirect_standard_fds(void)
 static void
 startup_child(char **argv)
 {
-       struct_stat statbuf;
+       strace_stat_t statbuf;
        const char *filename;
        size_t filename_len;
        char pathname[PATH_MAX];
@@ -1399,18 +1440,20 @@ startup_child(char **argv)
                                kill(pid, SIGCONT);
                }
                tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP
-                           | TCB_SKIP_DETACH_ON_FIRST_EXEC
-                           | (NOMMU_SYSTEM ? 0 : (TCB_HIDE_LOG | post_attach_sigstop));
-               newoutf(tcp);
+               after_successful_attach(tcp, TCB_SKIP_DETACH_ON_FIRST_EXEC
+                                            | (NOMMU_SYSTEM ? 0
+                                               : (TCB_HIDE_LOG
+                                                  | post_attach_sigstop)));
        } else {
                /* With -D, we are *child* here, the tracee is our parent. */
                strace_child = strace_tracer_pid;
                strace_tracer_pid = getpid();
                tcp = alloctcb(strace_child);
                tcp->flags |= TCB_SKIP_DETACH_ON_FIRST_EXEC | TCB_HIDE_LOG;
-               /* attaching will be done later, by startup_attach */
-               /* note: we don't do newoutf(tcp) here either! */
+               /*
+                * Attaching will be done later, by startup_attach.
+                * Note: we don't do after_successful_attach() here either!
+                */
 
                /* NOMMU BUG! -D mode is active, we (child) return,
                 * and we will scribble over parent's stack!
@@ -1433,6 +1476,10 @@ startup_child(char **argv)
                 * to create a genuine separate stack and execute on it.
                 */
        }
+
+       if (seccomp_filtering)
+               tcp->flags |= TCB_SECCOMP_FILTER;
+
        /*
         * A case where straced process is part of a pipe:
         * { sleep 1; yes | head -n99999; } | strace -o/dev/null sh -c 'exec <&-; sleep 9'
@@ -1447,7 +1494,6 @@ startup_child(char **argv)
        redirect_standard_fds();
 }
 
-#if USE_SEIZE
 static void
 test_ptrace_seize(void)
 {
@@ -1497,50 +1543,37 @@ test_ptrace_seize(void)
                error_func_msg_and_die("unexpected wait status %#x", status);
        }
 }
-#else /* !USE_SEIZE */
-# define test_ptrace_seize() ((void)0)
-#endif
 
-static unsigned
+static unsigned int
 get_os_release(void)
 {
-       unsigned rel;
-       const char *p;
        struct utsname u;
        if (uname(&u) < 0)
                perror_msg_and_die("uname");
-       /* u.release has this form: "3.2.9[-some-garbage]" */
-       rel = 0;
-       p = u.release;
-       for (;;) {
-               if (!(*p >= '0' && *p <= '9'))
-                       error_msg_and_die("Bad OS release string: '%s'", u.release);
-               /* Note: this open-codes KERNEL_VERSION(): */
-               rel = (rel << 8) | atoi(p);
-               if (rel >= KERNEL_VERSION(1, 0, 0))
-                       break;
-               while (*p >= '0' && *p <= '9')
-                       p++;
-               if (*p != '.') {
-                       if (rel >= KERNEL_VERSION(0, 1, 0)) {
-                               /* "X.Y-something" means "X.Y.0" */
-                               rel <<= 8;
-                               break;
-                       }
-                       error_msg_and_die("Bad OS release string: '%s'", u.release);
+       /*
+        * u.release string consists of at most three parts
+        * and normally has this form: "3.2.9[-some-garbage]",
+        * "X.Y-something" means "X.Y.0".
+        */
+       const char *p = u.release;
+       unsigned int rel = 0;
+       for (unsigned int parts = 0; parts < 3; ++parts) {
+               unsigned int n = 0;
+               for (; (*p >= '0') && (*p <= '9'); ++p) {
+                       n *= 10;
+                       n += *p - '0';
                }
-               p++;
+               rel <<= 8;
+               rel |= n;
+               if (*p == '.')
+                       ++p;
        }
        return rel;
 }
 
 static void
-set_sigaction(int signo, void (*sighandler)(int), struct sigaction *oldact)
+set_sighandler(int signo, void (*sighandler)(int), struct sigaction *oldact)
 {
-       /* if signal handler is a function, add the signal to blocked_set */
-       if (sighandler != SIG_IGN && sighandler != SIG_DFL)
-               sigaddset(&blocked_set, signo);
-
        const struct sigaction sa = { .sa_handler = sighandler };
        sigaction(signo, &sa, oldact);
 }
@@ -1557,7 +1590,7 @@ static void ATTRIBUTE_NOINLINE
 init(int argc, char *argv[])
 {
        int c, i;
-       int optF = 0;
+       int optF = 0, zflags = 0;
 
        if (!program_invocation_name || !*program_invocation_name) {
                static char name[] = "strace";
@@ -1575,21 +1608,38 @@ init(int argc, char *argv[])
        qualify("trace=all");
        qualify("abbrev=all");
        qualify("verbose=all");
+       qualify("status=all");
 #if DEFAULT_QUAL_FLAGS != (QUAL_TRACE | QUAL_ABBREV | QUAL_VERBOSE)
 # error Bug in DEFAULT_QUAL_FLAGS
 #endif
        qualify("signal=all");
-       while ((c = getopt(argc, argv, "+"
-#ifdef USE_LIBUNWIND
+
+       static const char optstring[] = "+"
+#ifdef ENABLE_STACKTRACE
            "k"
 #endif
-           "a:b:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxyz")) != EOF) {
+           "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yzZ";
+
+       enum {
+               SECCOMP_OPTION = 0x100
+       };
+       static const struct option longopts[] = {
+               { "seccomp-bpf", no_argument, 0, SECCOMP_OPTION },
+               { "help", no_argument, 0, 'h' },
+               { "version", no_argument, 0, 'V' },
+               { 0, 0, 0, 0 }
+       };
+
+       while ((c = getopt_long(argc, argv, optstring, longopts, NULL)) != EOF) {
                switch (c) {
                case 'a':
                        acolumn = string_to_uint(optarg);
                        if (acolumn < 0)
                                error_opt_arg(c, optarg);
                        break;
+               case 'A':
+                       open_append = true;
+                       break;
                case 'b':
                        if (strcmp(optarg, "execve") != 0)
                                error_msg_and_die("Syscall '%s' for -b isn't supported",
@@ -1638,7 +1688,7 @@ init(int argc, char *argv[])
                        if (opt_intr <= 0)
                                error_opt_arg(c, optarg);
                        break;
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
                case 'k':
                        stack_trace_enabled = true;
                        break;
@@ -1647,10 +1697,8 @@ init(int argc, char *argv[])
                        outfname = optarg;
                        break;
                case 'O':
-                       i = string_to_uint(optarg);
-                       if (i < 0)
+                       if (set_overhead(optarg) < 0)
                                error_opt_arg(c, optarg);
-                       set_overhead(i);
                        break;
                case 'p':
                        process_opt_p_list(optarg);
@@ -1695,11 +1743,31 @@ init(int argc, char *argv[])
                case 'x':
                        xflag++;
                        break;
+               case 'X':
+                       if (!strcmp(optarg, "raw"))
+                               xlat_verbosity = XLAT_STYLE_RAW;
+                       else if (!strcmp(optarg, "abbrev"))
+                               xlat_verbosity = XLAT_STYLE_ABBREV;
+                       else if (!strcmp(optarg, "verbose"))
+                               xlat_verbosity = XLAT_STYLE_VERBOSE;
+                       else
+                               error_opt_arg(c, optarg);
+                       break;
                case 'y':
                        show_fd_path++;
                        break;
                case 'z':
-                       not_failing_only = 1;
+                       clear_number_set_array(status_set, 1);
+                       add_number_to_set(STATUS_SUCCESSFUL, status_set);
+                       zflags++;
+                       break;
+               case 'Z':
+                       clear_number_set_array(status_set, 1);
+                       add_number_to_set(STATUS_FAILED, status_set);
+                       zflags++;
+                       break;
+               case SECCOMP_OPTION:
+                       seccomp_filtering = true;
                        break;
                default:
                        error_msg_and_help(NULL);
@@ -1718,6 +1786,16 @@ init(int argc, char *argv[])
                error_msg_and_help("PROG [ARGS] must be specified with -D");
        }
 
+       if (seccomp_filtering) {
+               if (nprocs && (!argc || debug_flag))
+                       error_msg("--seccomp-bpf is not enabled for processes"
+                                 " attached with -p");
+               if (!followfork) {
+                       error_msg("--seccomp-bpf implies -f");
+                       followfork = 1;
+               }
+       }
+
        if (optF) {
                if (followfork) {
                        error_msg("deprecated option -F ignored");
@@ -1739,10 +1817,8 @@ init(int argc, char *argv[])
        if (cflag == CFLAG_ONLY_STATS) {
                if (iflag)
                        error_msg("-%c has no effect with -c", 'i');
-#ifdef USE_LIBUNWIND
                if (stack_trace_enabled)
                        error_msg("-%c has no effect with -c", 'k');
-#endif
                if (rflag)
                        error_msg("-%c has no effect with -c", 'r');
                if (tflag)
@@ -1753,30 +1829,24 @@ init(int argc, char *argv[])
                        error_msg("-%c has no effect with -c", 'y');
        }
 
-       if (rflag) {
-               if (tflag > 1)
-                       error_msg("-tt has no effect with -r");
-               tflag = 1;
-       }
+#ifndef HAVE_OPEN_MEMSTREAM
+       if (!is_complete_set(status_set, NUMBER_OF_STATUSES))
+               error_msg_and_help("open_memstream is required to use -z, -Z, or -e status");
+#endif
+
+       if (zflags > 1)
+               error_msg("Only the last of -z/-Z options will take effect. "
+                         "See status qualifier for more complex filters.");
 
        acolumn_spaces = xmalloc(acolumn + 1);
        memset(acolumn_spaces, ' ', acolumn);
        acolumn_spaces[acolumn] = '\0';
 
-       sigprocmask(SIG_SETMASK, NULL, &start_set);
-       memcpy(&blocked_set, &start_set, sizeof(blocked_set));
-
-       set_sigaction(SIGCHLD, SIG_DFL, &params_for_tracee.child_sa);
-
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
-               unsigned int tcbi;
+       set_sighandler(SIGCHLD, SIG_DFL, &params_for_tracee.child_sa);
 
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_init();
-               for (tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
-                       unwind_tcb_init(tcbtab[tcbi]);
-               }
-       }
 #endif
 
        /* See if they want to run as another user. */
@@ -1801,8 +1871,15 @@ init(int argc, char *argv[])
                ptrace_setoptions |= PTRACE_O_TRACECLONE |
                                     PTRACE_O_TRACEFORK |
                                     PTRACE_O_TRACEVFORK;
+
+       if (seccomp_filtering)
+               check_seccomp_filter();
+       if (seccomp_filtering)
+               ptrace_setoptions |= PTRACE_O_TRACESECCOMP;
+
        debug_msg("ptrace_setoptions = %#x", ptrace_setoptions);
        test_ptrace_seize();
+       test_ptrace_get_syscall_info();
 
        /*
         * Is something weird with our stdin and/or stdout -
@@ -1853,6 +1930,8 @@ init(int argc, char *argv[])
         * no           1       1       INTR_WHILE_WAIT
         */
 
+       if (daemonized_tracer && !opt_intr)
+               opt_intr = INTR_BLOCK_TSTP_TOO;
        if (outfname && argc) {
                if (!opt_intr)
                        opt_intr = INTR_NEVER;
@@ -1872,24 +1951,29 @@ init(int argc, char *argv[])
                startup_child(argv);
        }
 
-       set_sigaction(SIGTTOU, SIG_IGN, NULL);
-       set_sigaction(SIGTTIN, SIG_IGN, NULL);
+       set_sighandler(SIGTTOU, SIG_IGN, NULL);
+       set_sighandler(SIGTTIN, SIG_IGN, NULL);
        if (opt_intr != INTR_ANYWHERE) {
                if (opt_intr == INTR_BLOCK_TSTP_TOO)
-                       set_sigaction(SIGTSTP, SIG_IGN, NULL);
+                       set_sighandler(SIGTSTP, SIG_IGN, NULL);
                /*
                 * In interactive mode (if no -o OUTFILE, or -p PID is used),
-                * fatal signals are blocked while syscall stop is processed,
-                * and acted on in between, when waiting for new syscall stops.
-                * In non-interactive mode, signals are ignored.
+                * fatal signals are handled asynchronously and acted
+                * when waiting for process state changes.
+                * In non-interactive mode these signals are ignored.
                 */
-               set_sigaction(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGQUIT, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGPIPE, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGQUIT, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGPIPE, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
        }
 
+       sigemptyset(&timer_set);
+       sigaddset(&timer_set, SIGALRM);
+       sigprocmask(SIG_BLOCK, &timer_set, NULL);
+       set_sighandler(SIGALRM, timer_sighandler, NULL);
+
        if (nprocs != 0 || daemonized_tracer)
                startup_attach();
 
@@ -1902,31 +1986,36 @@ init(int argc, char *argv[])
 }
 
 static struct tcb *
-pid2tcb(int pid)
+pid2tcb(const int pid)
 {
-       unsigned int i;
-
        if (pid <= 0)
                return NULL;
 
-       for (i = 0; i < tcbtabsize; i++) {
-               struct tcb *tcp = tcbtab[i];
+#define PID2TCB_CACHE_SIZE 1024U
+#define PID2TCB_CACHE_MASK (PID2TCB_CACHE_SIZE - 1)
+
+       static struct tcb *pid2tcb_cache[PID2TCB_CACHE_SIZE];
+       struct tcb **const ptcp = &pid2tcb_cache[pid & PID2TCB_CACHE_MASK];
+       struct tcb *tcp = *ptcp;
+
+       if (tcp && tcp->pid == pid)
+               return tcp;
+
+       for (unsigned int i = 0; i < tcbtabsize; ++i) {
+               tcp = tcbtab[i];
                if (tcp->pid == pid)
-                       return tcp;
+                       return *ptcp = tcp;
        }
 
        return NULL;
 }
 
 static void
-cleanup(void)
+cleanup(int fatal_sig)
 {
        unsigned int i;
        struct tcb *tcp;
-       int fatal_sig;
 
-       /* 'interrupted' is a volatile object, fetch it only once */
-       fatal_sig = interrupted;
        if (!fatal_sig)
                fatal_sig = SIGTERM;
 
@@ -1941,8 +2030,6 @@ cleanup(void)
                }
                detach(tcp);
        }
-       if (cflag)
-               call_summary(shared_log);
 }
 
 static void
@@ -1960,18 +2047,14 @@ print_debug_info(const int pid, int status)
 
        strcpy(buf, "???");
        if (WIFSIGNALED(status))
-#ifdef WCOREDUMP
-               sprintf(buf, "WIFSIGNALED,%ssig=%s",
+               xsprintf(buf, "WIFSIGNALED,%ssig=%s",
                                WCOREDUMP(status) ? "core," : "",
-                               signame(WTERMSIG(status)));
-#else
-               sprintf(buf, "WIFSIGNALED,sig=%s",
-                               signame(WTERMSIG(status)));
-#endif
+                               sprintsigname(WTERMSIG(status)));
        if (WIFEXITED(status))
-               sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
+               xsprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
        if (WIFSTOPPED(status))
-               sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
+               xsprintf(buf, "WIFSTOPPED,sig=%s",
+                        sprintsigname(WSTOPSIG(status)));
        evbuf[0] = '\0';
        if (event != 0) {
                static const char *const event_names[] = {
@@ -1981,6 +2064,7 @@ print_debug_info(const int pid, int status)
                        [PTRACE_EVENT_VFORK_DONE] = "VFORK_DONE",
                        [PTRACE_EVENT_EXEC]  = "EXEC",
                        [PTRACE_EVENT_EXIT]  = "EXIT",
+                       [PTRACE_EVENT_SECCOMP]  = "SECCOMP",
                        /* [PTRACE_EVENT_STOP (=128)] would make biggish array */
                };
                const char *e = "??";
@@ -1988,7 +2072,7 @@ print_debug_info(const int pid, int status)
                        e = event_names[event];
                else if (event == PTRACE_EVENT_STOP)
                        e = "STOP";
-               sprintf(evbuf, ",EVENT_%s (%u)", e, event);
+               xsprintf(evbuf, ",EVENT_%s (%u)", e, event);
        }
        error_msg("[wait(0x%06x) = %u] %s%s", status, pid, buf, evbuf);
 }
@@ -2012,8 +2096,7 @@ maybe_allocate_tcb(const int pid, int status)
        if (followfork) {
                /* We assume it's a fork/vfork/clone child */
                struct tcb *tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
-               newoutf(tcp);
+               after_successful_attach(tcp, post_attach_sigstop);
                if (!qflag)
                        error_msg("Process %d attached", pid);
                return tcp;
@@ -2031,37 +2114,60 @@ maybe_allocate_tcb(const int pid, int status)
        }
 }
 
+/*
+ * Under Linux, execve changes pid to thread leader's pid, and we see this
+ * changed pid on EVENT_EXEC and later, execve sysexit.  Leader "disappears"
+ * without exit notification.  Let user know that, drop leader's tcb, and fix
+ * up pid in execve thread's tcb.  Effectively, execve thread's tcb replaces
+ * leader's tcb.
+ *
+ * BTW, leader is 'stuck undead' (doesn't report WIFEXITED on exit syscall)
+ * in multi-threaded programs exactly in order to handle this case.
+ */
 static struct tcb *
 maybe_switch_tcbs(struct tcb *tcp, const int pid)
 {
-       FILE *fp;
-       struct tcb *execve_thread;
-       long old_pid = 0;
+       /*
+        * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
+        * On 2.6 and earlier it can return garbage.
+        */
+       if (os_release < KERNEL_VERSION(3, 0, 0))
+               return NULL;
+
+       const long old_pid = tcb_wait_tab[tcp->wait_data_idx].msg;
 
-       if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, &old_pid) < 0)
-               return tcp;
        /* Avoid truncation in pid2tcb() param passing */
        if (old_pid <= 0 || old_pid == pid)
-               return tcp;
+               return NULL;
        if ((unsigned long) old_pid > UINT_MAX)
-               return tcp;
-       execve_thread = pid2tcb(old_pid);
+               return NULL;
+       struct tcb *execve_thread = pid2tcb(old_pid);
        /* It should be !NULL, but I feel paranoid */
        if (!execve_thread)
-               return tcp;
+               return NULL;
 
        if (execve_thread->curcol != 0) {
                /*
-                * One case we are here is -ff:
-                * try "strace -oLOG -ff test/threaded_execve"
+                * One case we are here is -ff, try
+                * "strace -oLOG -ff test/threaded_execve".
+                * Another case is demonstrated by
+                * tests/maybe_switch_current_tcp.c
                 */
                fprintf(execve_thread->outf, " <pid changed to %d ...>\n", pid);
                /*execve_thread->curcol = 0; - no need, see code below */
        }
-       /* Swap output FILEs (needed for -ff) */
-       fp = execve_thread->outf;
+       /* Swap output FILEs and memstream (needed for -ff) */
+       FILE *fp = execve_thread->outf;
        execve_thread->outf = tcp->outf;
        tcp->outf = fp;
+       if (execve_thread->staged_output_data || tcp->staged_output_data) {
+               struct staged_output_data *staged_output_data;
+
+               staged_output_data = execve_thread->staged_output_data;
+               execve_thread->staged_output_data = tcp->staged_output_data;
+               tcp->staged_output_data = staged_output_data;
+       }
+
        /* And their column positions */
        execve_thread->curcol = tcp->curcol;
        tcp->curcol = 0;
@@ -2074,12 +2180,29 @@ maybe_switch_tcbs(struct tcb *tcp, const int pid)
                printleader(tcp);
                tprintf("+++ superseded by execve in pid %lu +++\n", old_pid);
                line_ended();
+               /*
+                * Need to reopen memstream for thread
+                * as we closed it in droptcb.
+                */
+               if (!is_complete_set(status_set, NUMBER_OF_STATUSES))
+                       strace_open_memstream(tcp);
                tcp->flags |= TCB_REPRINT;
        }
 
        return tcp;
 }
 
+static struct tcb *
+maybe_switch_current_tcp(void)
+{
+       struct tcb *tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
+
+       if (tcp)
+               set_current_tcp(tcp);
+
+       return tcp;
+}
+
 static void
 print_signalled(struct tcb *tcp, const int pid, int status)
 {
@@ -2091,14 +2214,9 @@ print_signalled(struct tcb *tcp, const int pid, int status)
        if (cflag != CFLAG_ONLY_STATS
            && is_number_in_set(WTERMSIG(status), signal_set)) {
                printleader(tcp);
-#ifdef WCOREDUMP
                tprintf("+++ killed by %s %s+++\n",
-                       signame(WTERMSIG(status)),
+                       sprintsigname(WTERMSIG(status)),
                        WCOREDUMP(status) ? "(core dumped) " : "");
-#else
-               tprintf("+++ killed by %s +++\n",
-                       signame(WTERMSIG(status)));
-#endif
                line_ended();
        }
 }
@@ -2127,12 +2245,17 @@ print_stopped(struct tcb *tcp, const siginfo_t *si, const unsigned int sig)
            && is_number_in_set(sig, signal_set)) {
                printleader(tcp);
                if (si) {
-                       tprintf("--- %s ", signame(sig));
+                       tprintf("--- %s ", sprintsigname(sig));
                        printsiginfo(si);
                        tprints(" ---\n");
                } else
-                       tprintf("--- stopped by %s ---\n", signame(sig));
+                       tprintf("--- stopped by %s ---\n", sprintsigname(sig));
                line_ended();
+
+#ifdef ENABLE_STACKTRACE
+               if (stack_trace_enabled)
+                       unwind_tcb_print(tcp);
+#endif
        }
 }
 
@@ -2156,6 +2279,10 @@ startup_tcb(struct tcb *tcp)
 
        if ((tcp->flags & TCB_GRABBED) && (get_scno(tcp) == 1))
                tcp->s_prev_ent = tcp->s_ent;
+
+       if (cflag) {
+               tcp->atime = tcp->stime;
+       }
 }
 
 static void
@@ -2168,19 +2295,14 @@ print_event_exit(struct tcb *tcp)
 
        if (followfork < 2 && printing_tcp && printing_tcp != tcp
            && printing_tcp->curcol != 0) {
-               current_tcp = printing_tcp;
+               set_current_tcp(printing_tcp);
                tprints(" <unfinished ...>\n");
                flush_tcp_output(printing_tcp);
                printing_tcp->curcol = 0;
-               current_tcp = tcp;
+               set_current_tcp(tcp);
        }
 
-       if ((followfork < 2 && printing_tcp != tcp)
-           || (tcp->flags & TCB_REPRINT)) {
-               tcp->flags &= ~TCB_REPRINT;
-               printleader(tcp);
-               tprintf("<... %s resumed>", tcp->s_ent->sys_name);
-       }
+       print_syscall_resume(tcp);
 
        if (!(tcp->sys_func_rval & RVAL_DECODED)) {
                /*
@@ -2189,23 +2311,87 @@ print_event_exit(struct tcb *tcp)
                 */
                tprints(" <unfinished ...>");
        }
+
+       printing_tcp = tcp;
        tprints(") ");
        tabto();
        tprints("= ?\n");
+       if (!is_complete_set(status_set, NUMBER_OF_STATUSES)) {
+               bool publish = is_number_in_set(STATUS_UNFINISHED, status_set);
+               strace_close_memstream(tcp, publish);
+       }
        line_ended();
 }
 
-static enum trace_event
-next_event(int *pstatus, siginfo_t *si)
+static size_t
+trace_wait_data_size(struct tcb *tcp)
 {
-       int pid;
-       int wait_errno;
-       int status;
-       struct tcb *tcp;
-       struct rusage ru;
+       return sizeof(struct tcb_wait_data);
+}
+
+static struct tcb_wait_data *
+init_trace_wait_data(void *p)
+{
+       struct tcb_wait_data *wd = p;
+
+       memset(wd, 0, sizeof(*wd));
 
+       return wd;
+}
+
+static struct tcb_wait_data *
+copy_trace_wait_data(const struct tcb_wait_data *wd)
+{
+       struct tcb_wait_data *new_wd = xmalloc(sizeof(*new_wd));
+
+       memcpy(new_wd, wd, sizeof(*wd));
+
+       return new_wd;
+}
+
+static void
+free_trace_wait_data(struct tcb_wait_data *wd)
+{
+       free(wd);
+}
+
+static void
+tcb_wait_tab_check_size(const size_t size)
+{
+       while (size >= tcb_wait_tab_size) {
+               tcb_wait_tab = xgrowarray(tcb_wait_tab,
+                                         &tcb_wait_tab_size,
+                                         sizeof(tcb_wait_tab[0]));
+       }
+}
+
+static const struct tcb_wait_data *
+next_event(void)
+{
        if (interrupted)
-               return TE_BREAK;
+               return NULL;
+
+       invalidate_umove_cache();
+
+       struct tcb *tcp = NULL;
+       struct list_item *elem;
+
+       static EMPTY_LIST(pending_tcps);
+       /* Handle the queued tcbs before waiting for new events.  */
+       if (!list_is_empty(&pending_tcps))
+               goto next_event_get_tcp;
+
+       static struct tcb *extra_tcp;
+       static size_t wait_extra_data_idx;
+       /* Handle the extra tcb event.  */
+       if (extra_tcp) {
+               tcp = extra_tcp;
+               extra_tcp = NULL;
+               tcp->wait_data_idx = wait_extra_data_idx;
+
+               debug_msg("dequeued extra event for pid %u", tcp->pid);
+               goto next_event_exit;
+       }
 
        /*
         * Used to exit simply when nprocs hits zero, but in this testcase:
@@ -2225,127 +2411,243 @@ next_event(int *pstatus, siginfo_t *si)
                 * on exit. Oh well...
                 */
                if (nprocs == 0)
-                       return TE_BREAK;
+                       return NULL;
        }
 
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
-       pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL));
-       wait_errno = errno;
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
+       const bool unblock_delay_timer = is_delay_timer_armed();
 
-       if (pid < 0) {
-               if (wait_errno == EINTR)
-                       return TE_NEXT;
-               if (nprocs == 0 && wait_errno == ECHILD)
-                       return TE_BREAK;
-               /*
-                * If nprocs > 0, ECHILD is not expected,
-                * treat it as any other error here:
-                */
-               errno = wait_errno;
-               perror_msg_and_die("wait4(__WALL)");
-       }
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer opens here.
+        *
+        * Unblock the signal handler for the delay timer
+        * iff the delay timer is already created.
+        */
+       if (unblock_delay_timer)
+               sigprocmask(SIG_UNBLOCK, &timer_set, NULL);
+
+       /*
+        * If the delay timer has expired, then its expiration
+        * has been handled already by the signal handler.
+        *
+        * If the delay timer expires during wait4(),
+        * then the system call will be interrupted and
+        * the expiration will be handled by the signal handler.
+        */
+       int status;
+       struct rusage ru;
+       int pid = wait4(-1, &status, __WALL, (cflag ? &ru : NULL));
+       int wait_errno = errno;
 
-       status = *pstatus;
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer closes here.
+        *
+        * Block the signal handler for the delay timer
+        * iff it was unblocked earlier.
+        */
+       if (unblock_delay_timer) {
+               sigprocmask(SIG_BLOCK, &timer_set, NULL);
 
-       if (pid == popen_pid) {
-               if (!WIFSTOPPED(status))
-                       popen_pid = 0;
-               return TE_NEXT;
+               if (restart_failed)
+                       return NULL;
        }
 
-       if (debug_flag)
-               print_debug_info(pid, status);
+       size_t wait_tab_pos = 0;
+       bool wait_nohang = false;
 
-       /* Look up 'pid' in our table. */
-       tcp = pid2tcb(pid);
+       /*
+        * Wait for new events until wait4() returns 0 (meaning that there's
+        * nothing more to wait for for now), or a second event for some tcb
+        * appears (which may happen if a tracee was SIGKILL'ed, for example).
+        */
+       for (;;) {
+               struct tcb_wait_data *wd;
 
-       if (!tcp) {
-               tcp = maybe_allocate_tcb(pid, status);
-               if (!tcp)
-                       return TE_NEXT;
-       }
+               if (pid < 0) {
+                       if (wait_errno == EINTR)
+                               break;
+                       if (wait_nohang)
+                               break;
+                       if (nprocs == 0 && wait_errno == ECHILD)
+                               return NULL;
+                       /*
+                        * If nprocs > 0, ECHILD is not expected,
+                        * treat it as any other error here:
+                        */
+                       errno = wait_errno;
+                       perror_msg_and_die("wait4(__WALL)");
+               }
 
-       clear_regs();
+               if (!pid)
+                       break;
 
-       /* Set current output file */
-       current_tcp = tcp;
+               if (pid == popen_pid) {
+                       if (!WIFSTOPPED(status))
+                               popen_pid = 0;
+                       break;
+               }
 
-       if (cflag) {
-               tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
-               tcp->stime = ru.ru_stime;
-       }
+               if (debug_flag)
+                       print_debug_info(pid, status);
 
-       if (WIFSIGNALED(status))
-               return TE_SIGNALLED;
+               /* Look up 'pid' in our table. */
+               tcp = pid2tcb(pid);
 
-       if (WIFEXITED(status))
-               return TE_EXITED;
+               if (!tcp) {
+                       tcp = maybe_allocate_tcb(pid, status);
+                       if (!tcp)
+                               goto next_event_wait_next;
+               }
 
-       /*
-        * As WCONTINUED flag has not been specified to wait4,
-        * it cannot be WIFCONTINUED(status), so the only case
-        * that remains is WIFSTOPPED(status).
-        */
+               if (cflag) {
+                       tcp->stime.tv_sec = ru.ru_stime.tv_sec;
+                       tcp->stime.tv_nsec = ru.ru_stime.tv_usec * 1000;
+               }
 
-       /* Is this the very first time we see this tracee stopped? */
-       if (tcp->flags & TCB_STARTUP)
-               startup_tcb(tcp);
+               tcb_wait_tab_check_size(wait_tab_pos);
 
-       const unsigned int sig = WSTOPSIG(status);
-       const unsigned int event = (unsigned int) status >> 16;
+               /* Initialise a new wait data structure.  */
+               wd = tcb_wait_tab + wait_tab_pos;
+               init_trace_wait_data(wd);
+               wd->status = status;
 
-       switch (event) {
-       case 0:
-               /*
-                * Is this post-attach SIGSTOP?
-                * Interestingly, the process may stop
-                * with STOPSIG equal to some other signal
-                * than SIGSTOP if we happened to attach
-                * just before the process takes a signal.
-                */
-               if (sig == SIGSTOP && (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
-                       debug_func_msg("ignored SIGSTOP on pid %d", tcp->pid);
-                       tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
-                       return TE_RESTART;
-               } else if (sig == syscall_trap_sig) {
-                       return TE_SYSCALL_STOP;
+               if (WIFSIGNALED(status)) {
+                       wd->te = TE_SIGNALLED;
+               } else if (WIFEXITED(status)) {
+                       wd->te = TE_EXITED;
                } else {
-                       *si = (siginfo_t) {};
                        /*
-                        * True if tracee is stopped by signal
-                        * (as opposed to "tracee received signal").
-                        * TODO: shouldn't we check for errno == EINVAL too?
-                        * We can get ESRCH instead, you know...
+                        * As WCONTINUED flag has not been specified to wait4,
+                        * it cannot be WIFCONTINUED(status), so the only case
+                        * that remains is WIFSTOPPED(status).
                         */
-                       bool stopped = ptrace(PTRACE_GETSIGINFO, pid, 0, si) < 0;
-                       return stopped ? TE_GROUP_STOP : TE_SIGNAL_DELIVERY_STOP;
+
+                       const unsigned int sig = WSTOPSIG(status);
+                       const unsigned int event = (unsigned int) status >> 16;
+
+                       switch (event) {
+                       case 0:
+                               /*
+                                * Is this post-attach SIGSTOP?
+                                * Interestingly, the process may stop
+                                * with STOPSIG equal to some other signal
+                                * than SIGSTOP if we happened to attach
+                                * just before the process takes a signal.
+                                */
+                               if (sig == SIGSTOP &&
+                                   (tcp->flags & TCB_IGNORE_ONE_SIGSTOP)) {
+                                       debug_func_msg("ignored SIGSTOP on "
+                                                      "pid %d", tcp->pid);
+                                       tcp->flags &= ~TCB_IGNORE_ONE_SIGSTOP;
+                                       wd->te = TE_RESTART;
+                               } else if (sig == syscall_trap_sig) {
+                                       wd->te = TE_SYSCALL_STOP;
+                               } else {
+                                       /*
+                                        * True if tracee is stopped by signal
+                                        * (as opposed to "tracee received
+                                        * signal").
+                                        * TODO: shouldn't we check for
+                                        * errno == EINVAL too?
+                                        * We can get ESRCH instead, you know...
+                                        */
+                                       bool stopped = ptrace(PTRACE_GETSIGINFO,
+                                               pid, 0, &wd->si) < 0;
+
+                                       wd->te = stopped ? TE_GROUP_STOP
+                                                        : TE_SIGNAL_DELIVERY_STOP;
+                               }
+                               break;
+                       case PTRACE_EVENT_STOP:
+                               /*
+                                * PTRACE_INTERRUPT-stop or group-stop.
+                                * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
+                                */
+                               switch (sig) {
+                               case SIGSTOP:
+                               case SIGTSTP:
+                               case SIGTTIN:
+                               case SIGTTOU:
+                                       wd->te = TE_GROUP_STOP;
+                                       break;
+                               default:
+                                       wd->te = TE_RESTART;
+                               }
+                               break;
+                       case PTRACE_EVENT_EXEC:
+                                       /*
+                                        * TODO: shouldn't we check for
+                                        * errno == EINVAL here, too?
+                                        * We can get ESRCH instead, you know...
+                                        */
+                               if (ptrace(PTRACE_GETEVENTMSG, pid, NULL,
+                                   &wd->msg) < 0)
+                                       wd->msg = 0;
+
+                               wd->te = TE_STOP_BEFORE_EXECVE;
+                               break;
+                       case PTRACE_EVENT_EXIT:
+                               wd->te = TE_STOP_BEFORE_EXIT;
+                               break;
+                       case PTRACE_EVENT_SECCOMP:
+                               wd->te = TE_SECCOMP;
+                               break;
+                       default:
+                               wd->te = TE_RESTART;
+                       }
                }
-               break;
-#if USE_SEIZE
-       case PTRACE_EVENT_STOP:
-               /*
-                * PTRACE_INTERRUPT-stop or group-stop.
-                * PTRACE_INTERRUPT-stop has sig == SIGTRAP here.
-                */
-               switch (sig) {
-               case SIGSTOP:
-               case SIGTSTP:
-               case SIGTTIN:
-               case SIGTTOU:
-                       return TE_GROUP_STOP;
+
+               if (!wd->te)
+                       error_func_msg("Tracing event hasn't been determined "
+                                      "for pid %d, status %0#x", pid, status);
+
+               if (!list_is_empty(&tcp->wait_list)) {
+                       wait_extra_data_idx = wait_tab_pos;
+                       extra_tcp = tcp;
+                       debug_func_msg("queued extra pid %d", tcp->pid);
+               } else {
+                       tcp->wait_data_idx = wait_tab_pos;
+                       list_append(&pending_tcps, &tcp->wait_list);
+                       debug_func_msg("queued pid %d", tcp->pid);
                }
-               return TE_RESTART;
-#endif
-       case PTRACE_EVENT_EXEC:
-               return TE_STOP_BEFORE_EXECVE;
-       case PTRACE_EVENT_EXIT:
-               return TE_STOP_BEFORE_EXIT;
-       default:
-               return TE_RESTART;
+
+               wait_tab_pos++;
+
+               if (extra_tcp)
+                       break;
+
+next_event_wait_next:
+               pid = wait4(-1, &status, __WALL | WNOHANG, (cflag ? &ru : NULL));
+               wait_errno = errno;
+               wait_nohang = true;
+       }
+
+next_event_get_tcp:
+       elem = list_remove_head(&pending_tcps);
+
+       if (!elem) {
+               tcb_wait_tab_check_size(0);
+               memset(tcb_wait_tab, 0, sizeof(*tcb_wait_tab));
+               tcb_wait_tab->te = TE_NEXT;
+
+               return tcb_wait_tab;
+       } else {
+               tcp = list_elem(elem, struct tcb, wait_list);
+               debug_func_msg("dequeued pid %d", tcp->pid);
        }
+
+next_event_exit:
+       /* Is this the very first time we see this tracee stopped? */
+       if (tcp->flags & TCB_STARTUP)
+               startup_tcb(tcp);
+
+       clear_regs(tcp);
+
+       /* Set current output file */
+       set_current_tcp(tcp);
+
+       return tcb_wait_tab + tcp->wait_data_idx;
 }
 
 static int
@@ -2362,10 +2664,10 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
                syscall_entering_finish(tcp, res);
                return res;
        } else {
-               struct timeval tv = {};
-               int res = syscall_exiting_decode(tcp, &tv);
+               struct timespec ts = {};
+               int res = syscall_exiting_decode(tcp, &ts);
                if (res != 0) {
-                       res = syscall_exiting_trace(tcp, tv, res);
+                       res = syscall_exiting_trace(tcp, &ts, res);
                }
                syscall_exiting_finish(tcp);
                return res;
@@ -2374,12 +2676,23 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
 
 /* Returns true iff the main trace loop has to continue. */
 static bool
-dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
+dispatch_event(const struct tcb_wait_data *wd)
 {
-       unsigned int restart_op = PTRACE_SYSCALL;
+       unsigned int restart_op;
        unsigned int restart_sig = 0;
+       enum trace_event te = wd ? wd->te : TE_BREAK;
+       /*
+        * Copy wd->status to a non-const variable to work around glibc bugs
+        * related to union wait that were fixed by glibc commit glibc-2.24~391.
+        */
+       int status = wd ? wd->status : 0;
+
+       if (current_tcp && has_seccomp_filter(current_tcp))
+               restart_op = seccomp_filter_restart_operator(current_tcp);
+       else
+               restart_op = PTRACE_SYSCALL;
 
-       switch (ret) {
+       switch (te) {
        case TE_BREAK:
                return false;
 
@@ -2389,6 +2702,27 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        case TE_RESTART:
                break;
 
+       case TE_SECCOMP:
+               if (!has_seccomp_filter(current_tcp)) {
+                       /*
+                        * We don't know if forks/clones have a seccomp filter
+                        * when they are created, but we can detect it when we
+                        * have a seccomp-stop.
+                        * In such a case, if !seccomp_before_sysentry, we have
+                        * already processed the syscall entry, so we avoid
+                        * processing it a second time.
+                        */
+                       current_tcp->flags |= TCB_SECCOMP_FILTER;
+                       restart_op = PTRACE_SYSCALL;
+                       break;
+               }
+
+               if (seccomp_before_sysentry) {
+                       restart_op = PTRACE_SYSCALL;
+                       break;
+               }
+               ATTRIBUTE_FALLTHROUGH;
+
        case TE_SYSCALL_STOP:
                if (trace_syscall(current_tcp, &restart_sig) < 0) {
                        /*
@@ -2404,20 +2738,56 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                         */
                        return true;
                }
+               if (has_seccomp_filter(current_tcp)) {
+                       /*
+                        * Syscall and seccomp stops can happen in different
+                        * orders depending on kernel.  strace tests this in
+                        * check_seccomp_order_tracer().
+                        *
+                        * Linux 3.5--4.7:
+                        * (seccomp-stop before syscall-entry-stop)
+                        *         +--> seccomp-stop ->-PTRACE_SYSCALL->-+
+                        *         |                                     |
+                        *     PTRACE_CONT                   syscall-entry-stop
+                        *         |                                     |
+                        * syscall-exit-stop <---PTRACE_SYSCALL-----<----+
+                        *
+                        * Linux 4.8+:
+                        * (seccomp-stop after syscall-entry-stop)
+                        *                 syscall-entry-stop
+                        *
+                        *         +---->-----PTRACE_CONT---->----+
+                        *         |                              |
+                        *  syscall-exit-stop               seccomp-stop
+                        *         |                              |
+                        *         +----<----PTRACE_SYSCALL---<---+
+                        *
+                        * Note in Linux 4.8+, we restart in PTRACE_CONT
+                        * after syscall-exit to skip the syscall-entry-stop.
+                        * The next seccomp-stop will be treated as a syscall
+                        * entry.
+                        *
+                        * The line below implements this behavior.
+                        * Note that exiting(current_tcp) actually marks
+                        * a syscall-entry-stop because the flag was inverted
+                        * in the above call to trace_syscall.
+                        */
+                       restart_op = exiting(current_tcp) ? PTRACE_SYSCALL : PTRACE_CONT;
+               }
                break;
 
        case TE_SIGNAL_DELIVERY_STOP:
-               restart_sig = WSTOPSIG(*pstatus);
-               print_stopped(current_tcp, si, restart_sig);
+               restart_sig = WSTOPSIG(status);
+               print_stopped(current_tcp, &wd->si, restart_sig);
                break;
 
        case TE_SIGNALLED:
-               print_signalled(current_tcp, current_tcp->pid, *pstatus);
+               print_signalled(current_tcp, current_tcp->pid, status);
                droptcb(current_tcp);
                return true;
 
        case TE_GROUP_STOP:
-               restart_sig = WSTOPSIG(*pstatus);
+               restart_sig = WSTOPSIG(status);
                print_stopped(current_tcp, NULL, restart_sig);
                if (use_seize) {
                        /*
@@ -2432,28 +2802,37 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                break;
 
        case TE_EXITED:
-               print_exited(current_tcp, current_tcp->pid, *pstatus);
+               print_exited(current_tcp, current_tcp->pid, status);
                droptcb(current_tcp);
                return true;
 
        case TE_STOP_BEFORE_EXECVE:
+               /* The syscall succeeded, clear the flag.  */
+               current_tcp->flags &= ~TCB_CHECK_EXEC_SYSCALL;
                /*
-                * Under Linux, execve changes pid to thread leader's pid,
-                * and we see this changed pid on EVENT_EXEC and later,
-                * execve sysexit. Leader "disappears" without exit
-                * notification. Let user know that, drop leader's tcb,
-                * and fix up pid in execve thread's tcb.
-                * Effectively, execve thread's tcb replaces leader's tcb.
-                *
-                * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
-                * on exit syscall) in multithreaded programs exactly
-                * in order to handle this case.
-                *
-                * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
-                * On 2.6 and earlier, it can return garbage.
+                * Check that we are inside a syscall now (the next event after
+                * PTRACE_EVENT_EXEC should be a syscall exit).  If that is
+                * not the case, we may have attached to a process whose
+                * first reported event is a PTRACE_EVENT_EXEC; unless we
+                * recover here, all the following syscall state tracking
+                * would be wrong.
                 */
-               if (os_release >= KERNEL_VERSION(3, 0, 0))
-                       current_tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
+               if (!maybe_switch_current_tcp() && entering(current_tcp)) {
+                       int ret;
+
+                       error_msg("Stray PTRACE_EVENT_EXEC from pid %d"
+                                 ", trying to recover...",
+                                 current_tcp->pid);
+
+                       current_tcp->flags |= TCB_RECOVERING;
+                       ret = trace_syscall(current_tcp, &restart_sig);
+                       current_tcp->flags &= ~TCB_RECOVERING;
+
+                       if (ret < 0) {
+                               /* The reason is described in TE_SYSCALL_STOP */
+                               return true;
+                       }
+               }
 
                if (detach_on_execve) {
                        if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
@@ -2474,6 +2853,17 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        if (interrupted)
                return false;
 
+       /* If the process is being delayed, do not ptrace_restart just yet */
+       if (syscall_delayed(current_tcp)) {
+               if (current_tcp->delayed_wait_data)
+                       error_func_msg("pid %d has delayed wait data set"
+                                      " already", current_tcp->pid);
+
+               current_tcp->delayed_wait_data = copy_trace_wait_data(wd);
+
+               return true;
+       }
+
        if (ptrace_restart(restart_op, current_tcp, restart_sig) < 0) {
                /* Note: ptrace_restart emitted error message */
                exit_code = 1;
@@ -2482,6 +2872,87 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        return true;
 }
 
+static bool
+restart_delayed_tcb(struct tcb *const tcp) /* Resume a tracee whose restart was postponed by a delay. */
+{
+       struct tcb_wait_data *wd = tcp->delayed_wait_data; /* stored by dispatch_event() when it postponed the restart */
+
+       if (!wd) { /* missing data is reported, then a scratch record is used instead */
+               error_func_msg("No delayed wait data found for pid %d",
+                              tcp->pid);
+               wd = init_trace_wait_data(alloca(trace_wait_data_size(tcp))); /* alloca: lives on this function's stack */
+       }
+
+       wd->te = TE_RESTART; /* the TE_RESTART case of dispatch_event() goes straight to restarting */
+
+       debug_func_msg("pid %d", tcp->pid);
+
+       tcp->flags &= ~TCB_DELAYED; /* presumably what syscall_delayed() tests, so the restart is not postponed again -- confirm */
+
+       struct tcb *const prev_tcp = current_tcp; /* dispatch_event() operates on current_tcp: switch it temporarily */
+       current_tcp = tcp;
+       bool ret = dispatch_event(wd);
+       current_tcp = prev_tcp; /* put back whatever tcb was current before */
+
+       free_trace_wait_data(tcp->delayed_wait_data); /* drop the stored wait data (not the local wd) */
+       tcp->delayed_wait_data = NULL;
+
+       return ret; /* dispatch_event() result: true iff the main trace loop has to continue */
+}
+
+static bool
+restart_delayed_tcbs(void) /* Restart delayed tracees that are due and pick the one to wait for next. */
+{
+       struct tcb *tcp_next = NULL; /* still-delayed tcb that the timer is armed for at the end */
+       struct timespec ts_now;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts_now); /* one timestamp is used for the whole scan */
+
+       for (size_t i = 0; i < tcbtabsize; i++) { /* walk every slot of tcbtab */
+               struct tcb *tcp = tcbtab[i];
+
+               if (tcp->pid && syscall_delayed(tcp)) { /* only slots with a non-zero pid that are marked delayed */
+                       if (ts_cmp(&ts_now, &tcp->delay_expiration_time) > 0) { /* assumes ts_cmp() > 0 means "first is later", i.e. the delay is over -- confirm */
+                               if (!restart_delayed_tcb(tcp))
+                                       return false; /* a failed restart ends the scan; the timer is not re-armed */
+                       } else {
+                               /* Check whether this tcb is the next.  */
+                               if (!tcp_next ||
+                                   ts_cmp(&tcp_next->delay_expiration_time,
+                                          &tcp->delay_expiration_time) > 0) {
+                                       tcp_next = tcp; /* first candidate, or one that compares lower than the current candidate */
+                               }
+                       }
+               }
+       }
+
+       if (tcp_next) /* something is still delayed: hand it to arm_delay_timer() */
+               arm_delay_timer(tcp_next);
+
+       return true; /* every restart attempted in this pass succeeded */
+}
+
+/*
+ * As this signal handler does a lot of work that is not suitable
+ * for signal handlers, extra care must be taken to ensure that
+ * it is enabled only in those places where it's safe.
+ */
+static void
+timer_sighandler(int sig) /* sig is not used in the body */
+{
+       delay_timer_expired(); /* NOTE(review): presumably records that the timer has fired -- defined elsewhere, confirm */
+
+       if (restart_failed) /* an earlier restart already failed: do not try again */
+               return;
+
+       int saved_errno = errno; /* keep the errno value seen on entry to the restart step */
+
+       if (!restart_delayed_tcbs())
+               restart_failed = 1; /* remembered so that later invocations return early */
+
+       errno = saved_errno; /* undo any errno change made by the restart step */
+}
+
 #ifdef ENABLE_COVERAGE_GCOV
 extern void __gcov_flush(void);
 #endif
@@ -2489,7 +2960,11 @@ extern void __gcov_flush(void);
 static void ATTRIBUTE_NORETURN
 terminate(void)
 {
-       cleanup();
+       int sig = interrupted;
+
+       cleanup(sig);
+       if (cflag)
+               call_summary(shared_log);
        fflush(NULL);
        if (shared_log != stderr)
                fclose(shared_log);
@@ -2497,6 +2972,9 @@ terminate(void)
                while (waitpid(popen_pid, NULL, 0) < 0 && errno == EINTR)
                        ;
        }
+       if (sig) {
+               exit_code = 0x100 | sig;
+       }
        if (exit_code > 0xff) {
                /* Avoid potential core file clobbering.  */
                struct_rlimit rlim = {0, 0};
@@ -2529,13 +3007,12 @@ terminate(void)
 int
 main(int argc, char *argv[])
 {
+       setlocale(LC_ALL, ""); /* select the locale from the environment before anything else runs */
        init(argc, argv);
 
        exit_code = !nprocs; /* 1 when nprocs is zero, 0 otherwise */
 
-       int status;
-       siginfo_t si;
-       while (dispatch_event(dispatch_event(next_event(&status, &si), &status, &si))
+       while (dispatch_event(next_event())) /* loop until dispatch_event() returns false */
                ;
        terminate(); /* declared ATTRIBUTE_NORETURN, so main has no return statement */
 }