]> granicus.if.org Git - strace/blobdiff - strace.c
tests: check decoding of vcpu auxstr
[strace] / strace.c
index a62f9c6ca2b45f4e5b268455cdb062296e158b3f..467e8a6eb9609b72cf5143fd8f2a8e00c5a45103 100644 (file)
--- a/strace.c
+++ b/strace.c
@@ -3,7 +3,7 @@
  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
- * Copyright (c) 1999-2017 The strace developers.
+ * Copyright (c) 1999-2018 The strace developers.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,8 +31,9 @@
 
 #include "defs.h"
 #include <stdarg.h>
-#include <sys/param.h>
+#include <limits.h>
 #include <fcntl.h>
+#include "ptrace.h"
 #include <signal.h>
 #include <sys/resource.h>
 #include <sys/wait.h>
 #include <pwd.h>
 #include <grp.h>
 #include <dirent.h>
+#include <locale.h>
 #include <sys/utsname.h>
 #ifdef HAVE_PRCTL
 # include <sys/prctl.h>
 #endif
 #include <asm/unistd.h>
 
+#include "largefile_wrappers.h"
+#include "mmap_cache.h"
 #include "number_set.h"
 #include "scno.h"
-#include "ptrace.h"
 #include "printsiginfo.h"
 #include "trace_event.h"
 #include "xstring.h"
+#include "delay.h"
 
 /* In some libc, these aren't declared. Do it ourself: */
 extern char **environ;
 extern int optind;
 extern char *optarg;
 
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 /* if this is true do the stack trace for every system call */
 bool stack_trace_enabled;
 #endif
@@ -120,13 +124,8 @@ static int opt_intr;
  */
 static bool daemonized_tracer;
 
-#if USE_SEIZE
 static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
-# define use_seize (post_attach_sigstop == 0)
-#else
-# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
-# define use_seize 0
-#endif
+#define use_seize (post_attach_sigstop == 0)
 
 /* Sometimes we want to print only succeeding syscalls. */
 bool not_failing_only;
@@ -148,9 +147,13 @@ unsigned int max_strlen = DEFAULT_STRLEN;
 static int acolumn = DEFAULT_ACOLUMN;
 static char *acolumn_spaces;
 
+/* Default output style for xlat entities */
+enum xlat_style xlat_verbosity = XLAT_STYLE_ABBREV;
+
 static const char *outfname;
 /* If -ff, points to stderr. Else, it's our common output log */
 static FILE *shared_log;
+static bool open_append;
 
 struct tcb *printing_tcp;
 static struct tcb *current_tcp;
@@ -168,14 +171,16 @@ unsigned os_release; /* generated from uname()'s u.release */
 static void detach(struct tcb *tcp);
 static void cleanup(void);
 static void interrupt(int sig);
-static sigset_t start_set, blocked_set;
 
 #ifdef HAVE_SIG_ATOMIC_T
-static volatile sig_atomic_t interrupted;
+static volatile sig_atomic_t interrupted, restart_failed;
 #else
-static volatile int interrupted;
+static volatile int interrupted, restart_failed;
 #endif
 
+static sigset_t timer_set;
+static void timer_sighandler(int);
+
 #ifndef HAVE_STRERROR
 
 #if !HAVE_DECL_SYS_ERRLIST
@@ -201,9 +206,26 @@ static void
 print_version(void)
 {
        static const char features[] =
-#ifdef USE_LIBUNWIND
-               " stack-unwind"
-#endif /* USE_LIBUNWIND */
+#ifdef ENABLE_STACKTRACE
+               " stack-trace=" USE_UNWINDER
+#endif
+#ifdef USE_DEMANGLE
+               " stack-demangle"
+#endif
+#if SUPPORTED_PERSONALITIES > 1
+# if defined HAVE_M32_MPERS
+               " m32-mpers"
+# else
+               " no-m32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 1 */
+#if SUPPORTED_PERSONALITIES > 2
+# if defined HAVE_MX32_MPERS
+               " mx32-mpers"
+# else
+               " no-mx32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 2 */
                "";
 
        printf("%s -- version %s\n"
@@ -229,9 +251,9 @@ Output format:\n\
   -a column      alignment COLUMN for printing syscall results (default %d)\n\
   -i             print instruction pointer at time of syscall\n\
 "
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 "\
-  -k             obtain stack trace between each syscall (experimental)\n\
+  -k             obtain stack trace between each syscall\n\
 "
 #endif
 "\
@@ -256,7 +278,11 @@ Statistics:\n\
 \n\
 Filtering:\n\
   -e expr        a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
-     options:    trace, abbrev, verbose, raw, signal, read, write, fault\n\
+     options:    trace, abbrev, verbose, raw, signal, read, write, fault"
+#ifdef HAVE_LINUX_KVM_H
+              ", kvm"
+#endif
+"\n\
   -P path        trace accesses to path\n\
 \n\
 Tracing:\n\
@@ -316,7 +342,6 @@ static const char *ptrace_attach_cmd;
 static int
 ptrace_attach_or_seize(int pid)
 {
-#if USE_SEIZE
        int r;
        if (!use_seize)
                return ptrace_attach_cmd = "PTRACE_ATTACH",
@@ -326,10 +351,6 @@ ptrace_attach_or_seize(int pid)
                return ptrace_attach_cmd = "PTRACE_SEIZE", r;
        r = ptrace(PTRACE_INTERRUPT, pid, 0L, 0L);
        return ptrace_attach_cmd = "PTRACE_INTERRUPT", r;
-#else
-               return ptrace_attach_cmd = "PTRACE_ATTACH",
-                      ptrace(PTRACE_ATTACH, pid, 0L, 0L);
-#endif
 }
 
 /*
@@ -404,7 +425,8 @@ set_cloexec_flag(int fd)
        if (flags == newflags)
                return;
 
-       fcntl(fd, F_SETFD, newflags); /* never fails */
+       if (fcntl(fd, F_SETFD, newflags)) /* never fails */
+               perror_msg_and_die("fcntl(%d, F_SETFD, %#x)", fd, newflags);
 }
 
 static void
@@ -430,35 +452,13 @@ swap_uid(void)
        }
 }
 
-#ifdef _LARGEFILE64_SOURCE
-# ifdef HAVE_FOPEN64
-#  define fopen_for_output fopen64
-# else
-#  define fopen_for_output fopen
-# endif
-# define struct_stat struct stat64
-# define stat_file stat64
-# define struct_dirent struct dirent64
-# define read_dir readdir64
-# define struct_rlimit struct rlimit64
-# define set_rlimit setrlimit64
-#else
-# define fopen_for_output fopen
-# define struct_stat struct stat
-# define stat_file stat
-# define struct_dirent struct dirent
-# define read_dir readdir
-# define struct_rlimit struct rlimit
-# define set_rlimit setrlimit
-#endif
-
 static FILE *
 strace_fopen(const char *path)
 {
        FILE *fp;
 
        swap_uid();
-       fp = fopen_for_output(path, "w");
+       fp = fopen_stream(path, open_append ? "a" : "w");
        if (!fp)
                perror_msg_and_die("Can't fopen '%s'", path);
        swap_uid();
@@ -516,6 +516,19 @@ strace_popen(const char *command)
        return fp;
 }
 
+static void
+outf_perror(const struct tcb * const tcp)
+{
+       if (tcp->outf == stderr) /* no message at all when tcp writes to stderr */
+               return;
+
+       /*
+        * This is ugly, but we don't store separate file names:
+        * with followfork >= 2 the name is rebuilt as outfname.pid,
+        * the same pattern after_successful_attach() uses when it opens
+        * the per-process file; otherwise outfname itself is reported.
+        * perror_msg() is defined elsewhere; presumably it appends the
+        * current errno description, perror(3)-style -- TODO confirm.
+        */
+       if (followfork >= 2)
+               perror_msg("%s.%u", outfname, tcp->pid);
+       else
+               perror_msg("%s", outfname);
+}
+
 ATTRIBUTE_FORMAT((printf, 1, 0))
 static void
 tvprintf(const char *const fmt, va_list args)
@@ -524,8 +537,7 @@ tvprintf(const char *const fmt, va_list args)
                int n = vfprintf(current_tcp->outf, fmt, args);
                if (n < 0) {
                        /* very unlikely due to vfprintf buffering */
-                       if (current_tcp->outf != stderr)
-                               perror_msg("%s", outfname);
+                       outf_perror(current_tcp);
                } else
                        current_tcp->curcol += n;
        }
@@ -554,8 +566,7 @@ tprints(const char *str)
                        return;
                }
                /* very unlikely due to fputs_unlocked buffering */
-               if (current_tcp->outf != stderr)
-                       perror_msg("%s", outfname);
+               outf_perror(current_tcp);
        }
 }
 
@@ -583,8 +594,8 @@ tprintf_comment(const char *fmt, ...)
 static void
 flush_tcp_output(const struct tcb *const tcp)
 {
-       if (fflush(tcp->outf) && tcp->outf != stderr)
-               perror_msg("%s", outfname);
+       if (fflush(tcp->outf))
+               outf_perror(tcp);
 }
 
 void
@@ -600,6 +611,16 @@ line_ended(void)
        }
 }
 
+void
+set_current_tcp(const struct tcb *tcp)
+{
+       current_tcp = (struct tcb *) tcp; /* const is cast away: current_tcp is a non-const pointer */
+
+       /*
+        * Sync current_personality and stuff: set_personality() is
+        * called with the currpers of the tcb that just became current.
+        * A NULL tcp (as passed by droptcb()) only clears current_tcp;
+        * set_personality() is not called in that case.
+        */
+       if (current_tcp)
+               set_personality(current_tcp->currpers);
+}
+
 void
 printleader(struct tcb *tcp)
 {
@@ -610,7 +631,7 @@ printleader(struct tcb *tcp)
                printing_tcp = tcp;
 
        if (printing_tcp) {
-               current_tcp = printing_tcp;
+               set_current_tcp(printing_tcp);
                if (printing_tcp->curcol != 0 && (followfork < 2 || printing_tcp == tcp)) {
                        /*
                         * case 1: we have a shared log (i.e. not -ff), and last line
@@ -624,7 +645,7 @@ printleader(struct tcb *tcp)
        }
 
        printing_tcp = tcp;
-       current_tcp = tcp;
+       set_current_tcp(tcp);
        current_tcp->curcol = 0;
 
        if (print_pid_pfx)
@@ -633,30 +654,47 @@ printleader(struct tcb *tcp)
                tprintf("[pid %5u] ", tcp->pid);
 
        if (tflag) {
-               char str[sizeof("HH:MM:SS")];
-               struct timeval tv, dtv;
-               static struct timeval otv;
-
-               gettimeofday(&tv, NULL);
-               if (rflag) {
-                       if (otv.tv_sec == 0)
-                               otv = tv;
-                       tv_sub(&dtv, &tv, &otv);
-                       tprintf("%6ld.%06ld ",
-                               (long) dtv.tv_sec, (long) dtv.tv_usec);
-                       otv = tv;
-               } else if (tflag > 2) {
-                       tprintf("%ld.%06ld ",
-                               (long) tv.tv_sec, (long) tv.tv_usec);
+               struct timespec ts;
+               clock_gettime(CLOCK_REALTIME, &ts);
+
+               if (tflag > 2) {
+                       tprintf("%lld.%06ld ",
+                               (long long) ts.tv_sec, (long) ts.tv_nsec / 1000);
                } else {
-                       time_t local = tv.tv_sec;
-                       strftime(str, sizeof(str), "%T", localtime(&local));
+                       time_t local = ts.tv_sec;
+                       char str[MAX(sizeof("HH:MM:SS"), sizeof(ts.tv_sec) * 3)];
+                       struct tm *tm = localtime(&local);
+
+                       if (tm)
+                               strftime(str, sizeof(str), "%T", tm);
+                       else
+                               xsprintf(str, "%lld", (long long) local);
                        if (tflag > 1)
-                               tprintf("%s.%06ld ", str, (long) tv.tv_usec);
+                               tprintf("%s.%06ld ",
+                                       str, (long) ts.tv_nsec / 1000);
                        else
                                tprintf("%s ", str);
                }
        }
+
+       if (rflag) {
+               struct timespec ts;
+               clock_gettime(CLOCK_MONOTONIC, &ts);
+
+               static struct timespec ots;
+               if (ots.tv_sec == 0)
+                       ots = ts;
+
+               struct timespec dts;
+               ts_sub(&dts, &ts, &ots);
+               ots = ts;
+
+               tprintf("%s%6ld.%06ld%s ",
+                       tflag ? "(+" : "",
+                       (long) dts.tv_sec, (long) dts.tv_nsec / 1000,
+                       tflag ? ")" : "");
+       }
+
        if (iflag)
                print_pc(tcp);
 }
@@ -673,14 +711,20 @@ tabto(void)
  * may create bogus empty FILE.<nonexistant_pid>, and then die.
  */
 static void
-newoutf(struct tcb *tcp)
+after_successful_attach(struct tcb *tcp, const unsigned int flags)
 {
+       tcp->flags |= TCB_ATTACHED | TCB_STARTUP | flags;
        tcp->outf = shared_log; /* if not -ff mode, the same file is for all */
        if (followfork >= 2) {
                char name[PATH_MAX];
                xsprintf(name, "%s.%u", outfname, tcp->pid);
                tcp->outf = strace_fopen(name);
        }
+
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
+               unwind_tcb_init(tcp);
+#endif
 }
 
 static void
@@ -722,12 +766,6 @@ alloctcb(int pid)
 #if SUPPORTED_PERSONALITIES > 1
                        tcp->currpers = current_personality;
 #endif
-
-#ifdef USE_LIBUNWIND
-                       if (stack_trace_enabled)
-                               unwind_tcb_init(tcp);
-#endif
-
                        nprocs++;
                        debug_msg("new tcb for pid %d, active tcbs:%d",
                                  tcp->pid, nprocs);
@@ -780,12 +818,18 @@ droptcb(struct tcb *tcp)
 
        free_tcb_priv_data(tcp);
 
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_tcb_fin(tcp);
-       }
 #endif
 
+#ifdef HAVE_LINUX_KVM_H
+       kvm_vcpu_info_free(tcp);
+#endif
+
+       if (tcp->mmap_cache)
+               tcp->mmap_cache->free_fn(tcp, __func__);
+
        nprocs--;
        debug_msg("dropped tcb for pid %d, %d remain", tcp->pid, nprocs);
 
@@ -802,7 +846,7 @@ droptcb(struct tcb *tcp)
        }
 
        if (current_tcp == tcp)
-               current_tcp = NULL;
+               set_current_tcp(NULL);
        if (printing_tcp == tcp)
                printing_tcp = NULL;
 
@@ -1007,9 +1051,7 @@ attach_tcb(struct tcb *const tcp)
                return;
        }
 
-       tcp->flags |= TCB_ATTACHED | TCB_GRABBED | TCB_STARTUP |
-                     post_attach_sigstop;
-       newoutf(tcp);
+       after_successful_attach(tcp, TCB_GRABBED | post_attach_sigstop);
        debug_msg("attach to pid %d (main) succeeded", tcp->pid);
 
        static const char task_path[] = "/proc/%d/task";
@@ -1037,12 +1079,10 @@ attach_tcb(struct tcb *const tcp)
                                                 ptrace_attach_cmd, tid);
                                continue;
                        }
-                       debug_msg("attach to pid %d succeeded", tid);
 
-                       struct tcb *tid_tcp = alloctcb(tid);
-                       tid_tcp->flags |= TCB_ATTACHED | TCB_GRABBED |
-                                         TCB_STARTUP | post_attach_sigstop;
-                       newoutf(tid_tcp);
+                       after_successful_attach(alloctcb(tid),
+                                               TCB_GRABBED | post_attach_sigstop);
+                       debug_msg("attach to pid %d succeeded", tid);
                }
 
                closedir(dir);
@@ -1066,15 +1106,6 @@ startup_attach(void)
        unsigned int tcbi;
        struct tcb *tcp;
 
-       /*
-        * Block user interruptions as we would leave the traced
-        * process stopped (process state T) if we would terminate in
-        * between PTRACE_ATTACH and wait4() on SIGSTOP.
-        * We rely on cleanup() from this point on.
-        */
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-
        if (daemonized_tracer) {
                pid_t pid = fork();
                if (pid < 0)
@@ -1114,12 +1145,8 @@ startup_attach(void)
 
                attach_tcb(tcp);
 
-               if (interactive) {
-                       sigprocmask(SIG_SETMASK, &start_set, NULL);
-                       if (interrupted)
-                               goto ret;
-                       sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-               }
+               if (interrupted)
+                       return;
        } /* for each tcbtab[] */
 
        if (daemonized_tracer) {
@@ -1130,10 +1157,6 @@ startup_attach(void)
                kill(parent_pid, SIGKILL);
                strace_child = 0;
        }
-
- ret:
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
 }
 
 /* Stack-o-phobic exec helper, in the hope to work around
@@ -1404,18 +1427,20 @@ startup_child(char **argv)
                                kill(pid, SIGCONT);
                }
                tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP
-                           | TCB_SKIP_DETACH_ON_FIRST_EXEC
-                           | (NOMMU_SYSTEM ? 0 : (TCB_HIDE_LOG | post_attach_sigstop));
-               newoutf(tcp);
+               after_successful_attach(tcp, TCB_SKIP_DETACH_ON_FIRST_EXEC
+                                            | (NOMMU_SYSTEM ? 0
+                                               : (TCB_HIDE_LOG
+                                                  | post_attach_sigstop)));
        } else {
                /* With -D, we are *child* here, the tracee is our parent. */
                strace_child = strace_tracer_pid;
                strace_tracer_pid = getpid();
                tcp = alloctcb(strace_child);
                tcp->flags |= TCB_SKIP_DETACH_ON_FIRST_EXEC | TCB_HIDE_LOG;
-               /* attaching will be done later, by startup_attach */
-               /* note: we don't do newoutf(tcp) here either! */
+               /*
+                * Attaching will be done later, by startup_attach.
+                * Note: we don't do after_successful_attach() here either!
+                */
 
                /* NOMMU BUG! -D mode is active, we (child) return,
                 * and we will scribble over parent's stack!
@@ -1452,7 +1477,6 @@ startup_child(char **argv)
        redirect_standard_fds();
 }
 
-#if USE_SEIZE
 static void
 test_ptrace_seize(void)
 {
@@ -1502,9 +1526,6 @@ test_ptrace_seize(void)
                error_func_msg_and_die("unexpected wait status %#x", status);
        }
 }
-#else /* !USE_SEIZE */
-# define test_ptrace_seize() ((void)0)
-#endif
 
 static unsigned
 get_os_release(void)
@@ -1542,10 +1563,6 @@ get_os_release(void)
 static void
 set_sighandler(int signo, void (*sighandler)(int), struct sigaction *oldact)
 {
-       /* if signal handler is a function, add the signal to blocked_set */
-       if (sighandler != SIG_IGN && sighandler != SIG_DFL)
-               sigaddset(&blocked_set, signo);
-
        const struct sigaction sa = { .sa_handler = sighandler };
        sigaction(signo, &sa, oldact);
 }
@@ -1585,16 +1602,19 @@ init(int argc, char *argv[])
 #endif
        qualify("signal=all");
        while ((c = getopt(argc, argv, "+"
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
            "k"
 #endif
-           "a:b:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxyz")) != EOF) {
+           "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) {
                switch (c) {
                case 'a':
                        acolumn = string_to_uint(optarg);
                        if (acolumn < 0)
                                error_opt_arg(c, optarg);
                        break;
+               case 'A':
+                       open_append = true;
+                       break;
                case 'b':
                        if (strcmp(optarg, "execve") != 0)
                                error_msg_and_die("Syscall '%s' for -b isn't supported",
@@ -1643,7 +1663,7 @@ init(int argc, char *argv[])
                        if (opt_intr <= 0)
                                error_opt_arg(c, optarg);
                        break;
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
                case 'k':
                        stack_trace_enabled = true;
                        break;
@@ -1700,6 +1720,16 @@ init(int argc, char *argv[])
                case 'x':
                        xflag++;
                        break;
+               case 'X':
+                       if (!strcmp(optarg, "raw"))
+                               xlat_verbosity = XLAT_STYLE_RAW;
+                       else if (!strcmp(optarg, "abbrev"))
+                               xlat_verbosity = XLAT_STYLE_ABBREV;
+                       else if (!strcmp(optarg, "verbose"))
+                               xlat_verbosity = XLAT_STYLE_VERBOSE;
+                       else
+                               error_opt_arg(c, optarg);
+                       break;
                case 'y':
                        show_fd_path++;
                        break;
@@ -1744,7 +1774,7 @@ init(int argc, char *argv[])
        if (cflag == CFLAG_ONLY_STATS) {
                if (iflag)
                        error_msg("-%c has no effect with -c", 'i');
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
                if (stack_trace_enabled)
                        error_msg("-%c has no effect with -c", 'k');
 #endif
@@ -1758,30 +1788,15 @@ init(int argc, char *argv[])
                        error_msg("-%c has no effect with -c", 'y');
        }
 
-       if (rflag) {
-               if (tflag > 1)
-                       error_msg("-tt has no effect with -r");
-               tflag = 1;
-       }
-
        acolumn_spaces = xmalloc(acolumn + 1);
        memset(acolumn_spaces, ' ', acolumn);
        acolumn_spaces[acolumn] = '\0';
 
-       sigprocmask(SIG_SETMASK, NULL, &start_set);
-       memcpy(&blocked_set, &start_set, sizeof(blocked_set));
-
        set_sighandler(SIGCHLD, SIG_DFL, &params_for_tracee.child_sa);
 
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
-               unsigned int tcbi;
-
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_init();
-               for (tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
-                       unwind_tcb_init(tcbtab[tcbi]);
-               }
-       }
 #endif
 
        /* See if they want to run as another user. */
@@ -1884,9 +1899,9 @@ init(int argc, char *argv[])
                        set_sighandler(SIGTSTP, SIG_IGN, NULL);
                /*
                 * In interactive mode (if no -o OUTFILE, or -p PID is used),
-                * fatal signals are blocked while syscall stop is processed,
-                * and acted on in between, when waiting for new syscall stops.
-                * In non-interactive mode, signals are ignored.
+                * fatal signals are handled asynchronously and acted on
+                * when waiting for process state changes.
+                * In non-interactive mode these signals are ignored.
                 */
                set_sighandler(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
                set_sighandler(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
@@ -1895,6 +1910,11 @@ init(int argc, char *argv[])
                set_sighandler(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
        }
 
+       sigemptyset(&timer_set);
+       sigaddset(&timer_set, SIGALRM);
+       sigprocmask(SIG_BLOCK, &timer_set, NULL);
+       set_sighandler(SIGALRM, timer_sighandler, NULL);
+
        if (nprocs != 0 || daemonized_tracer)
                startup_attach();
 
@@ -1907,17 +1927,25 @@ init(int argc, char *argv[])
 }
 
 static struct tcb *
-pid2tcb(int pid)
+pid2tcb(const int pid)
 {
-       unsigned int i;
-
        if (pid <= 0)
                return NULL;
 
-       for (i = 0; i < tcbtabsize; i++) {
-               struct tcb *tcp = tcbtab[i];
+#define PID2TCB_CACHE_SIZE 1024U
+#define PID2TCB_CACHE_MASK (PID2TCB_CACHE_SIZE - 1)
+
+       static struct tcb *pid2tcb_cache[PID2TCB_CACHE_SIZE];
+       struct tcb **const ptcp = &pid2tcb_cache[pid & PID2TCB_CACHE_MASK];
+       struct tcb *tcp = *ptcp;
+
+       if (tcp && tcp->pid == pid)
+               return tcp;
+
+       for (unsigned int i = 0; i < tcbtabsize; ++i) {
+               tcp = tcbtab[i];
                if (tcp->pid == pid)
-                       return tcp;
+                       return *ptcp = tcp;
        }
 
        return NULL;
@@ -2017,8 +2045,7 @@ maybe_allocate_tcb(const int pid, int status)
        if (followfork) {
                /* We assume it's a fork/vfork/clone child */
                struct tcb *tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
-               newoutf(tcp);
+               after_successful_attach(tcp, post_attach_sigstop);
                if (!qflag)
                        error_msg("Process %d attached", pid);
                return tcp;
@@ -2173,11 +2200,11 @@ print_event_exit(struct tcb *tcp)
 
        if (followfork < 2 && printing_tcp && printing_tcp != tcp
            && printing_tcp->curcol != 0) {
-               current_tcp = printing_tcp;
+               set_current_tcp(printing_tcp);
                tprints(" <unfinished ...>\n");
                flush_tcp_output(printing_tcp);
                printing_tcp->curcol = 0;
-               current_tcp = tcp;
+               set_current_tcp(tcp);
        }
 
        if ((followfork < 2 && printing_tcp != tcp)
@@ -2194,6 +2221,8 @@ print_event_exit(struct tcb *tcp)
                 */
                tprints(" <unfinished ...>");
        }
+
+       printing_tcp = tcp;
        tprints(") ");
        tabto();
        tprints("= ?\n");
@@ -2204,7 +2233,6 @@ static enum trace_event
 next_event(int *pstatus, siginfo_t *si)
 {
        int pid;
-       int wait_errno;
        int status;
        struct tcb *tcp;
        struct rusage ru;
@@ -2233,12 +2261,42 @@ next_event(int *pstatus, siginfo_t *si)
                        return TE_BREAK;
        }
 
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
+       const bool unblock_delay_timer = is_delay_timer_armed();
+
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer opens here.
+        *
+        * Unblock the signal handler for the delay timer
+        * iff the delay timer is already created.
+        */
+       if (unblock_delay_timer)
+               sigprocmask(SIG_UNBLOCK, &timer_set, NULL);
+
+       /*
+        * If the delay timer has expired, then its expiration
+        * has been handled already by the signal handler.
+        *
+        * If the delay timer expires during wait4(),
+        * then the system call will be interrupted and
+        * the expiration will be handled by the signal handler.
+        */
        pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL));
-       wait_errno = errno;
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
+       const int wait_errno = errno;
+
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer closes here.
+        *
+        * Block the signal handler for the delay timer
+        * iff it was unblocked earlier.
+        */
+       if (unblock_delay_timer) {
+               sigprocmask(SIG_BLOCK, &timer_set, NULL);
+
+               if (restart_failed)
+                       return TE_BREAK;
+       }
 
        if (pid < 0) {
                if (wait_errno == EINTR)
@@ -2276,11 +2334,15 @@ next_event(int *pstatus, siginfo_t *si)
        clear_regs(tcp);
 
        /* Set current output file */
-       current_tcp = tcp;
+       set_current_tcp(tcp);
 
        if (cflag) {
-               tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
-               tcp->stime = ru.ru_stime;
+               struct timespec stime = {
+                       .tv_sec = ru.ru_stime.tv_sec,
+                       .tv_nsec = ru.ru_stime.tv_usec * 1000
+               };
+               ts_sub(&tcp->dtime, &stime, &tcp->stime);
+               tcp->stime = stime;
        }
 
        if (WIFSIGNALED(status))
@@ -2329,7 +2391,6 @@ next_event(int *pstatus, siginfo_t *si)
                        return stopped ? TE_GROUP_STOP : TE_SIGNAL_DELIVERY_STOP;
                }
                break;
-#if USE_SEIZE
        case PTRACE_EVENT_STOP:
                /*
                 * PTRACE_INTERRUPT-stop or group-stop.
@@ -2343,7 +2404,6 @@ next_event(int *pstatus, siginfo_t *si)
                        return TE_GROUP_STOP;
                }
                return TE_RESTART;
-#endif
        case PTRACE_EVENT_EXEC:
                return TE_STOP_BEFORE_EXECVE;
        case PTRACE_EVENT_EXIT:
@@ -2367,10 +2427,10 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
                syscall_entering_finish(tcp, res);
                return res;
        } else {
-               struct timeval tv = {};
-               int res = syscall_exiting_decode(tcp, &tv);
+               struct timespec ts = {};
+               int res = syscall_exiting_decode(tcp, &ts);
                if (res != 0) {
-                       res = syscall_exiting_trace(tcp, tv, res);
+                       res = syscall_exiting_trace(tcp, &ts, res);
                }
                syscall_exiting_finish(tcp);
                return res;
@@ -2442,6 +2502,31 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                return true;
 
        case TE_STOP_BEFORE_EXECVE:
+               /*
+                * Check that we are inside syscall now (next event after
+                * PTRACE_EVENT_EXEC should be for syscall exiting).  If it is
+                * not the case, we might have attached to a process in such
+                * a way that the first thing we see is a PTRACE_EVENT_EXEC;
+                * unless we recover here, all the following syscall state
+                * tracking is screwed up.
+                */
+               if (entering(current_tcp)) {
+                       int ret;
+
+                       error_msg("Stray PTRACE_EVENT_EXEC from pid %d"
+                                 ", trying to recover...",
+                                 current_tcp->pid);
+
+                       current_tcp->flags |= TCB_RECOVERING;
+                       ret = trace_syscall(current_tcp, &restart_sig);
+                       current_tcp->flags &= ~TCB_RECOVERING;
+
+                       if (ret < 0) {
+                               /* The reason is described in TE_SYSCALL_STOP */
+                               return true;
+                       }
+               }
+
                /*
                 * Under Linux, execve changes pid to thread leader's pid,
                 * and we see this changed pid on EVENT_EXEC and later,
@@ -2458,7 +2543,8 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                 * On 2.6 and earlier, it can return garbage.
                 */
                if (os_release >= KERNEL_VERSION(3, 0, 0))
-                       current_tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
+                       set_current_tcp(maybe_switch_tcbs(current_tcp,
+                                                         current_tcp->pid));
 
                if (detach_on_execve) {
                        if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
@@ -2479,6 +2565,10 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        if (interrupted)
                return false;
 
+       /* If the process is being delayed, do not ptrace_restart just yet */
+       if (syscall_delayed(current_tcp))
+               return true;
+
        if (ptrace_restart(restart_op, current_tcp, restart_sig) < 0) {
                /* Note: ptrace_restart emitted error message */
                exit_code = 1;
@@ -2487,6 +2577,74 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        return true;
 }
 
+static bool
+restart_delayed_tcb(struct tcb *const tcp)
+{
+       debug_func_msg("pid %d", tcp->pid);
+
+       tcp->flags &= ~TCB_DELAYED; /* clear the flag before dispatch_event() looks at syscall_delayed() */
+
+       struct tcb *const prev_tcp = current_tcp;
+       current_tcp = tcp; /* dispatch_event() operates on current_tcp, so point it at tcp for the call */
+       bool ret = dispatch_event(TE_RESTART, NULL, NULL);
+       current_tcp = prev_tcp; /* NOTE(review): both assignments bypass set_current_tcp(), so set_personality() is not called -- confirm this is intended */
+
+       return ret; /* dispatch_event()'s result, passed through unchanged */
+}
+
+static bool
+restart_delayed_tcbs(void)
+{
+       struct tcb *tcp_next = NULL; /* still-delayed tcb with the earliest expiration seen so far */
+       struct timespec ts_now;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts_now);
+
+       for (size_t i = 0; i < tcbtabsize; i++) {
+               struct tcb *tcp = tcbtab[i];
+
+               if (tcp->pid && syscall_delayed(tcp)) { /* pid 0 presumably marks an unused table slot -- TODO confirm */
+                       if (ts_cmp(&ts_now, &tcp->delay_expiration_time) > 0) { /* assumes ts_cmp() > 0 means the first argument is later -- TODO confirm */
+                               if (!restart_delayed_tcb(tcp))
+                                       return false; /* first failed restart ends the scan; the timer is not re-armed on this path */
+                       } else {
+                               /*
+                                * Check whether this tcb is the next: it replaces
+                                * the current candidate when there is none yet or
+                                * when the candidate's expiration time compares
+                                * greater than this tcb's.
+                                */
+                               if (!tcp_next ||
+                                   ts_cmp(&tcp_next->delay_expiration_time,
+                                          &tcp->delay_expiration_time) > 0) {
+                                       tcp_next = tcp;
+                               }
+                       }
+               }
+       }
+
+       if (tcp_next)
+               arm_delay_timer(tcp_next); /* defined elsewhere; presumably schedules the timer for tcp_next's expiration -- TODO confirm */
+
+       return true; /* every expired tcb was restarted successfully (or none had expired) */
+}
+
+/*
+ * As this signal handler does a lot of work that is not suitable
+ * for signal handlers, extra care must be taken to ensure that
+ * it is enabled only in those places where it's safe.
+ *
+ * init() installs it for SIGALRM and blocks that signal; next_event()
+ * unblocks the signal only around its wait4() call, and only when
+ * is_delay_timer_armed() reports true.  A failed restart_delayed_tcbs()
+ * is latched in restart_failed, which next_event() turns into TE_BREAK;
+ * once the flag is set, later invocations return without attempting
+ * any further restarts.  The sig argument is not used.
+ */
+static void
+timer_sighandler(int sig)
+{
+       delay_timer_expired(); /* NOTE(review): runs before errno is saved below -- confirm it cannot modify errno */
+
+       if (restart_failed)
+               return;
+
+       int saved_errno = errno; /* restored on exit so the interrupted code sees its own errno */
+
+       if (!restart_delayed_tcbs())
+               restart_failed = 1;
+
+       errno = saved_errno;
+}
+
 #ifdef ENABLE_COVERAGE_GCOV
 extern void __gcov_flush(void);
 #endif
@@ -2534,6 +2692,7 @@ terminate(void)
 int
 main(int argc, char *argv[])
 {
+       setlocale(LC_ALL, "");
        init(argc, argv);
 
        exit_code = !nprocs;