]> granicus.if.org Git - strace/blobdiff - strace.c
tests: check decoding of vcpu auxstr
[strace] / strace.c
index 0d23b08f275015305004d9fa1e1bdf8f5bacfeac..467e8a6eb9609b72cf5143fd8f2a8e00c5a45103 100644 (file)
--- a/strace.c
+++ b/strace.c
@@ -3,7 +3,7 @@
  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
- * Copyright (c) 1999-2017 The strace developers.
+ * Copyright (c) 1999-2018 The strace developers.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include "defs.h"
 #include <stdarg.h>
-#include <sys/param.h>
+#include <limits.h>
 #include <fcntl.h>
+#include "ptrace.h"
 #include <signal.h>
 #include <sys/resource.h>
 #include <sys/wait.h>
 #include <sys/stat.h>
+#ifdef HAVE_PATHS_H
+# include <paths.h>
+#endif
 #include <pwd.h>
 #include <grp.h>
 #include <dirent.h>
+#include <locale.h>
 #include <sys/utsname.h>
 #ifdef HAVE_PRCTL
 # include <sys/prctl.h>
 #endif
 #include <asm/unistd.h>
 
+#include "largefile_wrappers.h"
+#include "mmap_cache.h"
 #include "number_set.h"
 #include "scno.h"
-#include "ptrace.h"
 #include "printsiginfo.h"
+#include "trace_event.h"
+#include "xstring.h"
+#include "delay.h"
 
 /* In some libc, these aren't declared. Do it ourself: */
 extern char **environ;
 extern int optind;
 extern char *optarg;
 
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 /* if this is true do the stack trace for every system call */
 bool stack_trace_enabled;
 #endif
@@ -115,13 +124,8 @@ static int opt_intr;
  */
 static bool daemonized_tracer;
 
-#if USE_SEIZE
 static int post_attach_sigstop = TCB_IGNORE_ONE_SIGSTOP;
-# define use_seize (post_attach_sigstop == 0)
-#else
-# define post_attach_sigstop TCB_IGNORE_ONE_SIGSTOP
-# define use_seize 0
-#endif
+#define use_seize (post_attach_sigstop == 0)
 
 /* Sometimes we want to print only succeeding syscalls. */
 bool not_failing_only;
@@ -143,15 +147,20 @@ unsigned int max_strlen = DEFAULT_STRLEN;
 static int acolumn = DEFAULT_ACOLUMN;
 static char *acolumn_spaces;
 
+/* Default output style for xlat entities */
+enum xlat_style xlat_verbosity = XLAT_STYLE_ABBREV;
+
 static const char *outfname;
 /* If -ff, points to stderr. Else, it's our common output log */
 static FILE *shared_log;
+static bool open_append;
 
 struct tcb *printing_tcp;
 static struct tcb *current_tcp;
 
 static struct tcb **tcbtab;
-static unsigned int nprocs, tcbtabsize;
+static unsigned int nprocs;
+static size_t tcbtabsize;
 
 #ifndef HAVE_PROGRAM_INVOCATION_NAME
 char *program_invocation_name;
@@ -162,14 +171,16 @@ unsigned os_release; /* generated from uname()'s u.release */
 static void detach(struct tcb *tcp);
 static void cleanup(void);
 static void interrupt(int sig);
-static sigset_t start_set, blocked_set;
 
 #ifdef HAVE_SIG_ATOMIC_T
-static volatile sig_atomic_t interrupted;
+static volatile sig_atomic_t interrupted, restart_failed;
 #else
-static volatile int interrupted;
+static volatile int interrupted, restart_failed;
 #endif
 
+static sigset_t timer_set;
+static void timer_sighandler(int);
+
 #ifndef HAVE_STRERROR
 
 #if !HAVE_DECL_SYS_ERRLIST
@@ -183,7 +194,7 @@ strerror(int err_no)
        static char buf[sizeof("Unknown error %d") + sizeof(int)*3];
 
        if (err_no < 1 || err_no >= sys_nerr) {
-               sprintf(buf, "Unknown error %d", err_no);
+               xsprintf(buf, "Unknown error %d", err_no);
                return buf;
        }
        return sys_errlist[err_no];
@@ -195,9 +206,26 @@ static void
 print_version(void)
 {
        static const char features[] =
-#ifdef USE_LIBUNWIND
-               " stack-unwind"
-#endif /* USE_LIBUNWIND */
+#ifdef ENABLE_STACKTRACE
+               " stack-trace=" USE_UNWINDER
+#endif
+#ifdef USE_DEMANGLE
+               " stack-demangle"
+#endif
+#if SUPPORTED_PERSONALITIES > 1
+# if defined HAVE_M32_MPERS
+               " m32-mpers"
+# else
+               " no-m32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 1 */
+#if SUPPORTED_PERSONALITIES > 2
+# if defined HAVE_MX32_MPERS
+               " mx32-mpers"
+# else
+               " no-mx32-mpers"
+# endif
+#endif /* SUPPORTED_PERSONALITIES > 2 */
                "";
 
        printf("%s -- version %s\n"
@@ -223,9 +251,9 @@ Output format:\n\
   -a column      alignment COLUMN for printing syscall results (default %d)\n\
   -i             print instruction pointer at time of syscall\n\
 "
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
 "\
-  -k             obtain stack trace between each syscall (experimental)\n\
+  -k             obtain stack trace between each syscall\n\
 "
 #endif
 "\
@@ -250,7 +278,11 @@ Statistics:\n\
 \n\
 Filtering:\n\
   -e expr        a qualifying expression: option=[!]all or option=[!]val1[,val2]...\n\
-     options:    trace, abbrev, verbose, raw, signal, read, write, fault\n\
+     options:    trace, abbrev, verbose, raw, signal, read, write, fault"
+#ifdef HAVE_LINUX_KVM_H
+              ", kvm"
+#endif
+"\n\
   -P path        trace accesses to path\n\
 \n\
 Tracing:\n\
@@ -293,8 +325,10 @@ die(void)
        if (strace_tracer_pid == getpid()) {
                cflag = 0;
                cleanup();
+               exit(1);
        }
-       exit(1);
+
+       _exit(1);
 }
 
 static void
@@ -308,7 +342,6 @@ static const char *ptrace_attach_cmd;
 static int
 ptrace_attach_or_seize(int pid)
 {
-#if USE_SEIZE
        int r;
        if (!use_seize)
                return ptrace_attach_cmd = "PTRACE_ATTACH",
@@ -318,10 +351,6 @@ ptrace_attach_or_seize(int pid)
                return ptrace_attach_cmd = "PTRACE_SEIZE", r;
        r = ptrace(PTRACE_INTERRUPT, pid, 0L, 0L);
        return ptrace_attach_cmd = "PTRACE_INTERRUPT", r;
-#else
-               return ptrace_attach_cmd = "PTRACE_ATTACH",
-                      ptrace(PTRACE_ATTACH, pid, 0L, 0L);
-#endif
 }
 
 /*
@@ -396,7 +425,8 @@ set_cloexec_flag(int fd)
        if (flags == newflags)
                return;
 
-       fcntl(fd, F_SETFD, newflags); /* never fails */
+       if (fcntl(fd, F_SETFD, newflags)) /* never fails */
+               perror_msg_and_die("fcntl(%d, F_SETFD, %#x)", fd, newflags);
 }
 
 static void
@@ -422,35 +452,13 @@ swap_uid(void)
        }
 }
 
-#ifdef _LARGEFILE64_SOURCE
-# ifdef HAVE_FOPEN64
-#  define fopen_for_output fopen64
-# else
-#  define fopen_for_output fopen
-# endif
-# define struct_stat struct stat64
-# define stat_file stat64
-# define struct_dirent struct dirent64
-# define read_dir readdir64
-# define struct_rlimit struct rlimit64
-# define set_rlimit setrlimit64
-#else
-# define fopen_for_output fopen
-# define struct_stat struct stat
-# define stat_file stat
-# define struct_dirent struct dirent
-# define read_dir readdir
-# define struct_rlimit struct rlimit
-# define set_rlimit setrlimit
-#endif
-
 static FILE *
 strace_fopen(const char *path)
 {
        FILE *fp;
 
        swap_uid();
-       fp = fopen_for_output(path, "w");
+       fp = fopen_stream(path, open_append ? "a" : "w");
        if (!fp)
                perror_msg_and_die("Can't fopen '%s'", path);
        swap_uid();
@@ -508,6 +516,19 @@ strace_popen(const char *command)
        return fp;
 }
 
+static void
+outf_perror(const struct tcb * const tcp)
+{
+       if (tcp->outf == stderr)
+               return;
+
+       /* This is ugly, but we don't store separate file names */
+       if (followfork >= 2)
+               perror_msg("%s.%u", outfname, tcp->pid);
+       else
+               perror_msg("%s", outfname);
+}
+
 ATTRIBUTE_FORMAT((printf, 1, 0))
 static void
 tvprintf(const char *const fmt, va_list args)
@@ -516,8 +537,7 @@ tvprintf(const char *const fmt, va_list args)
                int n = vfprintf(current_tcp->outf, fmt, args);
                if (n < 0) {
                        /* very unlikely due to vfprintf buffering */
-                       if (current_tcp->outf != stderr)
-                               perror_msg("%s", outfname);
+                       outf_perror(current_tcp);
                } else
                        current_tcp->curcol += n;
        }
@@ -546,8 +566,7 @@ tprints(const char *str)
                        return;
                }
                /* very unlikely due to fputs_unlocked buffering */
-               if (current_tcp->outf != stderr)
-                       perror_msg("%s", outfname);
+               outf_perror(current_tcp);
        }
 }
 
@@ -575,8 +594,8 @@ tprintf_comment(const char *fmt, ...)
 static void
 flush_tcp_output(const struct tcb *const tcp)
 {
-       if (fflush(tcp->outf) && tcp->outf != stderr)
-               perror_msg("%s", outfname);
+       if (fflush(tcp->outf))
+               outf_perror(tcp);
 }
 
 void
@@ -592,6 +611,16 @@ line_ended(void)
        }
 }
 
+void
+set_current_tcp(const struct tcb *tcp)
+{
+       current_tcp = (struct tcb *) tcp;
+
+       /* Sync current_personality and stuff */
+       if (current_tcp)
+               set_personality(current_tcp->currpers);
+}
+
 void
 printleader(struct tcb *tcp)
 {
@@ -602,7 +631,7 @@ printleader(struct tcb *tcp)
                printing_tcp = tcp;
 
        if (printing_tcp) {
-               current_tcp = printing_tcp;
+               set_current_tcp(printing_tcp);
                if (printing_tcp->curcol != 0 && (followfork < 2 || printing_tcp == tcp)) {
                        /*
                         * case 1: we have a shared log (i.e. not -ff), and last line
@@ -616,7 +645,7 @@ printleader(struct tcb *tcp)
        }
 
        printing_tcp = tcp;
-       current_tcp = tcp;
+       set_current_tcp(tcp);
        current_tcp->curcol = 0;
 
        if (print_pid_pfx)
@@ -625,30 +654,47 @@ printleader(struct tcb *tcp)
                tprintf("[pid %5u] ", tcp->pid);
 
        if (tflag) {
-               char str[sizeof("HH:MM:SS")];
-               struct timeval tv, dtv;
-               static struct timeval otv;
-
-               gettimeofday(&tv, NULL);
-               if (rflag) {
-                       if (otv.tv_sec == 0)
-                               otv = tv;
-                       tv_sub(&dtv, &tv, &otv);
-                       tprintf("%6ld.%06ld ",
-                               (long) dtv.tv_sec, (long) dtv.tv_usec);
-                       otv = tv;
-               } else if (tflag > 2) {
-                       tprintf("%ld.%06ld ",
-                               (long) tv.tv_sec, (long) tv.tv_usec);
+               struct timespec ts;
+               clock_gettime(CLOCK_REALTIME, &ts);
+
+               if (tflag > 2) {
+                       tprintf("%lld.%06ld ",
+                               (long long) ts.tv_sec, (long) ts.tv_nsec / 1000);
                } else {
-                       time_t local = tv.tv_sec;
-                       strftime(str, sizeof(str), "%T", localtime(&local));
+                       time_t local = ts.tv_sec;
+                       char str[MAX(sizeof("HH:MM:SS"), sizeof(ts.tv_sec) * 3)];
+                       struct tm *tm = localtime(&local);
+
+                       if (tm)
+                               strftime(str, sizeof(str), "%T", tm);
+                       else
+                               xsprintf(str, "%lld", (long long) local);
                        if (tflag > 1)
-                               tprintf("%s.%06ld ", str, (long) tv.tv_usec);
+                               tprintf("%s.%06ld ",
+                                       str, (long) ts.tv_nsec / 1000);
                        else
                                tprintf("%s ", str);
                }
        }
+
+       if (rflag) {
+               struct timespec ts;
+               clock_gettime(CLOCK_MONOTONIC, &ts);
+
+               static struct timespec ots;
+               if (ots.tv_sec == 0)
+                       ots = ts;
+
+               struct timespec dts;
+               ts_sub(&dts, &ts, &ots);
+               ots = ts;
+
+               tprintf("%s%6ld.%06ld%s ",
+                       tflag ? "(+" : "",
+                       (long) dts.tv_sec, (long) dts.tv_nsec / 1000,
+                       tflag ? ")" : "");
+       }
+
        if (iflag)
                print_pc(tcp);
 }
@@ -665,14 +711,20 @@ tabto(void)
  * may create bogus empty FILE.<nonexistant_pid>, and then die.
  */
 static void
-newoutf(struct tcb *tcp)
+after_successful_attach(struct tcb *tcp, const unsigned int flags)
 {
+       tcp->flags |= TCB_ATTACHED | TCB_STARTUP | flags;
        tcp->outf = shared_log; /* if not -ff mode, the same file is for all */
        if (followfork >= 2) {
-               char name[520 + sizeof(int) * 3];
-               sprintf(name, "%.512s.%u", outfname, tcp->pid);
+               char name[PATH_MAX];
+               xsprintf(name, "%s.%u", outfname, tcp->pid);
                tcp->outf = strace_fopen(name);
        }
+
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
+               unwind_tcb_init(tcp);
+#endif
 }
 
 static void
@@ -683,20 +735,18 @@ expand_tcbtab(void)
           callers have pointers and it would be a pain.
           So tcbtab is a table of pointers.  Since we never
           free the TCBs, we allocate a single chunk of many.  */
-       unsigned int new_tcbtabsize, alloc_tcbtabsize;
+       size_t old_tcbtabsize;
        struct tcb *newtcbs;
+       struct tcb **tcb_ptr;
 
-       if (tcbtabsize) {
-               alloc_tcbtabsize = tcbtabsize;
-               new_tcbtabsize = tcbtabsize * 2;
-       } else {
-               new_tcbtabsize = alloc_tcbtabsize = 1;
-       }
+       old_tcbtabsize = tcbtabsize;
+
+       tcbtab = xgrowarray(tcbtab, &tcbtabsize, sizeof(tcbtab[0]));
+       newtcbs = xcalloc(tcbtabsize - old_tcbtabsize, sizeof(newtcbs[0]));
 
-       newtcbs = xcalloc(alloc_tcbtabsize, sizeof(newtcbs[0]));
-       tcbtab = xreallocarray(tcbtab, new_tcbtabsize, sizeof(tcbtab[0]));
-       while (tcbtabsize < new_tcbtabsize)
-               tcbtab[tcbtabsize++] = newtcbs++;
+       for (tcb_ptr = tcbtab + old_tcbtabsize;
+           tcb_ptr < tcbtab + tcbtabsize; tcb_ptr++, newtcbs++)
+               *tcb_ptr = newtcbs;
 }
 
 static struct tcb *
@@ -716,12 +766,6 @@ alloctcb(int pid)
 #if SUPPORTED_PERSONALITIES > 1
                        tcp->currpers = current_personality;
 #endif
-
-#ifdef USE_LIBUNWIND
-                       if (stack_trace_enabled)
-                               unwind_tcb_init(tcp);
-#endif
-
                        nprocs++;
                        debug_msg("new tcb for pid %d, active tcbs:%d",
                                  tcp->pid, nprocs);
@@ -774,12 +818,18 @@ droptcb(struct tcb *tcp)
 
        free_tcb_priv_data(tcp);
 
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_tcb_fin(tcp);
-       }
 #endif
 
+#ifdef HAVE_LINUX_KVM_H
+       kvm_vcpu_info_free(tcp);
+#endif
+
+       if (tcp->mmap_cache)
+               tcp->mmap_cache->free_fn(tcp, __func__);
+
        nprocs--;
        debug_msg("dropped tcb for pid %d, %d remain", tcp->pid, nprocs);
 
@@ -796,7 +846,7 @@ droptcb(struct tcb *tcp)
        }
 
        if (current_tcp == tcp)
-               current_tcp = NULL;
+               set_current_tcp(NULL);
        if (printing_tcp == tcp)
                printing_tcp = NULL;
 
@@ -837,14 +887,14 @@ detach(struct tcb *tcp)
        }
        if (errno != ESRCH) {
                /* Shouldn't happen. */
-               perror_msg("detach: ptrace(PTRACE_DETACH,%u)", tcp->pid);
+               perror_func_msg("ptrace(PTRACE_DETACH,%u)", tcp->pid);
                goto drop;
        }
        /* ESRCH: process is either not stopped or doesn't exist. */
        if (my_tkill(tcp->pid, 0) < 0) {
                if (errno != ESRCH)
                        /* Shouldn't happen. */
-                       perror_msg("detach: tkill(%u,0)", tcp->pid);
+                       perror_func_msg("tkill(%u,0)", tcp->pid);
                /* else: process doesn't exist. */
                goto drop;
        }
@@ -860,13 +910,13 @@ detach(struct tcb *tcp)
                if (!error)
                        goto wait_loop;
                if (errno != ESRCH)
-                       perror_msg("detach: ptrace(PTRACE_INTERRUPT,%u)", tcp->pid);
+                       perror_func_msg("ptrace(PTRACE_INTERRUPT,%u)", tcp->pid);
        } else {
                error = my_tkill(tcp->pid, SIGSTOP);
                if (!error)
                        goto wait_loop;
                if (errno != ESRCH)
-                       perror_msg("detach: tkill(%u,SIGSTOP)", tcp->pid);
+                       perror_func_msg("tkill(%u,SIGSTOP)", tcp->pid);
        }
        /* Either process doesn't exist, or some weird error. */
        goto drop;
@@ -887,7 +937,7 @@ detach(struct tcb *tcp)
                         * ^^^  WRONG! We expect this PID to exist,
                         * and want to emit a message otherwise:
                         */
-                       perror_msg("detach: waitpid(%u)", tcp->pid);
+                       perror_func_msg("waitpid(%u)", tcp->pid);
                        break;
                }
                if (!WIFSTOPPED(status)) {
@@ -1001,16 +1051,16 @@ attach_tcb(struct tcb *const tcp)
                return;
        }
 
-       tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
-       newoutf(tcp);
+       after_successful_attach(tcp, TCB_GRABBED | post_attach_sigstop);
        debug_msg("attach to pid %d (main) succeeded", tcp->pid);
 
-       char procdir[sizeof("/proc/%d/task") + sizeof(int) * 3];
+       static const char task_path[] = "/proc/%d/task";
+       char procdir[sizeof(task_path) + sizeof(int) * 3];
        DIR *dir;
        unsigned int ntid = 0, nerr = 0;
 
        if (followfork && tcp->pid != strace_child &&
-           sprintf(procdir, "/proc/%d/task", tcp->pid) > 0 &&
+           xsprintf(procdir, task_path, tcp->pid) > 0 &&
            (dir = opendir(procdir)) != NULL) {
                struct_dirent *de;
 
@@ -1029,12 +1079,10 @@ attach_tcb(struct tcb *const tcp)
                                                 ptrace_attach_cmd, tid);
                                continue;
                        }
-                       debug_msg("attach to pid %d succeeded", tid);
 
-                       struct tcb *tid_tcp = alloctcb(tid);
-                       tid_tcp->flags |= TCB_ATTACHED | TCB_STARTUP |
-                                         post_attach_sigstop;
-                       newoutf(tid_tcp);
+                       after_successful_attach(alloctcb(tid),
+                                               TCB_GRABBED | post_attach_sigstop);
+                       debug_msg("attach to pid %d succeeded", tid);
                }
 
                closedir(dir);
@@ -1058,20 +1106,11 @@ startup_attach(void)
        unsigned int tcbi;
        struct tcb *tcp;
 
-       /*
-        * Block user interruptions as we would leave the traced
-        * process stopped (process state T) if we would terminate in
-        * between PTRACE_ATTACH and wait4() on SIGSTOP.
-        * We rely on cleanup() from this point on.
-        */
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-
        if (daemonized_tracer) {
                pid_t pid = fork();
-               if (pid < 0) {
-                       perror_msg_and_die("fork");
-               }
+               if (pid < 0)
+                       perror_func_msg_and_die("fork");
+
                if (pid) { /* parent */
                        /*
                         * Wait for grandchild to attach to straced process
@@ -1106,12 +1145,8 @@ startup_attach(void)
 
                attach_tcb(tcp);
 
-               if (interactive) {
-                       sigprocmask(SIG_SETMASK, &start_set, NULL);
-                       if (interrupted)
-                               goto ret;
-                       sigprocmask(SIG_SETMASK, &blocked_set, NULL);
-               }
+               if (interrupted)
+                       return;
        } /* for each tcbtab[] */
 
        if (daemonized_tracer) {
@@ -1122,10 +1157,6 @@ startup_attach(void)
                kill(parent_pid, SIGKILL);
                strace_child = 0;
        }
-
- ret:
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
 }
 
 /* Stack-o-phobic exec helper, in the hope to work around
@@ -1210,7 +1241,7 @@ open_dummy_desc(void)
        int fds[2];
 
        if (pipe(fds))
-               perror_msg_and_die("pipe");
+               perror_func_msg_and_die("pipe");
        close(fds[1]);
        set_cloexec_flag(fds[0]);
        return fds[0];
@@ -1348,9 +1379,9 @@ startup_child(char **argv)
 #endif
 
        pid = fork();
-       if (pid < 0) {
-               perror_msg_and_die("fork");
-       }
+       if (pid < 0)
+               perror_func_msg_and_die("fork");
+
        if ((pid != 0 && daemonized_tracer)
         || (pid == 0 && !daemonized_tracer)
        ) {
@@ -1396,18 +1427,20 @@ startup_child(char **argv)
                                kill(pid, SIGCONT);
                }
                tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP
-                           | TCB_SKIP_DETACH_ON_FIRST_EXEC
-                           | (NOMMU_SYSTEM ? 0 : (TCB_HIDE_LOG | post_attach_sigstop));
-               newoutf(tcp);
+               after_successful_attach(tcp, TCB_SKIP_DETACH_ON_FIRST_EXEC
+                                            | (NOMMU_SYSTEM ? 0
+                                               : (TCB_HIDE_LOG
+                                                  | post_attach_sigstop)));
        } else {
                /* With -D, we are *child* here, the tracee is our parent. */
                strace_child = strace_tracer_pid;
                strace_tracer_pid = getpid();
                tcp = alloctcb(strace_child);
                tcp->flags |= TCB_SKIP_DETACH_ON_FIRST_EXEC | TCB_HIDE_LOG;
-               /* attaching will be done later, by startup_attach */
-               /* note: we don't do newoutf(tcp) here either! */
+               /*
+                * Attaching will be done later, by startup_attach.
+                * Note: we don't do after_successful_attach() here either!
+                */
 
                /* NOMMU BUG! -D mode is active, we (child) return,
                 * and we will scribble over parent's stack!
@@ -1444,7 +1477,6 @@ startup_child(char **argv)
        redirect_standard_fds();
 }
 
-#if USE_SEIZE
 static void
 test_ptrace_seize(void)
 {
@@ -1458,7 +1490,7 @@ test_ptrace_seize(void)
 
        pid = fork();
        if (pid < 0)
-               perror_msg_and_die("fork");
+               perror_func_msg_and_die("fork");
 
        if (pid == 0) {
                pause();
@@ -1485,19 +1517,15 @@ test_ptrace_seize(void)
                if (tracee_pid <= 0) {
                        if (errno == EINTR)
                                continue;
-                       perror_msg_and_die("%s: unexpected wait result %d",
-                                        __func__, tracee_pid);
+                       perror_func_msg_and_die("unexpected wait result %d",
+                                               tracee_pid);
                }
-               if (WIFSIGNALED(status)) {
+               if (WIFSIGNALED(status))
                        return;
-               }
-               error_msg_and_die("%s: unexpected wait status %#x",
-                                 __func__, status);
+
+               error_func_msg_and_die("unexpected wait status %#x", status);
        }
 }
-#else /* !USE_SEIZE */
-# define test_ptrace_seize() ((void)0)
-#endif
 
 static unsigned
 get_os_release(void)
@@ -1533,12 +1561,8 @@ get_os_release(void)
 }
 
 static void
-set_sigaction(int signo, void (*sighandler)(int), struct sigaction *oldact)
+set_sighandler(int signo, void (*sighandler)(int), struct sigaction *oldact)
 {
-       /* if signal handler is a function, add the signal to blocked_set */
-       if (sighandler != SIG_IGN && sighandler != SIG_DFL)
-               sigaddset(&blocked_set, signo);
-
        const struct sigaction sa = { .sa_handler = sighandler };
        sigaction(signo, &sa, oldact);
 }
@@ -1560,7 +1584,7 @@ init(int argc, char *argv[])
        if (!program_invocation_name || !*program_invocation_name) {
                static char name[] = "strace";
                program_invocation_name =
-                       (argv[0] && *argv[0]) ? argv[0] : name;
+                       (argc > 0 && argv[0] && *argv[0]) ? argv[0] : name;
        }
 
        strace_tracer_pid = getpid();
@@ -1577,14 +1601,20 @@ init(int argc, char *argv[])
 # error Bug in DEFAULT_QUAL_FLAGS
 #endif
        qualify("signal=all");
-       while ((c = getopt(argc, argv,
-               "+b:cCdfFhiqrtTvVwxyz"
-#ifdef USE_LIBUNWIND
-               "k"
+       while ((c = getopt(argc, argv, "+"
+#ifdef ENABLE_STACKTRACE
+           "k"
 #endif
-               "D"
-               "a:e:o:O:p:s:S:u:E:P:I:")) != EOF) {
+           "a:Ab:cCdDe:E:fFhiI:o:O:p:P:qrs:S:tTu:vVwxX:yz")) != EOF) {
                switch (c) {
+               case 'a':
+                       acolumn = string_to_uint(optarg);
+                       if (acolumn < 0)
+                               error_opt_arg(c, optarg);
+                       break;
+               case 'A':
+                       open_append = true;
+                       break;
                case 'b':
                        if (strcmp(optarg, "execve") != 0)
                                error_msg_and_die("Syscall '%s' for -b isn't supported",
@@ -1609,57 +1639,35 @@ init(int argc, char *argv[])
                case 'D':
                        daemonized_tracer = 1;
                        break;
-               case 'F':
-                       optF = 1;
+               case 'e':
+                       qualify(optarg);
+                       break;
+               case 'E':
+                       if (putenv(optarg) < 0)
+                               perror_msg_and_die("putenv");
                        break;
                case 'f':
                        followfork++;
                        break;
+               case 'F':
+                       optF = 1;
+                       break;
                case 'h':
                        usage();
                        break;
                case 'i':
                        iflag = 1;
                        break;
-               case 'q':
-                       qflag++;
-                       break;
-               case 'r':
-                       rflag = 1;
-                       break;
-               case 't':
-                       tflag++;
-                       break;
-               case 'T':
-                       Tflag = 1;
-                       break;
-               case 'w':
-                       count_wallclock = 1;
-                       break;
-               case 'x':
-                       xflag++;
-                       break;
-               case 'y':
-                       show_fd_path++;
-                       break;
-               case 'v':
-                       qualify("abbrev=none");
-                       break;
-               case 'V':
-                       print_version();
-                       exit(0);
-                       break;
-               case 'z':
-                       not_failing_only = 1;
-                       break;
-               case 'a':
-                       acolumn = string_to_uint(optarg);
-                       if (acolumn < 0)
+               case 'I':
+                       opt_intr = string_to_uint_upto(optarg, NUM_INTR_OPTS - 1);
+                       if (opt_intr <= 0)
                                error_opt_arg(c, optarg);
                        break;
-               case 'e':
-                       qualify(optarg);
+#ifdef ENABLE_STACKTRACE
+               case 'k':
+                       stack_trace_enabled = true;
                        break;
+#endif
                case 'o':
                        outfname = optarg;
                        break;
@@ -1675,6 +1683,12 @@ init(int argc, char *argv[])
                case 'P':
                        pathtrace_select(optarg);
                        break;
+               case 'q':
+                       qflag++;
+                       break;
+               case 'r':
+                       rflag = 1;
+                       break;
                case 's':
                        i = string_to_uint(optarg);
                        if (i < 0 || (unsigned int) i > -1U / 4)
@@ -1684,23 +1698,44 @@ init(int argc, char *argv[])
                case 'S':
                        set_sortby(optarg);
                        break;
+               case 't':
+                       tflag++;
+                       break;
+               case 'T':
+                       Tflag = 1;
+                       break;
                case 'u':
                        username = optarg;
                        break;
-#ifdef USE_LIBUNWIND
-               case 'k':
-                       stack_trace_enabled = true;
+               case 'v':
+                       qualify("abbrev=none");
                        break;
-#endif
-               case 'E':
-                       if (putenv(optarg) < 0)
-                               perror_msg_and_die("putenv");
+               case 'V':
+                       print_version();
+                       exit(0);
                        break;
-               case 'I':
-                       opt_intr = string_to_uint_upto(optarg, NUM_INTR_OPTS - 1);
-                       if (opt_intr <= 0)
+               case 'w':
+                       count_wallclock = 1;
+                       break;
+               case 'x':
+                       xflag++;
+                       break;
+               case 'X':
+                       if (!strcmp(optarg, "raw"))
+                               xlat_verbosity = XLAT_STYLE_RAW;
+                       else if (!strcmp(optarg, "abbrev"))
+                               xlat_verbosity = XLAT_STYLE_ABBREV;
+                       else if (!strcmp(optarg, "verbose"))
+                               xlat_verbosity = XLAT_STYLE_VERBOSE;
+                       else
                                error_opt_arg(c, optarg);
                        break;
+               case 'y':
+                       show_fd_path++;
+                       break;
+               case 'z':
+                       not_failing_only = 1;
+                       break;
                default:
                        error_msg_and_help(NULL);
                        break;
@@ -1710,16 +1745,23 @@ init(int argc, char *argv[])
        argv += optind;
        argc -= optind;
 
-       if (argc < 0 || (!argv[0] && !nprocs)) {
+       if (argc < 0 || (!nprocs && !argc)) {
                error_msg_and_help("must have PROG [ARGS] or -p PID");
        }
 
-       if (!argv[0] && daemonized_tracer) {
+       if (!argc && daemonized_tracer) {
                error_msg_and_help("PROG [ARGS] must be specified with -D");
        }
 
-       if (!followfork)
-               followfork = optF;
+       if (optF) {
+               if (followfork) {
+                       error_msg("deprecated option -F ignored");
+               } else {
+                       error_msg("option -F is deprecated, "
+                                 "please use -f instead");
+                       followfork = optF;
+               }
+       }
 
        if (followfork >= 2 && cflag) {
                error_msg_and_help("(-c or -C) and -ff are mutually exclusive");
@@ -1732,7 +1774,7 @@ init(int argc, char *argv[])
        if (cflag == CFLAG_ONLY_STATS) {
                if (iflag)
                        error_msg("-%c has no effect with -c", 'i');
-#ifdef USE_LIBUNWIND
+#ifdef ENABLE_STACKTRACE
                if (stack_trace_enabled)
                        error_msg("-%c has no effect with -c", 'k');
 #endif
@@ -1746,30 +1788,15 @@ init(int argc, char *argv[])
                        error_msg("-%c has no effect with -c", 'y');
        }
 
-       if (rflag) {
-               if (tflag > 1)
-                       error_msg("-tt has no effect with -r");
-               tflag = 1;
-       }
-
        acolumn_spaces = xmalloc(acolumn + 1);
        memset(acolumn_spaces, ' ', acolumn);
        acolumn_spaces[acolumn] = '\0';
 
-       sigprocmask(SIG_SETMASK, NULL, &start_set);
-       memcpy(&blocked_set, &start_set, sizeof(blocked_set));
-
-       set_sigaction(SIGCHLD, SIG_DFL, &params_for_tracee.child_sa);
-
-#ifdef USE_LIBUNWIND
-       if (stack_trace_enabled) {
-               unsigned int tcbi;
+       set_sighandler(SIGCHLD, SIG_DFL, &params_for_tracee.child_sa);
 
+#ifdef ENABLE_STACKTRACE
+       if (stack_trace_enabled)
                unwind_init();
-               for (tcbi = 0; tcbi < tcbtabsize; ++tcbi) {
-                       unwind_tcb_init(tcbtab[tcbi]);
-               }
-       }
 #endif
 
        /* See if they want to run as another user. */
@@ -1819,10 +1846,15 @@ init(int argc, char *argv[])
                         * when using popen, so prohibit it.
                         */
                        if (followfork >= 2)
-                               error_msg_and_help("piping the output and -ff are mutually exclusive");
+                               error_msg_and_help("piping the output and -ff "
+                                                  "are mutually exclusive");
                        shared_log = strace_popen(outfname + 1);
-               } else if (followfork < 2)
+               } else if (followfork < 2) {
                        shared_log = strace_fopen(outfname);
+               } else if (strlen(outfname) >= PATH_MAX - sizeof(int) * 3) {
+                       errno = ENAMETOOLONG;
+                       perror_msg_and_die("%s", outfname);
+               }
        } else {
                /* -ff without -o FILE is the same as single -f */
                if (followfork >= 2)
@@ -1841,7 +1873,7 @@ init(int argc, char *argv[])
         * no           1       1       INTR_WHILE_WAIT
         */
 
-       if (outfname && argv[0]) {
+       if (outfname && argc) {
                if (!opt_intr)
                        opt_intr = INTR_NEVER;
                if (!qflag)
@@ -1856,28 +1888,33 @@ init(int argc, char *argv[])
         * Also we do not need to be protected by them as during interruption
         * in the startup_child() mode we kill the spawned process anyway.
         */
-       if (argv[0]) {
+       if (argc) {
                startup_child(argv);
        }
 
-       set_sigaction(SIGTTOU, SIG_IGN, NULL);
-       set_sigaction(SIGTTIN, SIG_IGN, NULL);
+       set_sighandler(SIGTTOU, SIG_IGN, NULL);
+       set_sighandler(SIGTTIN, SIG_IGN, NULL);
        if (opt_intr != INTR_ANYWHERE) {
                if (opt_intr == INTR_BLOCK_TSTP_TOO)
-                       set_sigaction(SIGTSTP, SIG_IGN, NULL);
+                       set_sighandler(SIGTSTP, SIG_IGN, NULL);
                /*
                 * In interactive mode (if no -o OUTFILE, or -p PID is used),
-                * fatal signals are blocked while syscall stop is processed,
-                * and acted on in between, when waiting for new syscall stops.
-                * In non-interactive mode, signals are ignored.
+                * fatal signals are handled asynchronously and acted
+                * when waiting for process state changes.
+                * In non-interactive mode these signals are ignored.
                 */
-               set_sigaction(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGQUIT, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGPIPE, interactive ? interrupt : SIG_IGN, NULL);
-               set_sigaction(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGHUP, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGINT, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGQUIT, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGPIPE, interactive ? interrupt : SIG_IGN, NULL);
+               set_sighandler(SIGTERM, interactive ? interrupt : SIG_IGN, NULL);
        }
 
+       sigemptyset(&timer_set);
+       sigaddset(&timer_set, SIGALRM);
+       sigprocmask(SIG_BLOCK, &timer_set, NULL);
+       set_sighandler(SIGALRM, timer_sighandler, NULL);
+
        if (nprocs != 0 || daemonized_tracer)
                startup_attach();
 
@@ -1890,17 +1927,25 @@ init(int argc, char *argv[])
 }
 
 static struct tcb *
-pid2tcb(int pid)
+pid2tcb(const int pid)
 {
-       unsigned int i;
-
        if (pid <= 0)
                return NULL;
 
-       for (i = 0; i < tcbtabsize; i++) {
-               struct tcb *tcp = tcbtab[i];
+#define PID2TCB_CACHE_SIZE 1024U
+#define PID2TCB_CACHE_MASK (PID2TCB_CACHE_SIZE - 1)
+
+       static struct tcb *pid2tcb_cache[PID2TCB_CACHE_SIZE];
+       struct tcb **const ptcp = &pid2tcb_cache[pid & PID2TCB_CACHE_MASK];
+       struct tcb *tcp = *ptcp;
+
+       if (tcp && tcp->pid == pid)
+               return tcp;
+
+       for (unsigned int i = 0; i < tcbtabsize; ++i) {
+               tcp = tcbtab[i];
                if (tcp->pid == pid)
-                       return tcp;
+                       return *ptcp = tcp;
        }
 
        return NULL;
@@ -1949,17 +1994,17 @@ print_debug_info(const int pid, int status)
        strcpy(buf, "???");
        if (WIFSIGNALED(status))
 #ifdef WCOREDUMP
-               sprintf(buf, "WIFSIGNALED,%ssig=%s",
+               xsprintf(buf, "WIFSIGNALED,%ssig=%s",
                                WCOREDUMP(status) ? "core," : "",
                                signame(WTERMSIG(status)));
 #else
-               sprintf(buf, "WIFSIGNALED,sig=%s",
+               xsprintf(buf, "WIFSIGNALED,sig=%s",
                                signame(WTERMSIG(status)));
 #endif
        if (WIFEXITED(status))
-               sprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
+               xsprintf(buf, "WIFEXITED,exitcode=%u", WEXITSTATUS(status));
        if (WIFSTOPPED(status))
-               sprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
+               xsprintf(buf, "WIFSTOPPED,sig=%s", signame(WSTOPSIG(status)));
        evbuf[0] = '\0';
        if (event != 0) {
                static const char *const event_names[] = {
@@ -1976,7 +2021,7 @@ print_debug_info(const int pid, int status)
                        e = event_names[event];
                else if (event == PTRACE_EVENT_STOP)
                        e = "STOP";
-               sprintf(evbuf, ",EVENT_%s (%u)", e, event);
+               xsprintf(evbuf, ",EVENT_%s (%u)", e, event);
        }
        error_msg("[wait(0x%06x) = %u] %s%s", status, pid, buf, evbuf);
 }
@@ -2000,8 +2045,7 @@ maybe_allocate_tcb(const int pid, int status)
        if (followfork) {
                /* We assume it's a fork/vfork/clone child */
                struct tcb *tcp = alloctcb(pid);
-               tcp->flags |= TCB_ATTACHED | TCB_STARTUP | post_attach_sigstop;
-               newoutf(tcp);
+               after_successful_attach(tcp, post_attach_sigstop);
                if (!qflag)
                        error_msg("Process %d attached", pid);
                return tcp;
@@ -2142,7 +2186,7 @@ startup_tcb(struct tcb *tcp)
                }
        }
 
-       if (get_scno(tcp) == 1)
+       if ((tcp->flags & TCB_GRABBED) && (get_scno(tcp) == 1))
                tcp->s_prev_ent = tcp->s_ent;
 }
 
@@ -2156,11 +2200,11 @@ print_event_exit(struct tcb *tcp)
 
        if (followfork < 2 && printing_tcp && printing_tcp != tcp
            && printing_tcp->curcol != 0) {
-               current_tcp = printing_tcp;
+               set_current_tcp(printing_tcp);
                tprints(" <unfinished ...>\n");
                flush_tcp_output(printing_tcp);
                printing_tcp->curcol = 0;
-               current_tcp = tcp;
+               set_current_tcp(tcp);
        }
 
        if ((followfork < 2 && printing_tcp != tcp)
@@ -2177,75 +2221,18 @@ print_event_exit(struct tcb *tcp)
                 */
                tprints(" <unfinished ...>");
        }
+
+       printing_tcp = tcp;
        tprints(") ");
        tabto();
        tprints("= ?\n");
        line_ended();
 }
 
-enum trace_event {
-       /* Break the main loop. */
-       TE_BREAK,
-
-       /* Call next_event() again. */
-       TE_NEXT,
-
-       /* Restart the tracee with signal 0 and call next_event() again. */
-       TE_RESTART,
-
-       /*
-        * For all the events below, current_tcp is set to current tracee's
-        * tcb.  All the suggested actions imply that you want to continue
-        * tracing of the current tracee; alternatively, you can detach it.
-        */
-
-       /*
-        * Syscall entry or exit.
-        * Restart the tracee with signal 0, or with an injected signal number.
-        */
-       TE_SYSCALL_STOP,
-
-       /*
-        * Tracee received signal with number WSTOPSIG(*pstatus); signal info
-        * is written to *si.  Restart the tracee (with that signal number
-        * if you want to deliver it).
-        */
-       TE_SIGNAL_DELIVERY_STOP,
-
-       /*
-        * Tracee was killed by a signal with number WTERMSIG(*pstatus).
-        */
-       TE_SIGNALLED,
-
-       /*
-        * Tracee was stopped by a signal with number WSTOPSIG(*pstatus).
-        * Restart the tracee with that signal number.
-        */
-       TE_GROUP_STOP,
-
-       /*
-        * Tracee exited with status WEXITSTATUS(*pstatus).
-        */
-       TE_EXITED,
-
-       /*
-        * Tracee is going to perform execve().
-        * Restart the tracee with signal 0.
-        */
-       TE_STOP_BEFORE_EXECVE,
-
-       /*
-        * Tracee is going to terminate.
-        * Restart the tracee with signal 0.
-        */
-       TE_STOP_BEFORE_EXIT,
-};
-
 static enum trace_event
 next_event(int *pstatus, siginfo_t *si)
 {
        int pid;
-       int wait_errno;
        int status;
        struct tcb *tcp;
        struct rusage ru;
@@ -2274,12 +2261,42 @@ next_event(int *pstatus, siginfo_t *si)
                        return TE_BREAK;
        }
 
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &start_set, NULL);
+       const bool unblock_delay_timer = is_delay_timer_armed();
+
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer opens here.
+        *
+        * Unblock the signal handler for the delay timer
+        * iff the delay timer is already created.
+        */
+       if (unblock_delay_timer)
+               sigprocmask(SIG_UNBLOCK, &timer_set, NULL);
+
+       /*
+        * If the delay timer has expired, then its expiration
+        * has been handled already by the signal handler.
+        *
+        * If the delay timer expires during wait4(),
+        * then the system call will be interrupted and
+        * the expiration will be handled by the signal handler.
+        */
        pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL));
-       wait_errno = errno;
-       if (interactive)
-               sigprocmask(SIG_SETMASK, &blocked_set, NULL);
+       const int wait_errno = errno;
+
+       /*
+        * The window of opportunity to handle expirations
+        * of the delay timer closes here.
+        *
+        * Block the signal handler for the delay timer
+        * iff it was unblocked earlier.
+        */
+       if (unblock_delay_timer) {
+               sigprocmask(SIG_BLOCK, &timer_set, NULL);
+
+               if (restart_failed)
+                       return TE_BREAK;
+       }
 
        if (pid < 0) {
                if (wait_errno == EINTR)
@@ -2314,14 +2331,18 @@ next_event(int *pstatus, siginfo_t *si)
                        return TE_NEXT;
        }
 
-       clear_regs();
+       clear_regs(tcp);
 
        /* Set current output file */
-       current_tcp = tcp;
+       set_current_tcp(tcp);
 
        if (cflag) {
-               tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
-               tcp->stime = ru.ru_stime;
+               struct timespec stime = {
+                       .tv_sec = ru.ru_stime.tv_sec,
+                       .tv_nsec = ru.ru_stime.tv_usec * 1000
+               };
+               ts_sub(&tcp->dtime, &stime, &tcp->stime);
+               tcp->stime = stime;
        }
 
        if (WIFSIGNALED(status))
@@ -2370,7 +2391,6 @@ next_event(int *pstatus, siginfo_t *si)
                        return stopped ? TE_GROUP_STOP : TE_SIGNAL_DELIVERY_STOP;
                }
                break;
-#if USE_SEIZE
        case PTRACE_EVENT_STOP:
                /*
                 * PTRACE_INTERRUPT-stop or group-stop.
@@ -2384,7 +2404,6 @@ next_event(int *pstatus, siginfo_t *si)
                        return TE_GROUP_STOP;
                }
                return TE_RESTART;
-#endif
        case PTRACE_EVENT_EXEC:
                return TE_STOP_BEFORE_EXECVE;
        case PTRACE_EVENT_EXIT:
@@ -2408,10 +2427,10 @@ trace_syscall(struct tcb *tcp, unsigned int *sig)
                syscall_entering_finish(tcp, res);
                return res;
        } else {
-               struct timeval tv = {};
-               int res = syscall_exiting_decode(tcp, &tv);
+               struct timespec ts = {};
+               int res = syscall_exiting_decode(tcp, &ts);
                if (res != 0) {
-                       res = syscall_exiting_trace(tcp, tv, res);
+                       res = syscall_exiting_trace(tcp, &ts, res);
                }
                syscall_exiting_finish(tcp);
                return res;
@@ -2483,6 +2502,31 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                return true;
 
        case TE_STOP_BEFORE_EXECVE:
+               /*
+                * Check that we are inside syscall now (next event after
+                * PTRACE_EVENT_EXEC should be for syscall exiting).  If it is
+                * not the case, we might have a situation when we attach to a
+                * process and the first thing we see is a PTRACE_EVENT_EXEC
+                * and all the following syscall state tracking is screwed up
+                * otherwise.
+                */
+               if (entering(current_tcp)) {
+                       int ret;
+
+                       error_msg("Stray PTRACE_EVENT_EXEC from pid %d"
+                                 ", trying to recover...",
+                                 current_tcp->pid);
+
+                       current_tcp->flags |= TCB_RECOVERING;
+                       ret = trace_syscall(current_tcp, &restart_sig);
+                       current_tcp->flags &= ~TCB_RECOVERING;
+
+                       if (ret < 0) {
+                               /* The reason is described in TE_SYSCALL_STOP */
+                               return true;
+                       }
+               }
+
                /*
                 * Under Linux, execve changes pid to thread leader's pid,
                 * and we see this changed pid on EVENT_EXEC and later,
@@ -2499,7 +2543,8 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
                 * On 2.6 and earlier, it can return garbage.
                 */
                if (os_release >= KERNEL_VERSION(3, 0, 0))
-                       current_tcp = maybe_switch_tcbs(current_tcp, current_tcp->pid);
+                       set_current_tcp(maybe_switch_tcbs(current_tcp,
+                                                         current_tcp->pid));
 
                if (detach_on_execve) {
                        if (current_tcp->flags & TCB_SKIP_DETACH_ON_FIRST_EXEC) {
@@ -2520,6 +2565,10 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        if (interrupted)
                return false;
 
+       /* If the process is being delayed, do not ptrace_restart just yet */
+       if (syscall_delayed(current_tcp))
+               return true;
+
        if (ptrace_restart(restart_op, current_tcp, restart_sig) < 0) {
                /* Note: ptrace_restart emitted error message */
                exit_code = 1;
@@ -2528,6 +2577,74 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si)
        return true;
 }
 
+static bool
+restart_delayed_tcb(struct tcb *const tcp)
+{
+       debug_func_msg("pid %d", tcp->pid);
+
+       tcp->flags &= ~TCB_DELAYED;
+
+       struct tcb *const prev_tcp = current_tcp;
+       current_tcp = tcp;
+       bool ret = dispatch_event(TE_RESTART, NULL, NULL);
+       current_tcp = prev_tcp;
+
+       return ret;
+}
+
+static bool
+restart_delayed_tcbs(void)
+{
+       struct tcb *tcp_next = NULL;
+       struct timespec ts_now;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts_now);
+
+       for (size_t i = 0; i < tcbtabsize; i++) {
+               struct tcb *tcp = tcbtab[i];
+
+               if (tcp->pid && syscall_delayed(tcp)) {
+                       if (ts_cmp(&ts_now, &tcp->delay_expiration_time) > 0) {
+                               if (!restart_delayed_tcb(tcp))
+                                       return false;
+                       } else {
+                               /* Check whether this tcb is the next.  */
+                               if (!tcp_next ||
+                                   ts_cmp(&tcp_next->delay_expiration_time,
+                                          &tcp->delay_expiration_time) > 0) {
+                                       tcp_next = tcp;
+                               }
+                       }
+               }
+       }
+
+       if (tcp_next)
+               arm_delay_timer(tcp_next);
+
+       return true;
+}
+
+/*
+ * As this signal handler does a lot of work that is not suitable
+ * for signal handlers, extra care must be taken to ensure that
+ * it is enabled only in those places where it's safe.
+ */
+static void
+timer_sighandler(int sig)
+{
+       delay_timer_expired();
+
+       if (restart_failed)
+               return;
+
+       int saved_errno = errno;
+
+       if (!restart_delayed_tcbs())
+               restart_failed = 1;
+
+       errno = saved_errno;
+}
+
 #ifdef ENABLE_COVERAGE_GCOV
 extern void __gcov_flush(void);
 #endif
@@ -2575,6 +2692,7 @@ terminate(void)
 int
 main(int argc, char *argv[])
 {
+       setlocale(LC_ALL, "");
        init(argc, argv);
 
        exit_code = !nprocs;