From: Elvira Khabirova Date: Sun, 18 Feb 2018 20:21:48 +0000 (+0300) Subject: Implement delay injection X-Git-Tag: v4.22~66 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ba8e768a806e105da8788821eb5776d900447c0b;p=strace Implement delay injection Add -e inject=SET:delay_enter= and -e inject=SET:delay_exit= options. * configure.ac (AC_SEARCH_LIBS): Check for timer_create -lrt. * delay.c: New file. * Makefile.am (strace_SOURCES): Add it. (strace_LDADD): Add $(timer_LIBS). * defs.h (INJECT_F_DELAY_ENTER, INJECT_F_DELAY_EXIT, TCB_INJECT_DELAY_EXIT, TCB_DELAYED, inject_delay_exit, syscall_delayed): New macros. (alloc_delay_data, fill_delay_data, is_delay_timer_created, arm_delay_timer, delay_tcb): New prototypes. (struct inject_data): Replace reserved field with delay_idx. (struct tcb): Add delay_expiration_time field. * filter_qualify.c (parse_delay_token): New function. (parse_inject_token): Use it. (qualify_inject_common): Initialize struct inject_opts.data.delay_idx. * strace.c: Include (timer_jmp_buf, timer_set): New static variables. (timer_sighandler, restart_delayed_tcb, restart_delayed_tcbs): New functions. (init): Block SIGALRM, set SIGALRM handler. (dispatch_event): Do not restart delayed syscalls. (next_event): Unblock SIGALRM during wait4 invocation. * syscall.c (tamper_with_syscall_entering): Arm delay timer if INJECT_F_DELAY_ENTER injection flag is set, set TCB_INJECT_DELAY_EXIT flag if INJECT_F_DELAY_EXIT injection flag is set. tamper_with_syscall_exiting): Arm delay timer if inject_delay_exit. (syscall_exiting_trace): Call tamper_with_syscall_exiting in case of inject_delay_exit. (syscall_exiting_finish): Clear TCB_INJECT_DELAY_EXIT flag. * strace.1.in: Document delay injection. * NEWS: Mention this improvement. Co-Authored-by: Dmitry V. Levin --- diff --git a/Makefile.am b/Makefile.am index ed1cda26..6f840dd2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -64,7 +64,7 @@ include xlat/Makemodule.am strace_CPPFLAGS = $(AM_CPPFLAGS) strace_CFLAGS = $(AM_CFLAGS) strace_LDFLAGS = -strace_LDADD = libstrace.a $(clock_LIBS) +strace_LDADD = libstrace.a $(clock_LIBS) $(timer_LIBS) noinst_LIBRARIES = libstrace.a libstrace_a_CPPFLAGS = $(strace_CPPFLAGS) @@ -112,6 +112,7 @@ strace_SOURCES = \ copy_file_range.c \ count.c \ defs.h \ + delay.c \ desc.c \ dirent.c \ dirent64.c \ diff --git a/NEWS b/NEWS index ee9e2457..d65738b7 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,8 @@ Noteworthy changes in release ?.?? (????-??-??) in the output of paths associated with file descriptors. * Improvements + * Implemented delay injection (-e inject=SET:delay_enter= and + -e inject=SET:delay_exit= options). * IPv6 addresses shown in socket information in -yy mode are now printed in brackets. * Enhanced decoding of prctl and ptrace syscalls. diff --git a/configure.ac b/configure.ac index 0895cde7..4b8bc736 100644 --- a/configure.ac +++ b/configure.ac @@ -809,6 +809,16 @@ if test "x$ac_cv_lib_dl_dladdr" = xyes; then fi AC_SUBST(dl_LIBS) +saved_LIBS="$LIBS" +AC_SEARCH_LIBS([timer_create], [rt]) +LIBS="$saved_LIBS" +case "$ac_cv_search_timer_create" in + no) AC_MSG_FAILURE([failed to find timer_create]) ;; + -l*) timer_LIBS="$ac_cv_search_timer_create" ;; + *) timer_LIBS= ;; +esac +AC_SUBST(timer_LIBS) + saved_LIBS="$LIBS" AC_SEARCH_LIBS([clock_gettime], [rt]) LIBS="$saved_LIBS" diff --git a/defs.h b/defs.h index 702c7514..ef135528 100644 --- a/defs.h +++ b/defs.h @@ -173,11 +173,13 @@ typedef struct ioctlent { #define INJECT_F_SIGNAL 1 #define INJECT_F_RETVAL 2 +#define INJECT_F_DELAY_ENTER 4 +#define INJECT_F_DELAY_EXIT 8 struct inject_data { uint8_t flags; /* only 2 of 8 flags are used so far */ uint8_t signo; /* NSIG <= 128 */ - uint16_t reserved; + uint16_t delay_idx; /* index in delay_data_vec */ kernel_long_t rval; }; @@ -213,6 +215,7 @@ struct tcb { struct timespec stime; /* System time usage as of last process wait */ struct timespec dtime; /* Delta for system time usage */ struct timespec etime; /* Syscall entry time */ + struct timespec delay_expiration_time; /* When does the delay end */ struct mmap_cache_t *mmap_cache; unsigned int mmap_cache_size; @@ -250,6 +253,9 @@ struct tcb { * in the middle of a syscall */ #define TCB_RECOVERING 0x400 /* We try to recover after detecting incorrect * syscall entering/exiting state */ +#define TCB_INJECT_DELAY_EXIT 0x800 /* Current syscall needs to be delayed + on exit */ +#define TCB_DELAYED 0x1000 /* Current syscall has been delayed */ /* qualifier flags */ #define QUAL_TRACE 0x001 /* this system call should be traced */ @@ -272,6 +278,8 @@ struct tcb { #define hide_log(tcp) ((tcp)->flags & TCB_HIDE_LOG) #define syscall_tampered(tcp) ((tcp)->flags & TCB_TAMPERED) #define recovering(tcp) ((tcp)->flags & TCB_RECOVERING) +#define inject_delay_exit(tcp) ((tcp)->flags & TCB_INJECT_DELAY_EXIT) +#define syscall_delayed(tcp) ((tcp)->flags & TCB_DELAYED) #include "xlat.h" @@ -1016,6 +1024,12 @@ extern const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES]; extern const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES]; extern struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES]; +uint16_t alloc_delay_data(void); +void fill_delay_data(uint16_t delay_idx, int intval, bool isenter); +bool is_delay_timer_created(void); +void arm_delay_timer(const struct tcb *); +void delay_tcb(struct tcb *, uint16_t delay_idx, bool isenter); + #ifdef IN_MPERS_BOOTSTRAP /* Transform multi-line MPERS_PRINTER_DECL statements to one-liners. */ # define MPERS_PRINTER_DECL(type, name, ...) MPERS_PRINTER_DECL(type, name, __VA_ARGS__) diff --git a/delay.c b/delay.c new file mode 100644 index 00000000..8adf4cc5 --- /dev/null +++ b/delay.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2018 The strace developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "defs.h" + +struct inject_delay_data { + struct timespec ts_enter; + struct timespec ts_exit; +}; + +static struct inject_delay_data *delay_data_vec; +static size_t delay_data_vec_capacity; /* size of the arena */ +static size_t delay_data_vec_size; /* size of the used arena */ + +static timer_t delay_timer = (timer_t) -1; + +static void +expand_delay_data_vec(void) +{ + const size_t old_capacity = delay_data_vec_capacity; + delay_data_vec = xgrowarray(delay_data_vec, &delay_data_vec_capacity, + sizeof(*delay_data_vec)); + memset(delay_data_vec + old_capacity, 0, + (delay_data_vec_capacity - old_capacity) + * sizeof(*delay_data_vec)); +} + +uint16_t +alloc_delay_data(void) +{ + const uint16_t rval = delay_data_vec_size; + + if (rval < delay_data_vec_size) + error_func_msg_and_die("delay index overflow"); + + if (delay_data_vec_size == delay_data_vec_capacity) + expand_delay_data_vec(); + + ++delay_data_vec_size; + return rval; +} + +void +fill_delay_data(uint16_t delay_idx, int intval, bool isenter) +{ + if (delay_idx >= delay_data_vec_size) + error_func_msg_and_die("delay_idx >= delay_data_vec_size"); + + struct timespec *ts; + if (isenter) + ts = &(delay_data_vec[delay_idx].ts_enter); + else + ts = &(delay_data_vec[delay_idx].ts_exit); + + ts->tv_sec = intval / 1000000; + ts->tv_nsec = intval % 1000000 * 1000; +} + +bool +is_delay_timer_created(void) +{ + return delay_timer != (timer_t) -1; +} + +void +arm_delay_timer(const struct tcb *const tcp) +{ + const struct itimerspec its = { + .it_value = tcp->delay_expiration_time + }; + + if (timer_settime(delay_timer, TIMER_ABSTIME, &its, NULL)) + perror_msg_and_die("timer_settime"); + + debug_func_msg("timer set to %lld.%09ld for pid %d", + (long long) tcp->delay_expiration_time.tv_sec, + (long) tcp->delay_expiration_time.tv_nsec, + tcp->pid); +} + +void +delay_tcb(struct tcb *tcp, uint16_t delay_idx, bool isenter) +{ + if (delay_idx >= delay_data_vec_size) + error_func_msg_and_die("delay_idx >= delay_data_vec_size"); + + debug_func_msg("delaying pid %d on %s", + tcp->pid, isenter ? "enter" : "exit"); + tcp->flags |= TCB_DELAYED; + + struct timespec *ts_diff; + if (isenter) + ts_diff = &(delay_data_vec[delay_idx].ts_enter); + else + ts_diff = &(delay_data_vec[delay_idx].ts_exit); + + struct timespec ts_now; + clock_gettime(CLOCK_MONOTONIC, &ts_now); + ts_add(&tcp->delay_expiration_time, &ts_now, ts_diff); + + if (is_delay_timer_created()) { + struct itimerspec its; + if (timer_gettime(delay_timer, &its)) + perror_msg_and_die("timer_gettime"); + + const struct timespec *const ts_old = &its.it_value; + if (ts_nz(ts_old) && ts_cmp(ts_diff, ts_old) > 0) + return; + } else { + if (timer_create(CLOCK_MONOTONIC, NULL, &delay_timer)) + perror_msg_and_die("timer_create"); + } + + arm_delay_timer(tcp); +} diff --git a/filter_qualify.c b/filter_qualify.c index 49f0d612..f4a405cc 100644 --- a/filter_qualify.c +++ b/filter_qualify.c @@ -78,6 +78,26 @@ find_errno_by_name(const char *name) return -1; } +static bool +parse_delay_token(const char *input, struct inject_opts *fopts, bool isenter) +{ + unsigned flag = isenter ? INJECT_F_DELAY_ENTER : INJECT_F_DELAY_EXIT; + + if (fopts->data.flags & flag) /* duplicate */ + return false; + long long intval = string_to_ulonglong(input); + if (intval < 0) /* couldn't parse */ + return false; + + if (fopts->data.delay_idx == (uint16_t) -1) + fopts->data.delay_idx = alloc_delay_data(); + /* populate .ts_enter or .ts_exit */ + fill_delay_data(fopts->data.delay_idx, intval, isenter); + fopts->data.flags |= flag; + + return true; +} + static bool parse_inject_token(const char *const token, struct inject_opts *const fopts, const bool fault_tokens_only) @@ -157,6 +177,14 @@ parse_inject_token(const char *const token, struct inject_opts *const fopts, return false; fopts->data.signo = intval; fopts->data.flags |= INJECT_F_SIGNAL; + } else if (!fault_tokens_only + && (val = STR_STRIP_PREFIX(token, "delay_enter=")) != token) { + if (!parse_delay_token(val, fopts, true)) + return false; + } else if (!fault_tokens_only + && (val = STR_STRIP_PREFIX(token, "delay_exit=")) != token) { + if (!parse_delay_token(val, fopts, false)) + return false; } else { return false; } @@ -247,7 +275,10 @@ qualify_inject_common(const char *const str, { struct inject_opts opts = { .first = 1, - .step = 1 + .step = 1, + .data = { + .delay_idx = -1 + } }; char *copy = xstrdup(str); const char *name = @@ -261,7 +292,7 @@ qualify_inject_common(const char *const str, free(copy); - /* If neither of retval, error, or signal is specified, then ... */ + /* If neither of retval, error, signal or delay is specified, then ... */ if (!opts.data.flags) { if (fault_tokens_only) { /* in fault= syntax the default error code is ENOSYS. */ diff --git a/strace.1.in b/strace.1.in index b5eb772a..b0b692d3 100644 --- a/strace.1.in +++ b/strace.1.in @@ -584,14 +584,16 @@ Note that this is independent from the normal tracing of the system call which is controlled by the option .BR -e "\ " trace = write . .TP -\fB\-e\ inject\fR=\,\fIset\/\fR[:\fBerror\fR=\,\fIerrno\/\fR|:\fBretval\fR=\,\fIvalue\/\fR][:\fBsignal\fR=\,\fIsig\/\fR][:\fBwhen\fR=\,\fIexpr\/\fR] +\fB\-e\ inject\fR=\,\fIset\/\fR[:\fBerror\fR=\,\fIerrno\/\fR|:\fBretval\fR=\,\fIvalue\/\fR][:\fBsignal\fR=\,\fIsig\/\fR][:\fBdelay_enter\fR=\,\fIusecs\/\fR][:\fBdelay_exit\fR=\,\fIusecs\/\fR][:\fBwhen\fR=\,\fIexpr\/\fR] Perform syscall tampering for the specified set of syscalls. At least one of .BR error , .BR retval , +.BR signal , +.BR delay_enter , or -.B signal +.B delay_exit options has to be specified. .B error and @@ -618,14 +620,24 @@ or a numeric value within 1..\fBSIGRTMAX\fR range, that signal is delivered on entering every syscall specified by the .IR set . +If :\fBdelay_enter\fR=\,\fIusecs\/\fR or :\fBdelay_exit\fR=\,\fIusecs\/\fR +options are specified, delay injection is performed: the tracee is delayed +by at least +.IR usecs +microseconds on entering or exiting the syscall. + If :\fBsignal\fR=\,\fIsig\/\fR option is specified without -:\fBerror\fR=\,\fIerrno\/\fR or :\fBretval\fR=\,\fIvalue\/\fR options, +:\fBerror\fR=\,\fIerrno\/\fR, :\fBretval\fR=\,\fIvalue\/\fR or +:\fBdelay_{enter,exit}\fR=\,\fIusecs\/\fR options, then only a signal .I sig -is delivered without a syscall fault injection. +is delivered without a syscall fault or delay injection. Conversely, :\fBerror\fR=\,\fIerrno\/\fR or :\fBretval\fR=\,\fIvalue\/\fR option without -:\fBsignal\fR=\,\fIsig\/\fR option injects a fault without delivering a signal. +:\fBdelay_enter\fR=\,\fIusecs\/\fR, +:\fBdelay_exit\fR=\,\fIusecs\/\fR or +:\fBsignal\fR=\,\fIsig\/\fR options injects a fault without delivering a signal +or injecting a delay, etc. If both :\fBerror\fR=\,\fIerrno\/\fR or :\fBretval\fR=\,\fIvalue\/\fR and :\fBsignal\fR=\,\fIsig\/\fR options are specified, then both diff --git a/strace.c b/strace.c index e7da26f1..397f5d51 100644 --- a/strace.c +++ b/strace.c @@ -34,6 +34,7 @@ #include #include #include "ptrace.h" +#include #include #include #include @@ -172,6 +173,10 @@ static volatile sig_atomic_t interrupted; static volatile int interrupted; #endif +static jmp_buf timer_jmp_buf; +static sigset_t timer_set; +static void timer_sighandler(int); + #ifndef HAVE_STRERROR #if !HAVE_DECL_SYS_ERRLIST @@ -1881,6 +1886,11 @@ init(int argc, char *argv[]) set_sighandler(SIGTERM, interactive ? interrupt : SIG_IGN, NULL); } + sigemptyset(&timer_set); + sigaddset(&timer_set, SIGALRM); + sigprocmask(SIG_BLOCK, &timer_set, NULL); + set_sighandler(SIGALRM, timer_sighandler, NULL); + if (nprocs != 0 || daemonized_tracer) startup_attach(); @@ -2228,16 +2238,55 @@ next_event(int *pstatus, siginfo_t *si) return TE_BREAK; } + if (sigsetjmp(timer_jmp_buf, 1)) { + /* + * restart_delayed_tcbs() forwarded an error + * from dispatch_event(). + */ + return TE_BREAK; + } + + /* + * The window of opportunity to handle expirations + * of the delay timer opens here. + * + * Unblock the signal handler for the delay timer + * iff the delay timer is already created. + */ + if (is_delay_timer_created()) + sigprocmask(SIG_UNBLOCK, &timer_set, NULL); + + /* + * If the delay timer has expired, then its expiration + * has been handled already by the signal handler. + * + * If the delay timer expires during wait4(), + * then the system call will be interrupted and + * the expiration will be handled by the signal handler. + */ pid = wait4(-1, pstatus, __WALL, (cflag ? &ru : NULL)); + const int wait_errno = errno; + + /* + * The window of opportunity to handle expirations + * of the delay timer closes here. + * + * Block the signal handler for the delay timer + * iff the delay timer is already created. + */ + if (is_delay_timer_created()) + sigprocmask(SIG_BLOCK, &timer_set, NULL); + if (pid < 0) { - if (errno == EINTR) + if (wait_errno == EINTR) return TE_NEXT; - if (nprocs == 0 && errno == ECHILD) + if (nprocs == 0 && wait_errno == ECHILD) return TE_BREAK; /* * If nprocs > 0, ECHILD is not expected, * treat it as any other error here: */ + errno = wait_errno; perror_msg_and_die("wait4(__WALL)"); } @@ -2495,6 +2544,10 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si) if (interrupted) return false; + /* If the process is being delayed, do not ptrace_restart just yet */ + if (syscall_delayed(current_tcp)) + return true; + if (ptrace_restart(restart_op, current_tcp, restart_sig) < 0) { /* Note: ptrace_restart emitted error message */ exit_code = 1; @@ -2503,6 +2556,69 @@ dispatch_event(enum trace_event ret, int *pstatus, siginfo_t *si) return true; } +static bool +restart_delayed_tcb(struct tcb *const tcp) +{ + debug_func_msg("pid %d", tcp->pid); + + tcp->flags &= ~TCB_DELAYED; + + struct tcb *const prev_tcp = current_tcp; + current_tcp = tcp; + bool ret = dispatch_event(TE_RESTART, NULL, NULL); + current_tcp = prev_tcp; + + return ret; +} + +static bool +restart_delayed_tcbs(void) +{ + struct tcb *tcp_next = NULL; + struct timespec ts_now; + + clock_gettime(CLOCK_MONOTONIC, &ts_now); + + for (size_t i = 0; i < tcbtabsize; i++) { + struct tcb *tcp = tcbtab[i]; + + if (tcp->pid && syscall_delayed(tcp)) { + if (ts_cmp(&ts_now, &tcp->delay_expiration_time) > 0) { + if (!restart_delayed_tcb(tcp)) + return false; + } else { + /* Check whether this tcb is the next. */ + if (!tcp_next || + ts_cmp(&tcp_next->delay_expiration_time, + &tcp->delay_expiration_time) > 0) { + tcp_next = tcp; + } + } + } + } + + if (tcp_next) + arm_delay_timer(tcp_next); + + return true; +} + +/* + * As this signal handler does a lot of work that is not suitable + * for signal handlers, extra care must be taken to ensure that + * it is enabled only in those places where it's safe. + */ +static void +timer_sighandler(int sig) +{ + int saved_errno = errno; + + if (!restart_delayed_tcbs()) + siglongjmp(timer_jmp_buf, 1); + + errno = saved_errno; +} + #ifdef ENABLE_COVERAGE_GCOV extern void __gcov_flush(void); #endif diff --git a/syscall.c b/syscall.c index 75e15b6e..9f1f8642 100644 --- a/syscall.c +++ b/syscall.c @@ -544,6 +544,10 @@ tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo) if (opts->data.flags & INJECT_F_RETVAL && !arch_set_scno(tcp, -1)) tcp->flags |= TCB_TAMPERED; + if (opts->data.flags & INJECT_F_DELAY_ENTER) + delay_tcb(tcp, opts->data.delay_idx, true); + if (opts->data.flags & INJECT_F_DELAY_EXIT) + tcp->flags |= TCB_INJECT_DELAY_EXIT; } return 0; @@ -552,6 +556,16 @@ tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo) static long tamper_with_syscall_exiting(struct tcb *tcp) { + struct inject_opts *opts = tcb_inject_opts(tcp); + if (!opts) + return 0; + + if (inject_delay_exit(tcp)) + delay_tcb(tcp, opts->data.delay_idx, false); + + if (!syscall_tampered(tcp)) + return 0; + if (!syserror(tcp)) { error_msg("Failed to tamper with process %d: got no error " "(return value %#" PRI_klx ")", @@ -560,12 +574,8 @@ tamper_with_syscall_exiting(struct tcb *tcp) return 1; } - struct inject_opts *opts = tcb_inject_opts(tcp); bool update_tcb = false; - if (!opts) - return 0; - if (opts->data.rval >= 0) { kernel_long_t u_rval = tcp->u_rval; @@ -747,7 +757,7 @@ syscall_exiting_decode(struct tcb *tcp, struct timespec *pts) int syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res) { - if (syscall_tampered(tcp)) + if (syscall_tampered(tcp) || inject_delay_exit(tcp)) tamper_with_syscall_exiting(tcp); if (cflag) { @@ -960,7 +970,7 @@ syscall_exiting_trace(struct tcb *tcp, struct timespec *ts, int res) void syscall_exiting_finish(struct tcb *tcp) { - tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED); + tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED | TCB_INJECT_DELAY_EXIT); tcp->sys_func_rval = 0; free_tcb_priv_data(tcp); }