]> granicus.if.org Git - strace/blob - syscall.c
Revert "Implement -e trace=%sched option"
[strace] / syscall.c
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "defs.h"
35 #include "native_defs.h"
36 #include "nsig.h"
37 #include <sys/param.h>
38
39 /* for struct iovec */
40 #include <sys/uio.h>
41
42 /* for __X32_SYSCALL_BIT */
43 #include <asm/unistd.h>
44
45 #include "regs.h"
46 #include "ptrace.h"
47
48 #if defined(SPARC64)
49 # undef PTRACE_GETREGS
50 # define PTRACE_GETREGS PTRACE_GETREGS64
51 # undef PTRACE_SETREGS
52 # define PTRACE_SETREGS PTRACE_SETREGS64
53 #endif
54
55 #if defined SPARC64
56 # include <asm/psrcompat.h>
57 #elif defined SPARC
58 # include <asm/psr.h>
59 #endif
60
61 #ifdef IA64
62 # include <asm/rse.h>
63 #endif
64
65 #ifndef NT_PRSTATUS
66 # define NT_PRSTATUS 1
67 #endif
68
69 #include "syscall.h"
70
71 /*
 * Define these shorthand notations to simplify the syscallent files.
 * Each one is a short alias for a longer flag/constant name.  They are
 * only meant to exist while the syscallent*.h tables below are included;
 * all of them are #undef'ed again right after the tables.
 */
72 #define TD TRACE_DESC
73 #define TF TRACE_FILE
74 #define TI TRACE_IPC
75 #define TN TRACE_NETWORK
76 #define TP TRACE_PROCESS
77 #define TS TRACE_SIGNAL
78 #define TM TRACE_MEMORY
79 #define TSF TRACE_STATFS
80 #define NF SYSCALL_NEVER_FAILS
81 #define MA MAX_ARGS
82 #define SI STACKTRACE_INVALIDATE_CACHE
83 #define SE STACKTRACE_CAPTURE_ON_ENTER
84 #define CST COMPAT_SYSCALL_TYPES
85
/*
 * SEN(name) expands to two comma-separated items: the SEN_name constant
 * and SYS_FUNC_NAME(sys_name).
 */
86 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
87
/* Syscall table of personality 0; its entries come from syscallent.h. */
88 const struct_sysent sysent0[] = {
89 #include "syscallent.h"
90 };
91
/* Syscall table of personality 1 (only when more than one is supported). */
92 #if SUPPORTED_PERSONALITIES > 1
93 # include PERSONALITY1_INCLUDE_FUNCS
94 static const struct_sysent sysent1[] = {
95 # include "syscallent1.h"
96 };
97 #endif
98
/* Syscall table of personality 2 (only when more than two are supported). */
99 #if SUPPORTED_PERSONALITIES > 2
100 # include PERSONALITY2_INCLUDE_FUNCS
101 static const struct_sysent sysent2[] = {
102 # include "syscallent2.h"
103 };
104 #endif
105
106 /* Now undef them since short defines cause wicked namespace pollution. */
107 #undef SEN
108 #undef TD
109 #undef TF
110 #undef TI
111 #undef TN
112 #undef TP
113 #undef TS
114 #undef TM
115 #undef TSF
116 #undef NF
117 #undef MA
118 #undef SI
119 #undef SE
120 #undef CST
121
122 /*
123  * `ioctlent[012].h' files are automatically generated by the auxiliary
124  * program `ioctlsort', such that the list is sorted by the `code' field.
125  * This has the side-effect of resolving the _IO.. macros into
126  * plain integers, eliminating the need to include here everything
127  * in "/usr/include".
128  */
129
/*
 * Personality 0 tables: errno names, signal names and ioctl entries.
 * These three are defined with external linkage, unlike the tables of
 * the other personalities below.
 */
130 const char *const errnoent0[] = {
131 #include "errnoent.h"
132 };
133 const char *const signalent0[] = {
134 #include "signalent.h"
135 };
136 const struct_ioctlent ioctlent0[] = {
137 #include "ioctlent0.h"
138 };
139
/*
 * Personality 1 tables.  The printer tables for personalities 0 and 1
 * (printers0, printers1) are also only defined when more than one
 * personality is supported.
 */
140 #if SUPPORTED_PERSONALITIES > 1
141 static const char *const errnoent1[] = {
142 # include "errnoent1.h"
143 };
144 static const char *const signalent1[] = {
145 # include "signalent1.h"
146 };
147 static const struct_ioctlent ioctlent1[] = {
148 # include "ioctlent1.h"
149 };
150 # include PERSONALITY0_INCLUDE_PRINTERS_DECLS
151 static const struct_printers printers0 = {
152 # include PERSONALITY0_INCLUDE_PRINTERS_DEFS
153 };
154 # include PERSONALITY1_INCLUDE_PRINTERS_DECLS
155 static const struct_printers printers1 = {
156 # include PERSONALITY1_INCLUDE_PRINTERS_DEFS
157 };
158 #endif
159
/* Personality 2 tables, including its printer table. */
160 #if SUPPORTED_PERSONALITIES > 2
161 static const char *const errnoent2[] = {
162 # include "errnoent2.h"
163 };
164 static const char *const signalent2[] = {
165 # include "signalent2.h"
166 };
167 static const struct_ioctlent ioctlent2[] = {
168 # include "ioctlent2.h"
169 };
170 # include PERSONALITY2_INCLUDE_PRINTERS_DECLS
171 static const struct_printers printers2 = {
172 # include PERSONALITY2_INCLUDE_PRINTERS_DEFS
173 };
174 #endif
175
176 enum {
177         nsyscalls0 = ARRAY_SIZE(sysent0)
178 #if SUPPORTED_PERSONALITIES > 1
179         , nsyscalls1 = ARRAY_SIZE(sysent1)
180 # if SUPPORTED_PERSONALITIES > 2
181         , nsyscalls2 = ARRAY_SIZE(sysent2)
182 # endif
183 #endif
184 };
185
186 enum {
187         nerrnos0 = ARRAY_SIZE(errnoent0)
188 #if SUPPORTED_PERSONALITIES > 1
189         , nerrnos1 = ARRAY_SIZE(errnoent1)
190 # if SUPPORTED_PERSONALITIES > 2
191         , nerrnos2 = ARRAY_SIZE(errnoent2)
192 # endif
193 #endif
194 };
195
196 enum {
197         nsignals0 = ARRAY_SIZE(signalent0)
198 #if SUPPORTED_PERSONALITIES > 1
199         , nsignals1 = ARRAY_SIZE(signalent1)
200 # if SUPPORTED_PERSONALITIES > 2
201         , nsignals2 = ARRAY_SIZE(signalent2)
202 # endif
203 #endif
204 };
205
206 enum {
207         nioctlents0 = ARRAY_SIZE(ioctlent0)
208 #if SUPPORTED_PERSONALITIES > 1
209         , nioctlents1 = ARRAY_SIZE(ioctlent1)
210 # if SUPPORTED_PERSONALITIES > 2
211         , nioctlents2 = ARRAY_SIZE(ioctlent2)
212 # endif
213 #endif
214 };
215
/*
 * Pointers to the tables currently in use.  They start out at the
 * personality 0 tables and are re-pointed by set_personality().
 * These pointer variables are only defined here when more than one
 * personality is supported.
 */
216 #if SUPPORTED_PERSONALITIES > 1
217 const struct_sysent *sysent = sysent0;
218 const char *const *errnoent = errnoent0;
219 const char *const *signalent = signalent0;
220 const struct_ioctlent *ioctlent = ioctlent0;
221 const struct_printers *printers = &printers0;
222 #endif
223
/*
 * Sizes of the tables currently in use, initialised to the personality 0
 * sizes.  Unlike the pointers above they are defined unconditionally.
 */
224 unsigned nsyscalls = nsyscalls0;
225 unsigned nerrnos = nerrnos0;
226 unsigned nsignals = nsignals0;
227 unsigned nioctlents = nioctlents0;
228
229 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
230         nsyscalls0,
231 #if SUPPORTED_PERSONALITIES > 1
232         nsyscalls1,
233 #endif
234 #if SUPPORTED_PERSONALITIES > 2
235         nsyscalls2,
236 #endif
237 };
238 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
239         sysent0,
240 #if SUPPORTED_PERSONALITIES > 1
241         sysent1,
242 #endif
243 #if SUPPORTED_PERSONALITIES > 2
244         sysent2,
245 #endif
246 };
247
248 #if SUPPORTED_PERSONALITIES > 1
249 unsigned current_personality;
250
251 # ifndef current_wordsize
252 unsigned current_wordsize;
253 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
254         PERSONALITY0_WORDSIZE,
255         PERSONALITY1_WORDSIZE,
256 # if SUPPORTED_PERSONALITIES > 2
257         PERSONALITY2_WORDSIZE,
258 # endif
259 };
260 # endif
261
262 # ifndef current_klongsize
263 unsigned current_klongsize;
264 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
265         PERSONALITY0_KLONGSIZE,
266         PERSONALITY1_KLONGSIZE,
267 #  if SUPPORTED_PERSONALITIES > 2
268         PERSONALITY2_KLONGSIZE,
269 #  endif
270 };
271 # endif
272
273 void
274 set_personality(int personality)
275 {
276         nsyscalls = nsyscall_vec[personality];
277         sysent = sysent_vec[personality];
278
279         switch (personality) {
280         case 0:
281                 errnoent = errnoent0;
282                 nerrnos = nerrnos0;
283                 ioctlent = ioctlent0;
284                 nioctlents = nioctlents0;
285                 signalent = signalent0;
286                 nsignals = nsignals0;
287                 printers = &printers0;
288                 break;
289
290         case 1:
291                 errnoent = errnoent1;
292                 nerrnos = nerrnos1;
293                 ioctlent = ioctlent1;
294                 nioctlents = nioctlents1;
295                 signalent = signalent1;
296                 nsignals = nsignals1;
297                 printers = &printers1;
298                 break;
299
300 # if SUPPORTED_PERSONALITIES > 2
301         case 2:
302                 errnoent = errnoent2;
303                 nerrnos = nerrnos2;
304                 ioctlent = ioctlent2;
305                 nioctlents = nioctlents2;
306                 signalent = signalent2;
307                 nsignals = nsignals2;
308                 printers = &printers2;
309                 break;
310 # endif
311         }
312
313         current_personality = personality;
314 # ifndef current_wordsize
315         current_wordsize = personality_wordsize[personality];
316 # endif
317 # ifndef current_klongsize
318         current_klongsize = personality_klongsize[personality];
319 # endif
320 }
321
322 static void
323 update_personality(struct tcb *tcp, unsigned int personality)
324 {
325         if (personality == current_personality)
326                 return;
327         set_personality(personality);
328
329         if (personality == tcp->currpers)
330                 return;
331         tcp->currpers = personality;
332
333 # undef PERSONALITY_NAMES
334 # if defined POWERPC64
335 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
336 # elif defined X86_64
337 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
338 # elif defined X32
339 #  define PERSONALITY_NAMES {"x32", "32 bit"}
340 # elif defined AARCH64
341 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
342 # elif defined TILE
343 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
344 # endif
345 # ifdef PERSONALITY_NAMES
346         if (!qflag) {
347                 static const char *const names[] = PERSONALITY_NAMES;
348                 error_msg("[ Process PID=%d runs in %s mode. ]",
349                           tcp->pid, names[personality]);
350         }
351 # endif
352 }
353 #endif
354
355 #ifdef SYS_socket_subcall
356 static void
357 decode_socket_subcall(struct tcb *tcp)
358 {
359         const int call = tcp->u_arg[0];
360
361         if (call < 1 || call >= SYS_socket_nsubcalls)
362                 return;
363
364         const kernel_ulong_t scno = SYS_socket_subcall + call;
365         const unsigned int nargs = sysent[scno].nargs;
366         uint64_t buf[nargs];
367
368         if (umoven(tcp, tcp->u_arg[1], nargs * current_wordsize, buf) < 0)
369                 return;
370
371         tcp->scno = scno;
372         tcp->qual_flg = qual_flags(scno);
373         tcp->s_ent = &sysent[scno];
374
375         unsigned int i;
376         for (i = 0; i < nargs; ++i)
377                 tcp->u_arg[i] = (sizeof(uint32_t) == current_wordsize)
378                                 ? ((uint32_t *) (void *) buf)[i] : buf[i];
379 }
380 #endif
381
382 #ifdef SYS_ipc_subcall
383 static void
384 decode_ipc_subcall(struct tcb *tcp)
385 {
386         unsigned int call = tcp->u_arg[0];
387         const unsigned int version = call >> 16;
388
389         if (version) {
390 # if defined S390 || defined S390X
391                 return;
392 # else
393 #  ifdef SPARC64
394                 if (current_wordsize == 8)
395                         return;
396 #  endif
397                 set_tcb_priv_ulong(tcp, version);
398                 call &= 0xffff;
399 # endif
400         }
401
402         switch (call) {
403                 case  1: case  2: case  3: case  4:
404                 case 11: case 12: case 13: case 14:
405                 case 21: case 22: case 23: case 24:
406                         break;
407                 default:
408                         return;
409         }
410
411         tcp->scno = SYS_ipc_subcall + call;
412         tcp->qual_flg = qual_flags(tcp->scno);
413         tcp->s_ent = &sysent[tcp->scno];
414
415         const unsigned int n = tcp->s_ent->nargs;
416         unsigned int i;
417         for (i = 0; i < n; i++)
418                 tcp->u_arg[i] = tcp->u_arg[i + 1];
419 }
420 #endif
421
422 #ifdef LINUX_MIPSO32
423 static void
424 decode_mips_subcall(struct tcb *tcp)
425 {
426         if (!scno_is_valid(tcp->u_arg[0]))
427                 return;
428         tcp->scno = tcp->u_arg[0];
429         tcp->qual_flg = qual_flags(tcp->scno);
430         tcp->s_ent = &sysent[tcp->scno];
431         memmove(&tcp->u_arg[0], &tcp->u_arg[1],
432                 sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));
433         /*
434          * Fetching the last arg of 7-arg syscalls (fadvise64_64
435          * and sync_file_range) requires additional code,
436          * see linux/mips/get_syscall_args.c
437          */
438         if (tcp->s_ent->nargs == MAX_ARGS) {
439                 if (umoven(tcp,
440                            mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
441                            sizeof(tcp->u_arg[0]),
442                            &tcp->u_arg[MAX_ARGS - 1]) < 0)
443                 tcp->u_arg[MAX_ARGS - 1] = 0;
444         }
445 }
446 #endif /* LINUX_MIPSO32 */
447
448 static void
449 dumpio(struct tcb *tcp)
450 {
451         if (syserror(tcp))
452                 return;
453
454         int fd = tcp->u_arg[0];
455         if (fd < 0)
456                 return;
457
458         if (is_number_in_set(fd, &read_set)) {
459                 switch (tcp->s_ent->sen) {
460                 case SEN_read:
461                 case SEN_pread:
462                 case SEN_recv:
463                 case SEN_recvfrom:
464                 case SEN_mq_timedreceive:
465                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
466                         return;
467                 case SEN_readv:
468                 case SEN_preadv:
469                 case SEN_preadv2:
470                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
471                                      tcp->u_rval);
472                         return;
473                 case SEN_recvmsg:
474                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
475                         return;
476                 case SEN_recvmmsg:
477                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
478                         return;
479                 }
480         }
481         if (is_number_in_set(fd, &write_set)) {
482                 switch (tcp->s_ent->sen) {
483                 case SEN_write:
484                 case SEN_pwrite:
485                 case SEN_send:
486                 case SEN_sendto:
487                 case SEN_mq_timedsend:
488                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
489                         break;
490                 case SEN_writev:
491                 case SEN_pwritev:
492                 case SEN_pwritev2:
493                 case SEN_vmsplice:
494                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], -1);
495                         break;
496                 case SEN_sendmsg:
497                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1);
498                         break;
499                 case SEN_sendmmsg:
500                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
501                         break;
502                 }
503         }
504 }
505
506 /*
507  * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
508  * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
509  */
510 static kernel_ulong_t
511 shuffle_scno(kernel_ulong_t scno)
512 {
513 #if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
514         if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
515                 return scno;
516
517         /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
518         if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
519                 return 0x000ffff0;
520         if (scno == 0x000ffff0)
521                 return ARM_FIRST_SHUFFLED_SYSCALL;
522
523 # define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
524         /*
525          * Is it ARM specific syscall?
526          * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
527          * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
528          */
529         if (scno >= 0x000f0000 &&
530             scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
531                 return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
532         }
533         if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
534                 return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
535         }
536 #endif /* ARM || AARCH64 */
537
538         return scno;
539 }
540
541 const char *
542 err_name(unsigned long err)
543 {
544         if ((err < nerrnos) && errnoent[err])
545                 return errnoent[err];
546
547         return NULL;
548 }
549
/*
 * Result of the last register fetch.  trace_syscall_exiting() treats any
 * nonzero value as "registers unavailable".
 */
static long get_regs_error;

/* Reset get_regs_error to -1, i.e. to a nonzero (error) value. */
void
clear_regs(void)
{
	get_regs_error = -1L;
}
557
/*
 * Forward declarations of static helpers that are used by the functions
 * below before their definitions appear.
 */
558 static void get_regs(pid_t pid);
559 static int get_syscall_args(struct tcb *);
560 static int get_syscall_result(struct tcb *);
561 static int arch_get_scno(struct tcb *tcp);
562 static int arch_set_scno(struct tcb *, kernel_ulong_t);
563 static void get_error(struct tcb *, const bool);
564 static int arch_set_error(struct tcb *);
565 static int arch_set_success(struct tcb *);
566
/*
 * Injection option tables, one pointer per personality.
 * tamper_with_syscall_entering() copies nsyscalls entries of the current
 * personality's table into a tcb's own inject_vec the first time it runs
 * for that tcb and personality.
 */
567 struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
568
569 static struct inject_opts *
570 tcb_inject_opts(struct tcb *tcp)
571 {
572         return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
573                ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
574 }
575
576
577 static long
578 tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
579 {
580         if (!tcp->inject_vec[current_personality]) {
581                 tcp->inject_vec[current_personality] =
582                         xcalloc(nsyscalls, sizeof(**inject_vec));
583                 memcpy(tcp->inject_vec[current_personality],
584                        inject_vec[current_personality],
585                        nsyscalls * sizeof(**inject_vec));
586         }
587
588         struct inject_opts *opts = tcb_inject_opts(tcp);
589
590         if (!opts || opts->first == 0)
591                 return 0;
592
593         --opts->first;
594
595         if (opts->first != 0)
596                 return 0;
597
598         opts->first = opts->step;
599
600         if (opts->signo > 0)
601                 *signo = opts->signo;
602         if (opts->rval != INJECT_OPTS_RVAL_DEFAULT && !arch_set_scno(tcp, -1))
603                 tcp->flags |= TCB_TAMPERED;
604
605         return 0;
606 }
607
608 static long
609 tamper_with_syscall_exiting(struct tcb *tcp)
610 {
611         struct inject_opts *opts = tcb_inject_opts(tcp);
612
613         if (!opts)
614                 return 0;
615
616         if (opts->rval >= 0) {
617                 kernel_long_t u_rval = tcp->u_rval;
618
619                 tcp->u_rval = opts->rval;
620                 if (arch_set_success(tcp)) {
621                         tcp->u_rval = u_rval;
622                 } else {
623                         tcp->u_error = 0;
624                 }
625         } else {
626                 unsigned long new_error = -opts->rval;
627
628                 if (new_error != tcp->u_error && new_error <= MAX_ERRNO_VALUE) {
629                         unsigned long u_error = tcp->u_error;
630
631                         tcp->u_error = new_error;
632                         if (arch_set_error(tcp)) {
633                                 tcp->u_error = u_error;
634                         }
635                 }
636         }
637
638         return 0;
639 }
640
641 static int
642 trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
643 {
644         int res = get_scno(tcp);
645         if (res == 0)
646                 return res;
647
648         int scno_good = res;
649         if (res == 1)
650                 res = get_syscall_args(tcp);
651
652         if (res != 1) {
653                 printleader(tcp);
654                 tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
655                 /*
656                  * " <unavailable>" will be added later by the code which
657                  * detects ptrace errors.
658                  */
659                 goto ret;
660         }
661
662 #ifdef LINUX_MIPSO32
663         if (SEN_syscall == tcp->s_ent->sen)
664                 decode_mips_subcall(tcp);
665 #endif
666
667 #if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
668         switch (tcp->s_ent->sen) {
669 # ifdef SYS_socket_subcall
670                 case SEN_socketcall:
671                         decode_socket_subcall(tcp);
672                         break;
673 # endif
674 # ifdef SYS_ipc_subcall
675                 case SEN_ipc:
676                         decode_ipc_subcall(tcp);
677                         break;
678 # endif
679         }
680 #endif
681
682         /* Restrain from fault injection while the trace executes strace code. */
683         if (hide_log(tcp)) {
684                 tcp->qual_flg &= ~QUAL_INJECT;
685         }
686
687         switch (tcp->s_ent->sen) {
688                 case SEN_execve:
689                 case SEN_execveat:
690 #if defined SPARC || defined SPARC64
691                 case SEN_execv:
692 #endif
693                         tcp->flags &= ~TCB_HIDE_LOG;
694                         break;
695         }
696
697         if (!(tcp->qual_flg & QUAL_TRACE)
698          || (tracing_paths && !pathtrace_match(tcp))
699         ) {
700                 tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
701                 tcp->sys_func_rval = 0;
702                 return 0;
703         }
704
705         tcp->flags &= ~TCB_FILTERED;
706
707         if (hide_log(tcp)) {
708                 res = 0;
709                 goto ret;
710         }
711
712         if (tcp->qual_flg & QUAL_INJECT)
713                 tamper_with_syscall_entering(tcp, sig);
714
715         if (cflag == CFLAG_ONLY_STATS) {
716                 res = 0;
717                 goto ret;
718         }
719
720 #ifdef USE_LIBUNWIND
721         if (stack_trace_enabled) {
722                 if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
723                         unwind_capture_stacktrace(tcp);
724         }
725 #endif
726
727         printleader(tcp);
728         tprintf("%s(", tcp->s_ent->sys_name);
729         if (tcp->qual_flg & QUAL_RAW)
730                 res = printargs(tcp);
731         else
732                 res = tcp->s_ent->sys_func(tcp);
733
734         fflush(tcp->outf);
735  ret:
736         tcp->flags |= TCB_INSYSCALL;
737         tcp->sys_func_rval = res;
738         /* Measure the entrance time as late as possible to avoid errors. */
739         if (Tflag || cflag)
740                 gettimeofday(&tcp->etime, NULL);
741         return res;
742 }
743
744 static bool
745 syscall_tampered(struct tcb *tcp)
746 {
747         return tcp->flags & TCB_TAMPERED;
748 }
749
750 static int
751 trace_syscall_exiting(struct tcb *tcp)
752 {
753         struct timeval tv;
754
755         /* Measure the exit time as early as possible to avoid errors. */
756         if ((Tflag || cflag) && !(filtered(tcp) || hide_log(tcp)))
757                 gettimeofday(&tv, NULL);
758
759 #ifdef USE_LIBUNWIND
760         if (stack_trace_enabled) {
761                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
762                         unwind_cache_invalidate(tcp);
763         }
764 #endif
765
766         if (filtered(tcp) || hide_log(tcp))
767                 goto ret;
768
769         get_regs(tcp->pid);
770 #if SUPPORTED_PERSONALITIES > 1
771         update_personality(tcp, tcp->currpers);
772 #endif
773         int res = (get_regs_error ? -1 : get_syscall_result(tcp));
774
775         if (syserror(tcp) && syscall_tampered(tcp))
776                 tamper_with_syscall_exiting(tcp);
777
778         if (cflag) {
779                 count_syscall(tcp, &tv);
780                 if (cflag == CFLAG_ONLY_STATS) {
781                         goto ret;
782                 }
783         }
784
785         /* If not in -ff mode, and printing_tcp != tcp,
786          * then the log currently does not end with output
787          * of _our syscall entry_, but with something else.
788          * We need to say which syscall's return is this.
789          *
790          * Forced reprinting via TCB_REPRINT is used only by
791          * "strace -ff -oLOG test/threaded_execve" corner case.
792          * It's the only case when -ff mode needs reprinting.
793          */
794         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
795                 tcp->flags &= ~TCB_REPRINT;
796                 printleader(tcp);
797                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
798         }
799         printing_tcp = tcp;
800
801         tcp->s_prev_ent = NULL;
802         if (res != 1) {
803                 /* There was error in one of prior ptrace ops */
804                 tprints(") ");
805                 tabto();
806                 tprints("= ? <unavailable>\n");
807                 line_ended();
808                 tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
809                 tcp->sys_func_rval = 0;
810                 free_tcb_priv_data(tcp);
811                 return res;
812         }
813         tcp->s_prev_ent = tcp->s_ent;
814
815         int sys_res = 0;
816         if (tcp->qual_flg & QUAL_RAW) {
817                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
818         } else {
819         /* FIXME: not_failing_only (IOW, option -z) is broken:
820          * failure of syscall is known only after syscall return.
821          * Thus we end up with something like this on, say, ENOENT:
822          *     open("doesnt_exist", O_RDONLY <unfinished ...>
823          *     {next syscall decode}
824          * whereas the intended result is that open(...) line
825          * is not shown at all.
826          */
827                 if (not_failing_only && tcp->u_error)
828                         goto ret;       /* ignore failed syscalls */
829                 if (tcp->sys_func_rval & RVAL_DECODED)
830                         sys_res = tcp->sys_func_rval;
831                 else
832                         sys_res = tcp->s_ent->sys_func(tcp);
833         }
834
835         tprints(") ");
836         tabto();
837         unsigned long u_error = tcp->u_error;
838
839         if (tcp->qual_flg & QUAL_RAW) {
840                 if (u_error) {
841                         tprintf("= -1 (errno %lu)", u_error);
842                 } else {
843                         tprintf("= %#" PRI_klx, tcp->u_rval);
844                 }
845                 if (syscall_tampered(tcp))
846                         tprints(" (INJECTED)");
847         }
848         else if (!(sys_res & RVAL_NONE) && u_error) {
849                 const char *u_error_str;
850
851                 switch (u_error) {
852                 /* Blocked signals do not interrupt any syscalls.
853                  * In this case syscalls don't return ERESTARTfoo codes.
854                  *
855                  * Deadly signals set to SIG_DFL interrupt syscalls
856                  * and kill the process regardless of which of the codes below
857                  * is returned by the interrupted syscall.
858                  * In some cases, kernel forces a kernel-generated deadly
859                  * signal to be unblocked and set to SIG_DFL (and thus cause
860                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
861                  * or SIGILL. (The alternative is to leave process spinning
862                  * forever on the faulty instruction - not useful).
863                  *
864                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
865                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
866                  * but kernel will always restart them.
867                  */
868                 case ERESTARTSYS:
869                         /* Most common type of signal-interrupted syscall exit code.
870                          * The system call will be restarted with the same arguments
871                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
872                          */
873                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
874                         break;
875                 case ERESTARTNOINTR:
876                         /* Rare. For example, fork() returns this if interrupted.
877                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
878                          */
879                         tprints("= ? ERESTARTNOINTR (To be restarted)");
880                         break;
881                 case ERESTARTNOHAND:
882                         /* pause(), rt_sigsuspend() etc use this code.
883                          * SA_RESTART is ignored (assumed not set):
884                          * syscall won't restart (will return EINTR instead)
885                          * even after signal with SA_RESTART set. However,
886                          * after SIG_IGN or SIG_DFL signal it will restart
887                          * (thus the name "restart only if has no handler").
888                          */
889                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
890                         break;
891                 case ERESTART_RESTARTBLOCK:
892                         /* Syscalls like nanosleep(), poll() which can't be
893                          * restarted with their original arguments use this
894                          * code. Kernel will execute restart_syscall() instead,
895                          * which changes arguments before restarting syscall.
896                          * SA_RESTART is ignored (assumed not set) similarly
897                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
898                          * since restart data is saved in "restart block"
899                          * in task struct, and if signal handler uses a syscall
900                          * which in turn saves another such restart block,
901                          * old data is lost and restart becomes impossible)
902                          */
903                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
904                         break;
905                 default:
906                         u_error_str = err_name(u_error);
907                         if (u_error_str)
908                                 tprintf("= -1 %s (%s)",
909                                         u_error_str, strerror(u_error));
910                         else
911                                 tprintf("= -1 %lu (%s)",
912                                         u_error, strerror(u_error));
913                         break;
914                 }
915                 if (syscall_tampered(tcp))
916                         tprints(" (INJECTED)");
917                 if ((sys_res & RVAL_STR) && tcp->auxstr)
918                         tprintf(" (%s)", tcp->auxstr);
919         }
920         else {
921                 if (sys_res & RVAL_NONE)
922                         tprints("= ?");
923                 else {
924                         switch (sys_res & RVAL_MASK) {
925                         case RVAL_HEX:
926 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
927                                 if (current_wordsize < sizeof(tcp->u_rval)) {
928                                         tprintf("= %#x",
929                                                 (unsigned int) tcp->u_rval);
930                                 } else
931 #endif
932                                 {
933                                         tprintf("= %#" PRI_klx, tcp->u_rval);
934                                 }
935                                 break;
936                         case RVAL_OCTAL:
937                                 tprints("= ");
938                                 print_numeric_long_umask(tcp->u_rval);
939                                 break;
940                         case RVAL_UDECIMAL:
941 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
942                                 if (current_wordsize < sizeof(tcp->u_rval)) {
943                                         tprintf("= %u",
944                                                 (unsigned int) tcp->u_rval);
945                                 } else
946 #endif
947                                 {
948                                         tprintf("= %" PRI_klu, tcp->u_rval);
949                                 }
950                                 break;
951                         case RVAL_DECIMAL:
952                                 tprintf("= %" PRI_kld, tcp->u_rval);
953                                 break;
954                         case RVAL_FD:
955                                 if (show_fd_path) {
956                                         tprints("= ");
957                                         printfd(tcp, tcp->u_rval);
958                                 }
959                                 else
960                                         tprintf("= %" PRI_kld, tcp->u_rval);
961                                 break;
962                         default:
963                                 error_msg("invalid rval format");
964                                 break;
965                         }
966                 }
967                 if ((sys_res & RVAL_STR) && tcp->auxstr)
968                         tprintf(" (%s)", tcp->auxstr);
969                 if (syscall_tampered(tcp))
970                         tprints(" (INJECTED)");
971         }
972         if (Tflag) {
973                 tv_sub(&tv, &tv, &tcp->etime);
974                 tprintf(" <%ld.%06ld>",
975                         (long) tv.tv_sec, (long) tv.tv_usec);
976         }
977         tprints("\n");
978         dumpio(tcp);
979         line_ended();
980
981 #ifdef USE_LIBUNWIND
982         if (stack_trace_enabled)
983                 unwind_print_stacktrace(tcp);
984 #endif
985
986  ret:
987         tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
988         tcp->sys_func_rval = 0;
989         free_tcb_priv_data(tcp);
990         return 0;
991 }
992
/*
 * Handle one syscall stop of the tracee.
 *
 * When exiting(tcp) is true the stop is processed as a syscall exit,
 * otherwise as a syscall entry; signo is only handed to the entry path.
 * The return value is whatever the selected handler returns.
 */
int
trace_syscall(struct tcb *tcp, unsigned int *signo)
{
	if (exiting(tcp))
		return trace_syscall_exiting(tcp);

	return trace_syscall_entering(tcp, signo);
}
999
1000 bool
1001 is_erestart(struct tcb *tcp)
1002 {
1003         switch (tcp->u_error) {
1004                 case ERESTARTSYS:
1005                 case ERESTARTNOINTR:
1006                 case ERESTARTNOHAND:
1007                 case ERESTART_RESTARTBLOCK:
1008                         return true;
1009                 default:
1010                         return false;
1011         }
1012 }
1013
1014 static unsigned long saved_u_error;
1015
1016 void
1017 temporarily_clear_syserror(struct tcb *tcp)
1018 {
1019         saved_u_error = tcp->u_error;
1020         tcp->u_error = 0;
1021 }
1022
1023 void
1024 restore_cleared_syserror(struct tcb *tcp)
1025 {
1026         tcp->u_error = saved_u_error;
1027 }
1028
1029 /*
1030  * Check the syscall return value register value for whether it is
1031  * a negated errno code indicating an error, or a success return value.
1032  */
1033 static inline bool
1034 is_negated_errno(kernel_ulong_t val)
1035 {
1036         /* Linux kernel defines MAX_ERRNO to 4095. */
1037         kernel_ulong_t max = -(kernel_long_t) 4095;
1038
1039 #ifndef current_klongsize
1040         if (current_klongsize < sizeof(val)) {
1041                 val = (uint32_t) val;
1042                 max = (uint32_t) max;
1043         }
1044 #endif /* !current_klongsize */
1045
1046         return val >= max;
1047 }
1048
1049 #include "arch_regs.c"
1050
1051 #ifdef HAVE_GETRVAL2
1052 # include "arch_getrval2.c"
1053 #endif
1054
1055 void
1056 print_pc(struct tcb *tcp)
1057 {
1058 #if defined ARCH_PC_REG
1059 # define ARCH_GET_PC 0
1060 #elif defined ARCH_PC_PEEK_ADDR
1061         kernel_ulong_t pc;
1062 # define ARCH_PC_REG pc
1063 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1064 #else
1065 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1066 #endif
1067         get_regs(tcp->pid);
1068         if (get_regs_error || ARCH_GET_PC)
1069                 tprints(current_wordsize == 4 ? "[????????] "
1070                                               : "[????????????????] ");
1071         else
1072                 tprintf(current_wordsize == 4
1073                         ? "[%08" PRI_klx "] " : "[%016" PRI_klx "] ",
1074                         (kernel_ulong_t) ARCH_PC_REG);
1075 }
1076
1077 #include "getregs_old.h"
1078
1079 #undef ptrace_getregset_or_getregs
1080 #undef ptrace_setregset_or_setregs
1081 #ifdef ARCH_REGS_FOR_GETREGSET
1082
1083 # define ptrace_getregset_or_getregs ptrace_getregset
/*
 * Fetch the tracee's NT_PRSTATUS register set with PTRACE_GETREGSET.
 * Returns the ptrace() result unchanged.
 *
 * Two flavours, selected at compile time:
 * - ARCH_IOVEC_FOR_GETREGSET defined: the architecture supplies the iovec;
 *   only its iov_len is set here, to sizeof(ARCH_REGS_FOR_GETREGSET).
 * - otherwise: a function-local static iovec that points at
 *   ARCH_REGS_FOR_GETREGSET and never changes is used.
 */
1084 static long
1085 ptrace_getregset(pid_t pid)
1086 {
1087 # ifdef ARCH_IOVEC_FOR_GETREGSET
1088         /* variable iovec */
        /*
         * iov_len is reset to the full buffer size before every request.
         * NOTE(review): presumably because the kernel stores the number of
         * bytes actually returned in iov_len -- confirm against ptrace(2).
         */
1089         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1090         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1091                       &ARCH_IOVEC_FOR_GETREGSET);
1092 # else
1093         /* constant iovec */
1094         static struct iovec io = {
1095                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1096                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1097         };
1098         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1099
1100 # endif
1101 }
1102
1103 # ifndef HAVE_GETREGS_OLD
1104 #  define ptrace_setregset_or_setregs ptrace_setregset
/*
 * Write the NT_PRSTATUS register set back to the tracee with
 * PTRACE_SETREGSET, using the same iovec flavours as ptrace_getregset().
 * The value returned by ptrace() is converted to this function's int
 * return type.
 */
1105 static int
1106 ptrace_setregset(pid_t pid)
1107 {
1108 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1109         /* variable iovec */
        /*
         * NOTE(review): unlike ptrace_getregset(), iov_len is not touched
         * here; presumably the length left by the preceding fetch is the
         * one that has to be written back -- confirm.
         */
1110         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1111                       &ARCH_IOVEC_FOR_GETREGSET);
1112 #  else
1113         /* constant iovec */
        /* Covers the whole of ARCH_REGS_FOR_GETREGSET. */
1114         static struct iovec io = {
1115                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1116                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1117         };
1118         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1119 #  endif
1120 }
1121 # endif /* !HAVE_GETREGS_OLD */
1122
1123 #elif defined ARCH_REGS_FOR_GETREGS
1124
1125 # define ptrace_getregset_or_getregs ptrace_getregs
/*
 * Read the tracee's registers into ARCH_REGS_FOR_GETREGS with
 * PTRACE_GETREGS.  Returns the ptrace() result unchanged.
 */
1126 static long
1127 ptrace_getregs(pid_t pid)
1128 {
1129 # if defined SPARC || defined SPARC64
1130         /* SPARC systems have the meaning of data and addr reversed */
        /* Hence the buffer goes into the third argument, not the fourth. */
1131         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1132 # else
1133         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1134 # endif
1135 }
1136
1137 # ifndef HAVE_GETREGS_OLD
1138 #  define ptrace_setregset_or_setregs ptrace_setregs
/*
 * Write the contents of ARCH_REGS_FOR_GETREGS back to the tracee with
 * PTRACE_SETREGS.  The value returned by ptrace() is converted to this
 * function's int return type.
 */
1139 static int
1140 ptrace_setregs(pid_t pid)
1141 {
1142 #  if defined SPARC || defined SPARC64
1143         /* SPARC systems have the meaning of data and addr reversed */
        /* Hence the buffer goes into the third argument, not the fourth. */
1144         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1145 #  else
1146         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1147 #  endif
1148 }
1149 # endif /* !HAVE_GETREGS_OLD */
1150
1151 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1152
1153 static void
1154 get_regs(pid_t pid)
1155 {
1156 #undef USE_GET_SYSCALL_RESULT_REGS
1157 #ifdef ptrace_getregset_or_getregs
1158
1159         if (get_regs_error != -1)
1160                 return;
1161
1162 # ifdef HAVE_GETREGS_OLD
1163         /*
1164          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1165          * fallback to getregs_old.
1166          */
1167         static int use_getregs_old;
1168         if (use_getregs_old < 0) {
1169                 get_regs_error = ptrace_getregset_or_getregs(pid);
1170                 return;
1171         } else if (use_getregs_old == 0) {
1172                 get_regs_error = ptrace_getregset_or_getregs(pid);
1173                 if (get_regs_error >= 0) {
1174                         use_getregs_old = -1;
1175                         return;
1176                 }
1177                 if (errno == EPERM || errno == ESRCH)
1178                         return;
1179                 use_getregs_old = 1;
1180         }
1181         get_regs_error = getregs_old(pid);
1182 # else /* !HAVE_GETREGS_OLD */
1183         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1184         get_regs_error = ptrace_getregset_or_getregs(pid);
1185 # endif /* !HAVE_GETREGS_OLD */
1186
1187 #else /* !ptrace_getregset_or_getregs */
1188
1189 # define USE_GET_SYSCALL_RESULT_REGS 1
1190 # warning get_regs is not implemented for this architecture yet
1191         get_regs_error = 0;
1192
1193 #endif /* !ptrace_getregset_or_getregs */
1194 }
1195
1196 #ifdef ptrace_setregset_or_setregs
/*
 * Write the register buffer back to the tracee identified by pid through
 * whichever helper ptrace_setregset_or_setregs expands to
 * (ptrace_setregset or ptrace_setregs).  Returns that helper's result.
 */
1197 static int
1198 set_regs(pid_t pid)
1199 {
1200         return ptrace_setregset_or_setregs(pid);
1201 }
1202 #endif /* ptrace_setregset_or_setregs */
1203
/*
 * Storage for the sysent entry that get_scno() makes up on the fly for
 * a syscall number that has no entry in sysent[].
 */
1204 struct sysent_buf {
        /* tcb using the entry; free_sysent_buf() clears its s_ent and s_prev_ent */
1205         struct tcb *tcp;
        /* the made-up entry itself; get_scno() points tcp->s_ent at it */
1206         struct_sysent ent;
        /*
         * Holds the generated "syscall_<number>" name: the sizeof of the
         * format string covers the prefix and the terminating NUL, and
         * 3 characters per byte of kernel_ulong_t is an upper bound on
         * the number of decimal digits.
         */
1207         char buf[sizeof("syscall_%lu") + sizeof(kernel_ulong_t) * 3];
1208 };
1209
1210 static void
1211 free_sysent_buf(void *ptr)
1212 {
1213         struct sysent_buf *s = ptr;
1214         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1215         free(ptr);
1216 }
1217
1218 /*
1219  * Returns:
1220  * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
1221  * 1: ok, continue in trace_syscall_entering().
1222  * other: error, trace_syscall_entering() should print error indicator
1223  *    ("????" etc) and bail out.
1224  */
1225 int
1226 get_scno(struct tcb *tcp)
1227 {
1228         get_regs(tcp->pid);
1229
1230         if (get_regs_error)
1231                 return -1;
1232
1233         int rc = arch_get_scno(tcp);
1234         if (rc != 1)
1235                 return rc;
1236
1237         if (scno_is_valid(tcp->scno)) {
1238                 tcp->s_ent = &sysent[tcp->scno];
1239                 tcp->qual_flg = qual_flags(tcp->scno);
1240         } else {
1241                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1242
1243                 s->tcp = tcp;
1244                 s->ent.nargs = MAX_ARGS;
1245                 s->ent.sen = SEN_printargs;
1246                 s->ent.sys_func = printargs;
1247                 s->ent.sys_name = s->buf;
1248                 sprintf(s->buf, "syscall_%" PRI_klu, shuffle_scno(tcp->scno));
1249
1250                 tcp->s_ent = &s->ent;
1251                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1252
1253                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1254
1255                 if (debug_flag)
1256                         error_msg("pid %d invalid syscall %" PRI_kld,
1257                                   tcp->pid, tcp->scno);
1258         }
1259         return 1;
1260 }
1261
1262 #ifdef USE_GET_SYSCALL_RESULT_REGS
1263 static int get_syscall_result_regs(struct tcb *);
1264 #endif
1265
1266 /* Returns:
1267  * 1: ok, continue in trace_syscall_exiting().
1268  * -1: error, trace_syscall_exiting() should print error indicator
1269  *    ("????" etc) and bail out.
1270  */
1271 static int
1272 get_syscall_result(struct tcb *tcp)
1273 {
1274 #ifdef USE_GET_SYSCALL_RESULT_REGS
1275         if (get_syscall_result_regs(tcp))
1276                 return -1;
1277 #endif
1278         tcp->u_error = 0;
1279         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1280
1281         return 1;
1282 }
1283
1284 #include "get_scno.c"
1285 #include "set_scno.c"
1286 #include "get_syscall_args.c"
1287 #ifdef USE_GET_SYSCALL_RESULT_REGS
1288 # include "get_syscall_result.c"
1289 #endif
1290 #include "get_error.c"
1291 #include "set_error.c"
1292 #ifdef HAVE_GETREGS_OLD
1293 # include "getregs_old.c"
1294 #endif
1295
1296 const char *
1297 syscall_name(kernel_ulong_t scno)
1298 {
1299 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1300         if (current_personality == X32_PERSONALITY_NUMBER)
1301                 scno &= ~__X32_SYSCALL_BIT;
1302 #endif
1303         return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
1304 }