]> granicus.if.org Git - strace/blob - syscall.c
Implement -e trace=%stat option
[strace] / syscall.c
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "defs.h"
35 #include "native_defs.h"
36 #include "nsig.h"
37 #include <sys/param.h>
38
39 /* for struct iovec */
40 #include <sys/uio.h>
41
42 /* for __X32_SYSCALL_BIT */
43 #include <asm/unistd.h>
44
45 #include "regs.h"
46 #include "ptrace.h"
47
48 #if defined(SPARC64)
49 # undef PTRACE_GETREGS
50 # define PTRACE_GETREGS PTRACE_GETREGS64
51 # undef PTRACE_SETREGS
52 # define PTRACE_SETREGS PTRACE_SETREGS64
53 #endif
54
55 #if defined SPARC64
56 # include <asm/psrcompat.h>
57 #elif defined SPARC
58 # include <asm/psr.h>
59 #endif
60
61 #ifdef IA64
62 # include <asm/rse.h>
63 #endif
64
65 #ifndef NT_PRSTATUS
66 # define NT_PRSTATUS 1
67 #endif
68
69 #include "syscall.h"
70
/*
 * Short aliases for the flag names that appear in the entries of the
 * included syscallent*.h files.  They are defined only while the syscall
 * tables below are being built and are removed again afterwards.
 */
#define TD TRACE_DESC
#define TF TRACE_FILE
#define TI TRACE_IPC
#define TN TRACE_NETWORK
#define TP TRACE_PROCESS
#define TS TRACE_SIGNAL
#define TM TRACE_MEMORY
#define TST TRACE_STAT
#define TLST TRACE_LSTAT
#define TFST TRACE_FSTAT
#define TSTA TRACE_STAT_LIKE
#define TSF TRACE_STATFS
#define TFSF TRACE_FSTATFS
#define TSFA TRACE_STATFS_LIKE
#define NF SYSCALL_NEVER_FAILS
#define MA MAX_ARGS
#define SI STACKTRACE_INVALIDATE_CACHE
#define SE STACKTRACE_CAPTURE_ON_ENTER
#define CST COMPAT_SYSCALL_TYPES

/*
 * SEN(name) expands to two comma-separated initializer fields:
 * the SEN_name constant followed by SYS_FUNC_NAME(sys_name).
 */
#define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)

/* Syscall table of personality 0; the entries come from syscallent.h. */
const struct_sysent sysent0[] = {
#include "syscallent.h"
};

/* Syscall table of personality 1, present only when it is supported. */
#if SUPPORTED_PERSONALITIES > 1
# include PERSONALITY1_INCLUDE_FUNCS
static const struct_sysent sysent1[] = {
# include "syscallent1.h"
};
#endif

/* Syscall table of personality 2, present only when it is supported. */
#if SUPPORTED_PERSONALITIES > 2
# include PERSONALITY2_INCLUDE_FUNCS
static const struct_sysent sysent2[] = {
# include "syscallent2.h"
};
#endif

/*
 * The tables are complete: drop every shorthand defined above, because
 * such short macro names would pollute the namespace of the code below.
 */
#undef SEN
#undef TD
#undef TF
#undef TI
#undef TN
#undef TP
#undef TS
#undef TM
#undef TST
#undef TLST
#undef TFST
#undef TSTA
#undef TSF
#undef TFSF
#undef TSFA
#undef NF
#undef MA
#undef SI
#undef SE
#undef CST
133
134 /*
135  * `ioctlent[012].h' files are automatically generated by the auxiliary
136  * program `ioctlsort', such that the list is sorted by the `code' field.
137  * This has the side-effect of resolving the _IO.. macros into
138  * plain integers, eliminating the need to include here everything
139  * in "/usr/include".
140  */
141
/*
 * Personality 0 lookup tables.  Each array is filled in by including
 * the corresponding generated/maintained header as its initializer list.
 */
const char *const errnoent0[] = {
#include "errnoent.h"
};
const char *const signalent0[] = {
#include "signalent.h"
};
const struct_ioctlent ioctlent0[] = {
#include "ioctlent0.h"
};

/*
 * Personality 1 lookup tables.  The printers0/printers1 structures are
 * only built when more than one personality is supported.
 */
#if SUPPORTED_PERSONALITIES > 1
static const char *const errnoent1[] = {
# include "errnoent1.h"
};
static const char *const signalent1[] = {
# include "signalent1.h"
};
static const struct_ioctlent ioctlent1[] = {
# include "ioctlent1.h"
};
# include PERSONALITY0_INCLUDE_PRINTERS_DECLS
static const struct_printers printers0 = {
# include PERSONALITY0_INCLUDE_PRINTERS_DEFS
};
# include PERSONALITY1_INCLUDE_PRINTERS_DECLS
static const struct_printers printers1 = {
# include PERSONALITY1_INCLUDE_PRINTERS_DEFS
};
#endif

/* Personality 2 lookup tables and printers. */
#if SUPPORTED_PERSONALITIES > 2
static const char *const errnoent2[] = {
# include "errnoent2.h"
};
static const char *const signalent2[] = {
# include "signalent2.h"
};
static const struct_ioctlent ioctlent2[] = {
# include "ioctlent2.h"
};
# include PERSONALITY2_INCLUDE_PRINTERS_DECLS
static const struct_printers printers2 = {
# include PERSONALITY2_INCLUDE_PRINTERS_DEFS
};
#endif
187
188 enum {
189         nsyscalls0 = ARRAY_SIZE(sysent0)
190 #if SUPPORTED_PERSONALITIES > 1
191         , nsyscalls1 = ARRAY_SIZE(sysent1)
192 # if SUPPORTED_PERSONALITIES > 2
193         , nsyscalls2 = ARRAY_SIZE(sysent2)
194 # endif
195 #endif
196 };
197
198 enum {
199         nerrnos0 = ARRAY_SIZE(errnoent0)
200 #if SUPPORTED_PERSONALITIES > 1
201         , nerrnos1 = ARRAY_SIZE(errnoent1)
202 # if SUPPORTED_PERSONALITIES > 2
203         , nerrnos2 = ARRAY_SIZE(errnoent2)
204 # endif
205 #endif
206 };
207
208 enum {
209         nsignals0 = ARRAY_SIZE(signalent0)
210 #if SUPPORTED_PERSONALITIES > 1
211         , nsignals1 = ARRAY_SIZE(signalent1)
212 # if SUPPORTED_PERSONALITIES > 2
213         , nsignals2 = ARRAY_SIZE(signalent2)
214 # endif
215 #endif
216 };
217
218 enum {
219         nioctlents0 = ARRAY_SIZE(ioctlent0)
220 #if SUPPORTED_PERSONALITIES > 1
221         , nioctlents1 = ARRAY_SIZE(ioctlent1)
222 # if SUPPORTED_PERSONALITIES > 2
223         , nioctlents2 = ARRAY_SIZE(ioctlent2)
224 # endif
225 #endif
226 };
227
228 #if SUPPORTED_PERSONALITIES > 1
229 const struct_sysent *sysent = sysent0;
230 const char *const *errnoent = errnoent0;
231 const char *const *signalent = signalent0;
232 const struct_ioctlent *ioctlent = ioctlent0;
233 const struct_printers *printers = &printers0;
234 #endif
235
236 unsigned nsyscalls = nsyscalls0;
237 unsigned nerrnos = nerrnos0;
238 unsigned nsignals = nsignals0;
239 unsigned nioctlents = nioctlents0;
240
241 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
242         nsyscalls0,
243 #if SUPPORTED_PERSONALITIES > 1
244         nsyscalls1,
245 #endif
246 #if SUPPORTED_PERSONALITIES > 2
247         nsyscalls2,
248 #endif
249 };
250 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
251         sysent0,
252 #if SUPPORTED_PERSONALITIES > 1
253         sysent1,
254 #endif
255 #if SUPPORTED_PERSONALITIES > 2
256         sysent2,
257 #endif
258 };
259
260 #if SUPPORTED_PERSONALITIES > 1
261 unsigned current_personality;
262
263 # ifndef current_wordsize
264 unsigned current_wordsize;
265 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
266         PERSONALITY0_WORDSIZE,
267         PERSONALITY1_WORDSIZE,
268 # if SUPPORTED_PERSONALITIES > 2
269         PERSONALITY2_WORDSIZE,
270 # endif
271 };
272 # endif
273
274 # ifndef current_klongsize
275 unsigned current_klongsize;
276 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
277         PERSONALITY0_KLONGSIZE,
278         PERSONALITY1_KLONGSIZE,
279 #  if SUPPORTED_PERSONALITIES > 2
280         PERSONALITY2_KLONGSIZE,
281 #  endif
282 };
283 # endif
284
285 void
286 set_personality(int personality)
287 {
288         nsyscalls = nsyscall_vec[personality];
289         sysent = sysent_vec[personality];
290
291         switch (personality) {
292         case 0:
293                 errnoent = errnoent0;
294                 nerrnos = nerrnos0;
295                 ioctlent = ioctlent0;
296                 nioctlents = nioctlents0;
297                 signalent = signalent0;
298                 nsignals = nsignals0;
299                 printers = &printers0;
300                 break;
301
302         case 1:
303                 errnoent = errnoent1;
304                 nerrnos = nerrnos1;
305                 ioctlent = ioctlent1;
306                 nioctlents = nioctlents1;
307                 signalent = signalent1;
308                 nsignals = nsignals1;
309                 printers = &printers1;
310                 break;
311
312 # if SUPPORTED_PERSONALITIES > 2
313         case 2:
314                 errnoent = errnoent2;
315                 nerrnos = nerrnos2;
316                 ioctlent = ioctlent2;
317                 nioctlents = nioctlents2;
318                 signalent = signalent2;
319                 nsignals = nsignals2;
320                 printers = &printers2;
321                 break;
322 # endif
323         }
324
325         current_personality = personality;
326 # ifndef current_wordsize
327         current_wordsize = personality_wordsize[personality];
328 # endif
329 # ifndef current_klongsize
330         current_klongsize = personality_klongsize[personality];
331 # endif
332 }
333
334 static void
335 update_personality(struct tcb *tcp, unsigned int personality)
336 {
337         if (personality == current_personality)
338                 return;
339         set_personality(personality);
340
341         if (personality == tcp->currpers)
342                 return;
343         tcp->currpers = personality;
344
345 # undef PERSONALITY_NAMES
346 # if defined POWERPC64
347 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
348 # elif defined X86_64
349 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
350 # elif defined X32
351 #  define PERSONALITY_NAMES {"x32", "32 bit"}
352 # elif defined AARCH64
353 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
354 # elif defined TILE
355 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
356 # endif
357 # ifdef PERSONALITY_NAMES
358         if (!qflag) {
359                 static const char *const names[] = PERSONALITY_NAMES;
360                 error_msg("[ Process PID=%d runs in %s mode. ]",
361                           tcp->pid, names[personality]);
362         }
363 # endif
364 }
365 #endif
366
#ifdef SYS_socket_subcall
/*
 * Turn a socketcall(call, args) invocation into the sub-syscall it
 * multiplexes.
 *
 * u_arg[0] holds the sub-call number and u_arg[1] the tracee address
 * of the argument block.  The tcb is left untouched when the sub-call
 * number is out of range or the argument block cannot be read.
 */
static void
decode_socket_subcall(struct tcb *tcp)
{
	const int subcall = tcp->u_arg[0];

	if (subcall < 1 || subcall >= SYS_socket_nsubcalls)
		return;

	const kernel_ulong_t new_scno = SYS_socket_subcall + subcall;
	const unsigned int argc = sysent[new_scno].nargs;
	uint64_t raw[argc];

	/* Fetch argc tracee words before modifying anything in the tcb. */
	if (umoven(tcp, tcp->u_arg[1], argc * current_wordsize, raw) < 0)
		return;

	tcp->scno = new_scno;
	tcp->qual_flg = qual_flags(new_scno);
	tcp->s_ent = &sysent[new_scno];

	/* The block is an array of 32-bit or of 64-bit tracee words. */
	const bool words_are_32bit = (sizeof(uint32_t) == current_wordsize);
	const uint32_t *const raw32 = (const uint32_t *) (const void *) raw;

	for (unsigned int idx = 0; idx < argc; ++idx) {
		if (words_are_32bit)
			tcp->u_arg[idx] = raw32[idx];
		else
			tcp->u_arg[idx] = raw[idx];
	}
}
#endif
393
#ifdef SYS_ipc_subcall
/*
 * Turn an ipc(call, ...) invocation into the sub-syscall it multiplexes.
 *
 * The upper 16 bits of u_arg[0] carry a version, the lower 16 bits the
 * sub-call number.  A non-zero version is not decoded on S390/S390X,
 * nor on SPARC64 when the word size is 8; elsewhere it is stored with
 * set_tcb_priv_ulong() and stripped from the call number.
 * Only the sub-calls 1-4, 11-14 and 21-24 are accepted; for those the
 * arguments are shifted down by one slot.
 */
static void
decode_ipc_subcall(struct tcb *tcp)
{
	unsigned int op = tcp->u_arg[0];
	const unsigned int version = op >> 16;

	if (version != 0) {
# if defined S390 || defined S390X
		return;
# else
#  ifdef SPARC64
		if (current_wordsize == 8)
			return;
#  endif
		set_tcb_priv_ulong(tcp, version);
		op &= 0xffff;
# endif
	}

	/* Valid numbers are 1..4 within each of the decades 0, 10 and 20. */
	const unsigned int last_digit = op % 10;
	if (op > 24 || last_digit < 1 || last_digit > 4)
		return;

	tcp->scno = SYS_ipc_subcall + op;
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Drop the multiplexer argument: u_arg[i] = u_arg[i + 1]. */
	memmove(&tcp->u_arg[0], &tcp->u_arg[1],
		tcp->s_ent->nargs * sizeof(tcp->u_arg[0]));
}
#endif
433
#ifdef LINUX_MIPSO32
/*
 * Decode the indirect syscall(nr, ...) of the MIPS o32 ABI: when u_arg[0]
 * is a valid syscall number it becomes the syscall being traced and
 * the remaining arguments move down by one slot.
 */
static void
decode_mips_subcall(struct tcb *tcp)
{
	if (!scno_is_valid(tcp->u_arg[0]))
		return;

	tcp->scno = tcp->u_arg[0];
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Shift every argument but the first one slot to the left. */
	memmove(&tcp->u_arg[0], &tcp->u_arg[1],
		(ARRAY_SIZE(tcp->u_arg) - 1) * sizeof(tcp->u_arg[0]));

	if (tcp->s_ent->nargs != MAX_ARGS)
		return;

	/*
	 * Fetching the last arg of 7-arg syscalls (fadvise64_64
	 * and sync_file_range) requires additional code,
	 * see linux/mips/get_syscall_args.c
	 * If the read fails the last argument is reported as 0.
	 */
	const int rc = umoven(tcp,
			      mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
			      sizeof(tcp->u_arg[0]),
			      &tcp->u_arg[MAX_ARGS - 1]);
	if (rc < 0) {
		tcp->u_arg[MAX_ARGS - 1] = 0;
	}
}
#endif /* LINUX_MIPSO32 */
459
460 static void
461 dumpio(struct tcb *tcp)
462 {
463         if (syserror(tcp))
464                 return;
465
466         int fd = tcp->u_arg[0];
467         if (fd < 0)
468                 return;
469
470         if (is_number_in_set(fd, &read_set)) {
471                 switch (tcp->s_ent->sen) {
472                 case SEN_read:
473                 case SEN_pread:
474                 case SEN_recv:
475                 case SEN_recvfrom:
476                 case SEN_mq_timedreceive:
477                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
478                         return;
479                 case SEN_readv:
480                 case SEN_preadv:
481                 case SEN_preadv2:
482                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
483                                      tcp->u_rval);
484                         return;
485                 case SEN_recvmsg:
486                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
487                         return;
488                 case SEN_recvmmsg:
489                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
490                         return;
491                 }
492         }
493         if (is_number_in_set(fd, &write_set)) {
494                 switch (tcp->s_ent->sen) {
495                 case SEN_write:
496                 case SEN_pwrite:
497                 case SEN_send:
498                 case SEN_sendto:
499                 case SEN_mq_timedsend:
500                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
501                         break;
502                 case SEN_writev:
503                 case SEN_pwritev:
504                 case SEN_pwritev2:
505                 case SEN_vmsplice:
506                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], -1);
507                         break;
508                 case SEN_sendmsg:
509                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1);
510                         break;
511                 case SEN_sendmmsg:
512                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
513                         break;
514                 }
515         }
516 }
517
518 /*
519  * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
520  * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
521  */
522 static kernel_ulong_t
523 shuffle_scno(kernel_ulong_t scno)
524 {
525 #if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
526         if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
527                 return scno;
528
529         /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
530         if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
531                 return 0x000ffff0;
532         if (scno == 0x000ffff0)
533                 return ARM_FIRST_SHUFFLED_SYSCALL;
534
535 # define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
536         /*
537          * Is it ARM specific syscall?
538          * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
539          * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
540          */
541         if (scno >= 0x000f0000 &&
542             scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
543                 return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
544         }
545         if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
546                 return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
547         }
548 #endif /* ARM || AARCH64 */
549
550         return scno;
551 }
552
553 const char *
554 err_name(unsigned long err)
555 {
556         if ((err < nerrnos) && errnoent[err])
557                 return errnoent[err];
558
559         return NULL;
560 }
561
/*
 * Status of the most recent register fetch.  A non-zero value makes
 * trace_syscall_exiting() skip get_syscall_result() and report the
 * result as unavailable.
 */
static long get_regs_error;

/* Set get_regs_error to -1, i.e. to a non-zero (error) state. */
void
clear_regs(void)
{
	get_regs_error = -1L;
}
569
570 static void get_regs(pid_t pid);
571 static int get_syscall_args(struct tcb *);
572 static int get_syscall_result(struct tcb *);
573 static int arch_get_scno(struct tcb *tcp);
574 static int arch_set_scno(struct tcb *, kernel_ulong_t);
575 static void get_error(struct tcb *, const bool);
576 static int arch_set_error(struct tcb *);
577 static int arch_set_success(struct tcb *);
578
579 struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
580
581 static struct inject_opts *
582 tcb_inject_opts(struct tcb *tcp)
583 {
584         return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
585                ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
586 }
587
588
589 static long
590 tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
591 {
592         if (!tcp->inject_vec[current_personality]) {
593                 tcp->inject_vec[current_personality] =
594                         xcalloc(nsyscalls, sizeof(**inject_vec));
595                 memcpy(tcp->inject_vec[current_personality],
596                        inject_vec[current_personality],
597                        nsyscalls * sizeof(**inject_vec));
598         }
599
600         struct inject_opts *opts = tcb_inject_opts(tcp);
601
602         if (!opts || opts->first == 0)
603                 return 0;
604
605         --opts->first;
606
607         if (opts->first != 0)
608                 return 0;
609
610         opts->first = opts->step;
611
612         if (opts->signo > 0)
613                 *signo = opts->signo;
614         if (opts->rval != INJECT_OPTS_RVAL_DEFAULT && !arch_set_scno(tcp, -1))
615                 tcp->flags |= TCB_TAMPERED;
616
617         return 0;
618 }
619
620 static long
621 tamper_with_syscall_exiting(struct tcb *tcp)
622 {
623         struct inject_opts *opts = tcb_inject_opts(tcp);
624
625         if (!opts)
626                 return 0;
627
628         if (opts->rval >= 0) {
629                 kernel_long_t u_rval = tcp->u_rval;
630
631                 tcp->u_rval = opts->rval;
632                 if (arch_set_success(tcp)) {
633                         tcp->u_rval = u_rval;
634                 } else {
635                         tcp->u_error = 0;
636                 }
637         } else {
638                 unsigned long new_error = -opts->rval;
639
640                 if (new_error != tcp->u_error && new_error <= MAX_ERRNO_VALUE) {
641                         unsigned long u_error = tcp->u_error;
642
643                         tcp->u_error = new_error;
644                         if (arch_set_error(tcp)) {
645                                 tcp->u_error = u_error;
646                         }
647                 }
648         }
649
650         return 0;
651 }
652
653 static int
654 trace_syscall_entering(struct tcb *tcp, unsigned int *sig)
655 {
656         int res = get_scno(tcp);
657         if (res == 0)
658                 return res;
659
660         int scno_good = res;
661         if (res == 1)
662                 res = get_syscall_args(tcp);
663
664         if (res != 1) {
665                 printleader(tcp);
666                 tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
667                 /*
668                  * " <unavailable>" will be added later by the code which
669                  * detects ptrace errors.
670                  */
671                 goto ret;
672         }
673
674 #ifdef LINUX_MIPSO32
675         if (SEN_syscall == tcp->s_ent->sen)
676                 decode_mips_subcall(tcp);
677 #endif
678
679 #if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
680         switch (tcp->s_ent->sen) {
681 # ifdef SYS_socket_subcall
682                 case SEN_socketcall:
683                         decode_socket_subcall(tcp);
684                         break;
685 # endif
686 # ifdef SYS_ipc_subcall
687                 case SEN_ipc:
688                         decode_ipc_subcall(tcp);
689                         break;
690 # endif
691         }
692 #endif
693
694         /* Restrain from fault injection while the trace executes strace code. */
695         if (hide_log(tcp)) {
696                 tcp->qual_flg &= ~QUAL_INJECT;
697         }
698
699         switch (tcp->s_ent->sen) {
700                 case SEN_execve:
701                 case SEN_execveat:
702 #if defined SPARC || defined SPARC64
703                 case SEN_execv:
704 #endif
705                         tcp->flags &= ~TCB_HIDE_LOG;
706                         break;
707         }
708
709         if (!(tcp->qual_flg & QUAL_TRACE)
710          || (tracing_paths && !pathtrace_match(tcp))
711         ) {
712                 tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
713                 tcp->sys_func_rval = 0;
714                 return 0;
715         }
716
717         tcp->flags &= ~TCB_FILTERED;
718
719         if (hide_log(tcp)) {
720                 res = 0;
721                 goto ret;
722         }
723
724         if (tcp->qual_flg & QUAL_INJECT)
725                 tamper_with_syscall_entering(tcp, sig);
726
727         if (cflag == CFLAG_ONLY_STATS) {
728                 res = 0;
729                 goto ret;
730         }
731
732 #ifdef USE_LIBUNWIND
733         if (stack_trace_enabled) {
734                 if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
735                         unwind_capture_stacktrace(tcp);
736         }
737 #endif
738
739         printleader(tcp);
740         tprintf("%s(", tcp->s_ent->sys_name);
741         if (tcp->qual_flg & QUAL_RAW)
742                 res = printargs(tcp);
743         else
744                 res = tcp->s_ent->sys_func(tcp);
745
746         fflush(tcp->outf);
747  ret:
748         tcp->flags |= TCB_INSYSCALL;
749         tcp->sys_func_rval = res;
750         /* Measure the entrance time as late as possible to avoid errors. */
751         if (Tflag || cflag)
752                 gettimeofday(&tcp->etime, NULL);
753         return res;
754 }
755
756 static bool
757 syscall_tampered(struct tcb *tcp)
758 {
759         return tcp->flags & TCB_TAMPERED;
760 }
761
762 static int
763 trace_syscall_exiting(struct tcb *tcp)
764 {
765         struct timeval tv;
766
767         /* Measure the exit time as early as possible to avoid errors. */
768         if ((Tflag || cflag) && !(filtered(tcp) || hide_log(tcp)))
769                 gettimeofday(&tv, NULL);
770
771 #ifdef USE_LIBUNWIND
772         if (stack_trace_enabled) {
773                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
774                         unwind_cache_invalidate(tcp);
775         }
776 #endif
777
778         if (filtered(tcp) || hide_log(tcp))
779                 goto ret;
780
781         get_regs(tcp->pid);
782 #if SUPPORTED_PERSONALITIES > 1
783         update_personality(tcp, tcp->currpers);
784 #endif
785         int res = (get_regs_error ? -1 : get_syscall_result(tcp));
786
787         if (syserror(tcp) && syscall_tampered(tcp))
788                 tamper_with_syscall_exiting(tcp);
789
790         if (cflag) {
791                 count_syscall(tcp, &tv);
792                 if (cflag == CFLAG_ONLY_STATS) {
793                         goto ret;
794                 }
795         }
796
797         /* If not in -ff mode, and printing_tcp != tcp,
798          * then the log currently does not end with output
799          * of _our syscall entry_, but with something else.
800          * We need to say which syscall's return is this.
801          *
802          * Forced reprinting via TCB_REPRINT is used only by
803          * "strace -ff -oLOG test/threaded_execve" corner case.
804          * It's the only case when -ff mode needs reprinting.
805          */
806         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
807                 tcp->flags &= ~TCB_REPRINT;
808                 printleader(tcp);
809                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
810         }
811         printing_tcp = tcp;
812
813         tcp->s_prev_ent = NULL;
814         if (res != 1) {
815                 /* There was error in one of prior ptrace ops */
816                 tprints(") ");
817                 tabto();
818                 tprints("= ? <unavailable>\n");
819                 line_ended();
820                 tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
821                 tcp->sys_func_rval = 0;
822                 free_tcb_priv_data(tcp);
823                 return res;
824         }
825         tcp->s_prev_ent = tcp->s_ent;
826
827         int sys_res = 0;
828         if (tcp->qual_flg & QUAL_RAW) {
829                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
830         } else {
831         /* FIXME: not_failing_only (IOW, option -z) is broken:
832          * failure of syscall is known only after syscall return.
833          * Thus we end up with something like this on, say, ENOENT:
834          *     open("doesnt_exist", O_RDONLY <unfinished ...>
835          *     {next syscall decode}
836          * whereas the intended result is that open(...) line
837          * is not shown at all.
838          */
839                 if (not_failing_only && tcp->u_error)
840                         goto ret;       /* ignore failed syscalls */
841                 if (tcp->sys_func_rval & RVAL_DECODED)
842                         sys_res = tcp->sys_func_rval;
843                 else
844                         sys_res = tcp->s_ent->sys_func(tcp);
845         }
846
847         tprints(") ");
848         tabto();
849         unsigned long u_error = tcp->u_error;
850
851         if (tcp->qual_flg & QUAL_RAW) {
852                 if (u_error) {
853                         tprintf("= -1 (errno %lu)", u_error);
854                 } else {
855                         tprintf("= %#" PRI_klx, tcp->u_rval);
856                 }
857                 if (syscall_tampered(tcp))
858                         tprints(" (INJECTED)");
859         }
860         else if (!(sys_res & RVAL_NONE) && u_error) {
861                 const char *u_error_str;
862
863                 switch (u_error) {
864                 /* Blocked signals do not interrupt any syscalls.
865                  * In this case syscalls don't return ERESTARTfoo codes.
866                  *
867                  * Deadly signals set to SIG_DFL interrupt syscalls
868                  * and kill the process regardless of which of the codes below
869                  * is returned by the interrupted syscall.
870                  * In some cases, kernel forces a kernel-generated deadly
871                  * signal to be unblocked and set to SIG_DFL (and thus cause
872                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
873                  * or SIGILL. (The alternative is to leave process spinning
874                  * forever on the faulty instruction - not useful).
875                  *
876                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
877                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
878                  * but kernel will always restart them.
879                  */
880                 case ERESTARTSYS:
881                         /* Most common type of signal-interrupted syscall exit code.
882                          * The system call will be restarted with the same arguments
883                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
884                          */
885                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
886                         break;
887                 case ERESTARTNOINTR:
888                         /* Rare. For example, fork() returns this if interrupted.
889                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
890                          */
891                         tprints("= ? ERESTARTNOINTR (To be restarted)");
892                         break;
893                 case ERESTARTNOHAND:
894                         /* pause(), rt_sigsuspend() etc use this code.
895                          * SA_RESTART is ignored (assumed not set):
896                          * syscall won't restart (will return EINTR instead)
897                          * even after signal with SA_RESTART set. However,
898                          * after SIG_IGN or SIG_DFL signal it will restart
899                          * (thus the name "restart only if has no handler").
900                          */
901                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
902                         break;
903                 case ERESTART_RESTARTBLOCK:
904                         /* Syscalls like nanosleep(), poll() which can't be
905                          * restarted with their original arguments use this
906                          * code. Kernel will execute restart_syscall() instead,
907                          * which changes arguments before restarting syscall.
908                          * SA_RESTART is ignored (assumed not set) similarly
909                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
910                          * since restart data is saved in "restart block"
911                          * in task struct, and if signal handler uses a syscall
912                          * which in turn saves another such restart block,
913                          * old data is lost and restart becomes impossible)
914                          */
915                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
916                         break;
917                 default:
918                         u_error_str = err_name(u_error);
919                         if (u_error_str)
920                                 tprintf("= -1 %s (%s)",
921                                         u_error_str, strerror(u_error));
922                         else
923                                 tprintf("= -1 %lu (%s)",
924                                         u_error, strerror(u_error));
925                         break;
926                 }
927                 if (syscall_tampered(tcp))
928                         tprints(" (INJECTED)");
929                 if ((sys_res & RVAL_STR) && tcp->auxstr)
930                         tprintf(" (%s)", tcp->auxstr);
931         }
932         else {
933                 if (sys_res & RVAL_NONE)
934                         tprints("= ?");
935                 else {
936                         switch (sys_res & RVAL_MASK) {
937                         case RVAL_HEX:
938 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
939                                 if (current_wordsize < sizeof(tcp->u_rval)) {
940                                         tprintf("= %#x",
941                                                 (unsigned int) tcp->u_rval);
942                                 } else
943 #endif
944                                 {
945                                         tprintf("= %#" PRI_klx, tcp->u_rval);
946                                 }
947                                 break;
948                         case RVAL_OCTAL:
949                                 tprints("= ");
950                                 print_numeric_long_umask(tcp->u_rval);
951                                 break;
952                         case RVAL_UDECIMAL:
953 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
954                                 if (current_wordsize < sizeof(tcp->u_rval)) {
955                                         tprintf("= %u",
956                                                 (unsigned int) tcp->u_rval);
957                                 } else
958 #endif
959                                 {
960                                         tprintf("= %" PRI_klu, tcp->u_rval);
961                                 }
962                                 break;
963                         case RVAL_DECIMAL:
964                                 tprintf("= %" PRI_kld, tcp->u_rval);
965                                 break;
966                         case RVAL_FD:
967                                 if (show_fd_path) {
968                                         tprints("= ");
969                                         printfd(tcp, tcp->u_rval);
970                                 }
971                                 else
972                                         tprintf("= %" PRI_kld, tcp->u_rval);
973                                 break;
974                         default:
975                                 error_msg("invalid rval format");
976                                 break;
977                         }
978                 }
979                 if ((sys_res & RVAL_STR) && tcp->auxstr)
980                         tprintf(" (%s)", tcp->auxstr);
981                 if (syscall_tampered(tcp))
982                         tprints(" (INJECTED)");
983         }
984         if (Tflag) {
985                 tv_sub(&tv, &tv, &tcp->etime);
986                 tprintf(" <%ld.%06ld>",
987                         (long) tv.tv_sec, (long) tv.tv_usec);
988         }
989         tprints("\n");
990         dumpio(tcp);
991         line_ended();
992
993 #ifdef USE_LIBUNWIND
994         if (stack_trace_enabled)
995                 unwind_print_stacktrace(tcp);
996 #endif
997
998  ret:
999         tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
1000         tcp->sys_func_rval = 0;
1001         free_tcb_priv_data(tcp);
1002         return 0;
1003 }
1004
/*
 * Handle one syscall stop of the tracee: run the exit-side handler
 * when exiting(tcp) holds, the entry-side handler otherwise.
 * signo is forwarded only to trace_syscall_entering().
 * The selected handler's return value is returned unchanged.
 */
int
trace_syscall(struct tcb *tcp, unsigned int *signo)
{
	if (exiting(tcp))
		return trace_syscall_exiting(tcp);

	return trace_syscall_entering(tcp, signo);
}
1011
1012 bool
1013 is_erestart(struct tcb *tcp)
1014 {
1015         switch (tcp->u_error) {
1016                 case ERESTARTSYS:
1017                 case ERESTARTNOINTR:
1018                 case ERESTARTNOHAND:
1019                 case ERESTART_RESTARTBLOCK:
1020                         return true;
1021                 default:
1022                         return false;
1023         }
1024 }
1025
1026 static unsigned long saved_u_error;
1027
1028 void
1029 temporarily_clear_syserror(struct tcb *tcp)
1030 {
1031         saved_u_error = tcp->u_error;
1032         tcp->u_error = 0;
1033 }
1034
1035 void
1036 restore_cleared_syserror(struct tcb *tcp)
1037 {
1038         tcp->u_error = saved_u_error;
1039 }
1040
1041 /*
1042  * Check the syscall return value register value for whether it is
1043  * a negated errno code indicating an error, or a success return value.
1044  */
1045 static inline bool
1046 is_negated_errno(kernel_ulong_t val)
1047 {
1048         /* Linux kernel defines MAX_ERRNO to 4095. */
1049         kernel_ulong_t max = -(kernel_long_t) 4095;
1050
1051 #ifndef current_klongsize
1052         if (current_klongsize < sizeof(val)) {
1053                 val = (uint32_t) val;
1054                 max = (uint32_t) max;
1055         }
1056 #endif /* !current_klongsize */
1057
1058         return val >= max;
1059 }
1060
1061 #include "arch_regs.c"
1062
1063 #ifdef HAVE_GETRVAL2
1064 # include "arch_getrval2.c"
1065 #endif
1066
1067 void
1068 print_pc(struct tcb *tcp)
1069 {
1070 #if defined ARCH_PC_REG
1071 # define ARCH_GET_PC 0
1072 #elif defined ARCH_PC_PEEK_ADDR
1073         kernel_ulong_t pc;
1074 # define ARCH_PC_REG pc
1075 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1076 #else
1077 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1078 #endif
1079         get_regs(tcp->pid);
1080         if (get_regs_error || ARCH_GET_PC)
1081                 tprints(current_wordsize == 4 ? "[????????] "
1082                                               : "[????????????????] ");
1083         else
1084                 tprintf(current_wordsize == 4
1085                         ? "[%08" PRI_klx "] " : "[%016" PRI_klx "] ",
1086                         (kernel_ulong_t) ARCH_PC_REG);
1087 }
1088
1089 #include "getregs_old.h"
1090
1091 #undef ptrace_getregset_or_getregs
1092 #undef ptrace_setregset_or_setregs
1093 #ifdef ARCH_REGS_FOR_GETREGSET
1094
1095 # define ptrace_getregset_or_getregs ptrace_getregset
1096 static long
1097 ptrace_getregset(pid_t pid)
1098 {
1099 # ifdef ARCH_IOVEC_FOR_GETREGSET
1100         /* variable iovec */
1101         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1102         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1103                       &ARCH_IOVEC_FOR_GETREGSET);
1104 # else
1105         /* constant iovec */
1106         static struct iovec io = {
1107                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1108                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1109         };
1110         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1111
1112 # endif
1113 }
1114
1115 # ifndef HAVE_GETREGS_OLD
1116 #  define ptrace_setregset_or_setregs ptrace_setregset
1117 static int
1118 ptrace_setregset(pid_t pid)
1119 {
1120 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1121         /* variable iovec */
1122         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1123                       &ARCH_IOVEC_FOR_GETREGSET);
1124 #  else
1125         /* constant iovec */
1126         static struct iovec io = {
1127                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1128                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1129         };
1130         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1131 #  endif
1132 }
1133 # endif /* !HAVE_GETREGS_OLD */
1134
1135 #elif defined ARCH_REGS_FOR_GETREGS
1136
1137 # define ptrace_getregset_or_getregs ptrace_getregs
1138 static long
1139 ptrace_getregs(pid_t pid)
1140 {
1141 # if defined SPARC || defined SPARC64
1142         /* SPARC systems have the meaning of data and addr reversed */
1143         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1144 # else
1145         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1146 # endif
1147 }
1148
1149 # ifndef HAVE_GETREGS_OLD
1150 #  define ptrace_setregset_or_setregs ptrace_setregs
1151 static int
1152 ptrace_setregs(pid_t pid)
1153 {
1154 #  if defined SPARC || defined SPARC64
1155         /* SPARC systems have the meaning of data and addr reversed */
1156         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1157 #  else
1158         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1159 #  endif
1160 }
1161 # endif /* !HAVE_GETREGS_OLD */
1162
1163 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1164
1165 static void
1166 get_regs(pid_t pid)
1167 {
1168 #undef USE_GET_SYSCALL_RESULT_REGS
1169 #ifdef ptrace_getregset_or_getregs
1170
1171         if (get_regs_error != -1)
1172                 return;
1173
1174 # ifdef HAVE_GETREGS_OLD
1175         /*
1176          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1177          * fallback to getregs_old.
1178          */
1179         static int use_getregs_old;
1180         if (use_getregs_old < 0) {
1181                 get_regs_error = ptrace_getregset_or_getregs(pid);
1182                 return;
1183         } else if (use_getregs_old == 0) {
1184                 get_regs_error = ptrace_getregset_or_getregs(pid);
1185                 if (get_regs_error >= 0) {
1186                         use_getregs_old = -1;
1187                         return;
1188                 }
1189                 if (errno == EPERM || errno == ESRCH)
1190                         return;
1191                 use_getregs_old = 1;
1192         }
1193         get_regs_error = getregs_old(pid);
1194 # else /* !HAVE_GETREGS_OLD */
1195         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1196         get_regs_error = ptrace_getregset_or_getregs(pid);
1197 # endif /* !HAVE_GETREGS_OLD */
1198
1199 #else /* !ptrace_getregset_or_getregs */
1200
1201 # define USE_GET_SYSCALL_RESULT_REGS 1
1202 # warning get_regs is not implemented for this architecture yet
1203         get_regs_error = 0;
1204
1205 #endif /* !ptrace_getregset_or_getregs */
1206 }
1207
#ifdef ptrace_setregset_or_setregs
/*
 * Write the registers back to process pid through whichever helper
 * ptrace_setregset_or_setregs names, and return that helper's result.
 */
static int
set_regs(pid_t pid)
{
	const int rc = ptrace_setregset_or_setregs(pid);

	return rc;
}
#endif /* ptrace_setregset_or_setregs */
1215
1216 struct sysent_buf {
1217         struct tcb *tcp;
1218         struct_sysent ent;
1219         char buf[sizeof("syscall_%lu") + sizeof(kernel_ulong_t) * 3];
1220 };
1221
1222 static void
1223 free_sysent_buf(void *ptr)
1224 {
1225         struct sysent_buf *s = ptr;
1226         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1227         free(ptr);
1228 }
1229
1230 /*
1231  * Returns:
1232  * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
1233  * 1: ok, continue in trace_syscall_entering().
1234  * other: error, trace_syscall_entering() should print error indicator
1235  *    ("????" etc) and bail out.
1236  */
1237 int
1238 get_scno(struct tcb *tcp)
1239 {
1240         get_regs(tcp->pid);
1241
1242         if (get_regs_error)
1243                 return -1;
1244
1245         int rc = arch_get_scno(tcp);
1246         if (rc != 1)
1247                 return rc;
1248
1249         if (scno_is_valid(tcp->scno)) {
1250                 tcp->s_ent = &sysent[tcp->scno];
1251                 tcp->qual_flg = qual_flags(tcp->scno);
1252         } else {
1253                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1254
1255                 s->tcp = tcp;
1256                 s->ent.nargs = MAX_ARGS;
1257                 s->ent.sen = SEN_printargs;
1258                 s->ent.sys_func = printargs;
1259                 s->ent.sys_name = s->buf;
1260                 sprintf(s->buf, "syscall_%" PRI_klu, shuffle_scno(tcp->scno));
1261
1262                 tcp->s_ent = &s->ent;
1263                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1264
1265                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1266
1267                 if (debug_flag)
1268                         error_msg("pid %d invalid syscall %" PRI_kld,
1269                                   tcp->pid, tcp->scno);
1270         }
1271         return 1;
1272 }
1273
1274 #ifdef USE_GET_SYSCALL_RESULT_REGS
1275 static int get_syscall_result_regs(struct tcb *);
1276 #endif
1277
1278 /* Returns:
1279  * 1: ok, continue in trace_syscall_exiting().
1280  * -1: error, trace_syscall_exiting() should print error indicator
1281  *    ("????" etc) and bail out.
1282  */
1283 static int
1284 get_syscall_result(struct tcb *tcp)
1285 {
1286 #ifdef USE_GET_SYSCALL_RESULT_REGS
1287         if (get_syscall_result_regs(tcp))
1288                 return -1;
1289 #endif
1290         tcp->u_error = 0;
1291         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1292
1293         return 1;
1294 }
1295
1296 #include "get_scno.c"
1297 #include "set_scno.c"
1298 #include "get_syscall_args.c"
1299 #ifdef USE_GET_SYSCALL_RESULT_REGS
1300 # include "get_syscall_result.c"
1301 #endif
1302 #include "get_error.c"
1303 #include "set_error.c"
1304 #ifdef HAVE_GETREGS_OLD
1305 # include "getregs_old.c"
1306 #endif
1307
1308 const char *
1309 syscall_name(kernel_ulong_t scno)
1310 {
1311 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1312         if (current_personality == X32_PERSONALITY_NUMBER)
1313                 scno &= ~__X32_SYSCALL_BIT;
1314 #endif
1315         return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
1316 }