/*
 * granicus.if.org Git - strace/blob - syscall.c
 * Include "kernel_types.h" in defs.h and tests/tests.h
 * [strace] / syscall.c
 */
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "defs.h"
35 #include "native_defs.h"
36 #include <sys/param.h>
37 #include <signal.h>
38
39 /* for struct iovec */
40 #include <sys/uio.h>
41
42 #include "regs.h"
43 #include "ptrace.h"
44
45 #if defined(SPARC64)
46 # undef PTRACE_GETREGS
47 # define PTRACE_GETREGS PTRACE_GETREGS64
48 # undef PTRACE_SETREGS
49 # define PTRACE_SETREGS PTRACE_SETREGS64
50 #endif
51
52 #if defined SPARC64
53 # include <asm/psrcompat.h>
54 #elif defined SPARC
55 # include <asm/psr.h>
56 #endif
57
58 #ifdef IA64
59 # include <asm/rse.h>
60 #endif
61
62 #ifndef NT_PRSTATUS
63 # define NT_PRSTATUS 1
64 #endif
65
66 #ifndef NSIG
67 # warning: NSIG is not defined, using 32
68 # define NSIG 32
69 #endif
70
71 #include "syscall.h"
72
73 /* Define these shorthand notations to simplify the syscallent files. */
74 #define TD TRACE_DESC
75 #define TF TRACE_FILE
76 #define TI TRACE_IPC
77 #define TN TRACE_NETWORK
78 #define TP TRACE_PROCESS
79 #define TS TRACE_SIGNAL
80 #define TM TRACE_MEMORY
81 #define NF SYSCALL_NEVER_FAILS
82 #define MA MAX_ARGS
83 #define SI STACKTRACE_INVALIDATE_CACHE
84 #define SE STACKTRACE_CAPTURE_ON_ENTER
85
86 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
87
88 const struct_sysent sysent0[] = {
89 #include "syscallent.h"
90 };
91
92 #if SUPPORTED_PERSONALITIES > 1
93 # include PERSONALITY1_INCLUDE_FUNCS
94 static const struct_sysent sysent1[] = {
95 # include "syscallent1.h"
96 };
97 #endif
98
99 #if SUPPORTED_PERSONALITIES > 2
100 # include PERSONALITY2_INCLUDE_FUNCS
101 static const struct_sysent sysent2[] = {
102 # include "syscallent2.h"
103 };
104 #endif
105
106 /* Now undef them since short defines cause wicked namespace pollution. */
107 #undef SEN
108 #undef TD
109 #undef TF
110 #undef TI
111 #undef TN
112 #undef TP
113 #undef TS
114 #undef TM
115 #undef NF
116 #undef MA
117 #undef SI
118 #undef SE
119
120 /*
121  * `ioctlent[012].h' files are automatically generated by the auxiliary
122  * program `ioctlsort', such that the list is sorted by the `code' field.
123  * This has the side-effect of resolving the _IO.. macros into
124  * plain integers, eliminating the need to include here everything
125  * in "/usr/include".
126  */
127
128 const char *const errnoent0[] = {
129 #include "errnoent.h"
130 };
131 const char *const signalent0[] = {
132 #include "signalent.h"
133 };
134 const struct_ioctlent ioctlent0[] = {
135 #include "ioctlent0.h"
136 };
137
138 #if SUPPORTED_PERSONALITIES > 1
139 static const char *const errnoent1[] = {
140 # include "errnoent1.h"
141 };
142 static const char *const signalent1[] = {
143 # include "signalent1.h"
144 };
145 static const struct_ioctlent ioctlent1[] = {
146 # include "ioctlent1.h"
147 };
148 # include PERSONALITY0_INCLUDE_PRINTERS_DECLS
149 static const struct_printers printers0 = {
150 # include PERSONALITY0_INCLUDE_PRINTERS_DEFS
151 };
152 # include PERSONALITY1_INCLUDE_PRINTERS_DECLS
153 static const struct_printers printers1 = {
154 # include PERSONALITY1_INCLUDE_PRINTERS_DEFS
155 };
156 #endif
157
158 #if SUPPORTED_PERSONALITIES > 2
159 static const char *const errnoent2[] = {
160 # include "errnoent2.h"
161 };
162 static const char *const signalent2[] = {
163 # include "signalent2.h"
164 };
165 static const struct_ioctlent ioctlent2[] = {
166 # include "ioctlent2.h"
167 };
168 # include PERSONALITY2_INCLUDE_PRINTERS_DECLS
169 static const struct_printers printers2 = {
170 # include PERSONALITY2_INCLUDE_PRINTERS_DEFS
171 };
172 #endif
173
174 enum {
175         nsyscalls0 = ARRAY_SIZE(sysent0)
176 #if SUPPORTED_PERSONALITIES > 1
177         , nsyscalls1 = ARRAY_SIZE(sysent1)
178 # if SUPPORTED_PERSONALITIES > 2
179         , nsyscalls2 = ARRAY_SIZE(sysent2)
180 # endif
181 #endif
182 };
183
184 enum {
185         nerrnos0 = ARRAY_SIZE(errnoent0)
186 #if SUPPORTED_PERSONALITIES > 1
187         , nerrnos1 = ARRAY_SIZE(errnoent1)
188 # if SUPPORTED_PERSONALITIES > 2
189         , nerrnos2 = ARRAY_SIZE(errnoent2)
190 # endif
191 #endif
192 };
193
194 enum {
195         nsignals0 = ARRAY_SIZE(signalent0)
196 #if SUPPORTED_PERSONALITIES > 1
197         , nsignals1 = ARRAY_SIZE(signalent1)
198 # if SUPPORTED_PERSONALITIES > 2
199         , nsignals2 = ARRAY_SIZE(signalent2)
200 # endif
201 #endif
202 };
203
204 enum {
205         nioctlents0 = ARRAY_SIZE(ioctlent0)
206 #if SUPPORTED_PERSONALITIES > 1
207         , nioctlents1 = ARRAY_SIZE(ioctlent1)
208 # if SUPPORTED_PERSONALITIES > 2
209         , nioctlents2 = ARRAY_SIZE(ioctlent2)
210 # endif
211 #endif
212 };
213
214 #if SUPPORTED_PERSONALITIES > 1
215 const struct_sysent *sysent = sysent0;
216 const char *const *errnoent = errnoent0;
217 const char *const *signalent = signalent0;
218 const struct_ioctlent *ioctlent = ioctlent0;
219 const struct_printers *printers = &printers0;
220 #endif
221
222 unsigned nsyscalls = nsyscalls0;
223 unsigned nerrnos = nerrnos0;
224 unsigned nsignals = nsignals0;
225 unsigned nioctlents = nioctlents0;
226
227 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
228         nsyscalls0,
229 #if SUPPORTED_PERSONALITIES > 1
230         nsyscalls1,
231 #endif
232 #if SUPPORTED_PERSONALITIES > 2
233         nsyscalls2,
234 #endif
235 };
236 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
237         sysent0,
238 #if SUPPORTED_PERSONALITIES > 1
239         sysent1,
240 #endif
241 #if SUPPORTED_PERSONALITIES > 2
242         sysent2,
243 #endif
244 };
245
246 #if SUPPORTED_PERSONALITIES > 1
247 unsigned current_personality;
248
249 # ifndef current_wordsize
250 unsigned current_wordsize;
251 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
252         PERSONALITY0_WORDSIZE,
253         PERSONALITY1_WORDSIZE,
254 # if SUPPORTED_PERSONALITIES > 2
255         PERSONALITY2_WORDSIZE,
256 # endif
257 };
258 # endif
259
260 void
261 set_personality(int personality)
262 {
263         nsyscalls = nsyscall_vec[personality];
264         sysent = sysent_vec[personality];
265
266         switch (personality) {
267         case 0:
268                 errnoent = errnoent0;
269                 nerrnos = nerrnos0;
270                 ioctlent = ioctlent0;
271                 nioctlents = nioctlents0;
272                 signalent = signalent0;
273                 nsignals = nsignals0;
274                 printers = &printers0;
275                 break;
276
277         case 1:
278                 errnoent = errnoent1;
279                 nerrnos = nerrnos1;
280                 ioctlent = ioctlent1;
281                 nioctlents = nioctlents1;
282                 signalent = signalent1;
283                 nsignals = nsignals1;
284                 printers = &printers1;
285                 break;
286
287 # if SUPPORTED_PERSONALITIES > 2
288         case 2:
289                 errnoent = errnoent2;
290                 nerrnos = nerrnos2;
291                 ioctlent = ioctlent2;
292                 nioctlents = nioctlents2;
293                 signalent = signalent2;
294                 nsignals = nsignals2;
295                 printers = &printers2;
296                 break;
297 # endif
298         }
299
300         current_personality = personality;
301 # ifndef current_wordsize
302         current_wordsize = personality_wordsize[personality];
303 # endif
304 }
305
306 static void
307 update_personality(struct tcb *tcp, unsigned int personality)
308 {
309         if (personality == current_personality)
310                 return;
311         set_personality(personality);
312
313         if (personality == tcp->currpers)
314                 return;
315         tcp->currpers = personality;
316
317 # undef PERSONALITY_NAMES
318 # if defined POWERPC64
319 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
320 # elif defined X86_64
321 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
322 # elif defined X32
323 #  define PERSONALITY_NAMES {"x32", "32 bit"}
324 # elif defined AARCH64
325 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
326 # elif defined TILE
327 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
328 # endif
329 # ifdef PERSONALITY_NAMES
330         if (!qflag) {
331                 static const char *const names[] = PERSONALITY_NAMES;
332                 error_msg("[ Process PID=%d runs in %s mode. ]",
333                           tcp->pid, names[personality]);
334         }
335 # endif
336 }
337 #endif
338
#ifdef SYS_socket_subcall
/*
 * Turn a multiplexed socket call into the subcall it selects.
 * u_arg[0] holds the subcall number and u_arg[1] the tracee address of
 * the real argument vector.  If the number is out of range or the
 * vector cannot be read, the tcb is left untouched.
 */
static void
decode_socket_subcall(struct tcb *tcp)
{
	const int subcall = tcp->u_arg[0];

	if (subcall < 1 || subcall >= SYS_socket_nsubcalls)
		return;

	const unsigned long sub_scno = SYS_socket_subcall + subcall;
	const unsigned int argc = sysent[sub_scno].nargs;
	uint64_t raw[argc];

	/* The tracee stores one word of current_wordsize bytes per argument. */
	if (umoven(tcp, tcp->u_arg[1], argc * current_wordsize, raw) < 0)
		return;

	tcp->scno = sub_scno;
	tcp->qual_flg = qual_flags(sub_scno);
	tcp->s_ent = &sysent[sub_scno];

	/* With 4-byte words the buffer is really an array of 32-bit values. */
	const bool words_are_32bit = (sizeof(uint32_t) == current_wordsize);
	unsigned int idx;
	for (idx = 0; idx < argc; ++idx) {
		const uint64_t value = words_are_32bit
			? ((uint32_t *) (void *) raw)[idx]
			: raw[idx];
		tcp->u_arg[idx] = value;
	}
}
#endif
365
#ifdef SYS_ipc_subcall
/*
 * Turn a multiplexed ipc call into the subcall it selects.
 * The low 16 bits of u_arg[0] carry the subcall number, the bits above
 * them a version.  A non-zero version is not decoded on S390/S390X and
 * on SPARC64 with 8-byte words; elsewhere it is stored with
 * set_tcb_priv_ulong().  Only subcalls 1-4, 11-14 and 21-24 are
 * recognized; for those the arguments are shifted down by one slot.
 */
static void
decode_ipc_subcall(struct tcb *tcp)
{
	unsigned int call = tcp->u_arg[0];
	const unsigned int version = call >> 16;

	if (version) {
# if defined S390 || defined S390X
		return;
# else
#  ifdef SPARC64
		if (current_wordsize == 8)
			return;
#  endif
		set_tcb_priv_ulong(tcp, version);
		call &= 0xffff;
# endif
	}

	/* Accept exactly 1-4, 11-14 and 21-24. */
	const unsigned int decade = call / 10;
	const unsigned int unit = call % 10;
	if (decade > 2 || unit < 1 || unit > 4)
		return;

	tcp->scno = SYS_ipc_subcall + call;
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Drop the selector argument: argument k+1 becomes argument k. */
	const unsigned int argc = tcp->s_ent->nargs;
	unsigned int dst;
	for (dst = 0; dst < argc; dst++)
		tcp->u_arg[dst] = tcp->u_arg[dst + 1];
}
#endif
405
#ifdef LINUX_MIPSO32
/*
 * Decode the indirect syscall: u_arg[0] is the number of the real
 * syscall and the remaining arguments are its arguments.  Nothing is
 * changed when that number is not a valid syscall number.
 */
static void
decode_mips_subcall(struct tcb *tcp)
{
	if (!SCNO_IS_VALID(tcp->u_arg[0]))
		return;

	tcp->scno = tcp->u_arg[0];
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Shift every argument down by one slot, dropping the selector. */
	memmove(tcp->u_arg, tcp->u_arg + 1,
		sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));

	/*
	 * Fetching the last arg of 7-arg syscalls (fadvise64_64
	 * and sync_file_range) requires additional code,
	 * see linux/mips/get_syscall_args.c
	 * If it cannot be read from the stack, it is reported as zero.
	 */
	if (tcp->s_ent->nargs == MAX_ARGS
	    && umoven(tcp,
		      mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
		      sizeof(tcp->u_arg[0]),
		      &tcp->u_arg[MAX_ARGS - 1]) < 0) {
		tcp->u_arg[MAX_ARGS - 1] = 0;
	}
}
#endif /* LINUX_MIPSO32 */
431
432 static void
433 dumpio(struct tcb *tcp)
434 {
435         if (syserror(tcp))
436                 return;
437
438         int fd = tcp->u_arg[0];
439         if (fd < 0)
440                 return;
441
442         if (is_number_in_set(fd, &read_set)) {
443                 switch (tcp->s_ent->sen) {
444                 case SEN_read:
445                 case SEN_pread:
446                 case SEN_recv:
447                 case SEN_recvfrom:
448                 case SEN_mq_timedreceive:
449                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
450                         return;
451                 case SEN_readv:
452                 case SEN_preadv:
453                 case SEN_preadv2:
454                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
455                                      tcp->u_rval);
456                         return;
457                 case SEN_recvmsg:
458                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
459                         return;
460                 case SEN_recvmmsg:
461                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
462                         return;
463                 }
464         }
465         if (is_number_in_set(fd, &write_set)) {
466                 switch (tcp->s_ent->sen) {
467                 case SEN_write:
468                 case SEN_pwrite:
469                 case SEN_send:
470                 case SEN_sendto:
471                 case SEN_mq_timedsend:
472                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
473                         break;
474                 case SEN_writev:
475                 case SEN_pwritev:
476                 case SEN_pwritev2:
477                 case SEN_vmsplice:
478                         dumpiov(tcp, tcp->u_arg[2], tcp->u_arg[1]);
479                         break;
480                 case SEN_sendmsg:
481                         dumpiov_in_msghdr(tcp, tcp->u_arg[1],
482                                           (unsigned long) -1L);
483                         break;
484                 case SEN_sendmmsg:
485                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
486                         break;
487                 }
488         }
489 }
490
/*
 * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
 * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
 * Where no shuffling is needed, shuffle_scno() is a plain cast to long.
 */
#if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
# define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
static long
shuffle_scno(unsigned long scno)
{
	/* Start of the ARM specific syscall number range. */
	enum { ARM_SPECIAL_BASE = 0x000f0000 };

	/* Numbers below the first shuffled syscall map to themselves. */
	if (scno >= ARM_FIRST_SHUFFLED_SYSCALL) {
		/* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
		if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
			return 0x000ffff0;
		if (scno == 0x000ffff0)
			return ARM_FIRST_SHUFFLED_SYSCALL;

		/*
		 * Is it ARM specific syscall?
		 * Swap [BASE, BASE + LAST_SPECIAL] range
		 * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
		 */
		if (scno >= ARM_SPECIAL_BASE
		    && scno <= ARM_SPECIAL_BASE + ARM_LAST_SPECIAL_SYSCALL)
			return scno - ARM_SPECIAL_BASE
			       + ARM_SECOND_SHUFFLED_SYSCALL;

		if (scno <= ARM_SECOND_SHUFFLED_SYSCALL
			    + ARM_LAST_SPECIAL_SYSCALL)
			return scno + ARM_SPECIAL_BASE
			       - ARM_SECOND_SHUFFLED_SYSCALL;
	}

	return scno;
}
#else
# define shuffle_scno(scno) ((long)(scno))
#endif
527
528 const char *
529 err_name(unsigned long err)
530 {
531         if ((err < nerrnos) && errnoent[err])
532                 return errnoent[err];
533
534         return NULL;
535 }
536
/*
 * Status of the register fetch.  trace_syscall_exiting() only reads
 * the syscall result when this is zero.
 */
static long get_regs_error;

/* Mark the register state as unavailable by storing -1. */
void
clear_regs(void)
{
	get_regs_error = -1L;
}
544
545 static int get_syscall_args(struct tcb *);
546 static int get_syscall_result(struct tcb *);
547 static int arch_get_scno(struct tcb *tcp);
548 static int arch_set_scno(struct tcb *, long);
549 static void get_error(struct tcb *, const bool);
550 static int arch_set_error(struct tcb *);
551
552 struct fault_opts *fault_vec[SUPPORTED_PERSONALITIES];
553
554 static struct fault_opts *
555 tcb_fault_opts(struct tcb *tcp)
556 {
557         return (SCNO_IN_RANGE(tcp->scno) && tcp->fault_vec[current_personality])
558                ? &tcp->fault_vec[current_personality][tcp->scno] : NULL;
559 }
560
561
562 static long
563 inject_syscall_fault_entering(struct tcb *tcp)
564 {
565         if (!tcp->fault_vec[current_personality]) {
566                 tcp->fault_vec[current_personality] =
567                         xcalloc(nsyscalls, sizeof(**fault_vec));
568                 memcpy(tcp->fault_vec[current_personality],
569                        fault_vec[current_personality],
570                        nsyscalls * sizeof(**fault_vec));
571         }
572
573         struct fault_opts *opts = tcb_fault_opts(tcp);
574
575         if (!opts || opts->first == 0)
576                 return 0;
577
578         --opts->first;
579
580         if (opts->first != 0)
581                 return 0;
582
583         opts->first = opts->step;
584
585         if (!arch_set_scno(tcp, -1))
586                 tcp->flags |= TCB_FAULT_INJ;
587
588         return 0;
589 }
590
591 static long
592 update_syscall_fault_exiting(struct tcb *tcp)
593 {
594         struct fault_opts *opts = tcb_fault_opts(tcp);
595
596         if (opts && opts->err && tcp->u_error != opts->err) {
597                 unsigned long u_error = tcp->u_error;
598                 tcp->u_error = opts->err;
599                 if (arch_set_error(tcp))
600                         tcp->u_error = u_error;
601         }
602
603         return 0;
604 }
605
606 static int
607 trace_syscall_entering(struct tcb *tcp)
608 {
609         int res, scno_good;
610
611         scno_good = res = get_scno(tcp);
612         if (res == 0)
613                 return res;
614         if (res == 1)
615                 res = get_syscall_args(tcp);
616
617         if (res != 1) {
618                 printleader(tcp);
619                 tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
620                 /*
621                  * " <unavailable>" will be added later by the code which
622                  * detects ptrace errors.
623                  */
624                 goto ret;
625         }
626
627 #ifdef LINUX_MIPSO32
628         if (SEN_syscall == tcp->s_ent->sen)
629                 decode_mips_subcall(tcp);
630 #endif
631
632 #if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
633         switch (tcp->s_ent->sen) {
634 # ifdef SYS_socket_subcall
635                 case SEN_socketcall:
636                         decode_socket_subcall(tcp);
637                         break;
638 # endif
639 # ifdef SYS_ipc_subcall
640                 case SEN_ipc:
641                         decode_ipc_subcall(tcp);
642                         break;
643 # endif
644         }
645 #endif
646
647         /* Restrain from fault injection while the trace executes strace code. */
648         if (hide_log(tcp)) {
649                 tcp->qual_flg &= ~QUAL_FAULT;
650         }
651
652         switch (tcp->s_ent->sen) {
653                 case SEN_execve:
654                 case SEN_execveat:
655 #if defined SPARC || defined SPARC64
656                 case SEN_execv:
657 #endif
658                         tcp->flags &= ~TCB_HIDE_LOG;
659                         break;
660         }
661
662         if (!(tcp->qual_flg & QUAL_TRACE)
663          || (tracing_paths && !pathtrace_match(tcp))
664         ) {
665                 tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
666                 tcp->sys_func_rval = 0;
667                 return 0;
668         }
669
670         tcp->flags &= ~TCB_FILTERED;
671
672         if (hide_log(tcp)) {
673                 res = 0;
674                 goto ret;
675         }
676
677         if (tcp->qual_flg & QUAL_FAULT)
678                 inject_syscall_fault_entering(tcp);
679
680         if (cflag == CFLAG_ONLY_STATS) {
681                 res = 0;
682                 goto ret;
683         }
684
685 #ifdef USE_LIBUNWIND
686         if (stack_trace_enabled) {
687                 if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
688                         unwind_capture_stacktrace(tcp);
689         }
690 #endif
691
692         printleader(tcp);
693         tprintf("%s(", tcp->s_ent->sys_name);
694         if (tcp->qual_flg & QUAL_RAW)
695                 res = printargs(tcp);
696         else
697                 res = tcp->s_ent->sys_func(tcp);
698
699         fflush(tcp->outf);
700  ret:
701         tcp->flags |= TCB_INSYSCALL;
702         tcp->sys_func_rval = res;
703         /* Measure the entrance time as late as possible to avoid errors. */
704         if (Tflag || cflag)
705                 gettimeofday(&tcp->etime, NULL);
706         return res;
707 }
708
709 static bool
710 syscall_fault_injected(struct tcb *tcp)
711 {
712         return tcp->flags & TCB_FAULT_INJ;
713 }
714
715 static int
716 trace_syscall_exiting(struct tcb *tcp)
717 {
718         int sys_res;
719         struct timeval tv;
720         int res;
721         unsigned long u_error;
722         const char *u_error_str;
723
724         /* Measure the exit time as early as possible to avoid errors. */
725         if (Tflag || cflag)
726                 gettimeofday(&tv, NULL);
727
728 #ifdef USE_LIBUNWIND
729         if (stack_trace_enabled) {
730                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
731                         unwind_cache_invalidate(tcp);
732         }
733 #endif
734
735 #if SUPPORTED_PERSONALITIES > 1
736         update_personality(tcp, tcp->currpers);
737 #endif
738         res = (get_regs_error ? -1 : get_syscall_result(tcp));
739         if (filtered(tcp) || hide_log(tcp))
740                 goto ret;
741
742         if (syserror(tcp) && syscall_fault_injected(tcp))
743                 update_syscall_fault_exiting(tcp);
744
745         if (cflag) {
746                 count_syscall(tcp, &tv);
747                 if (cflag == CFLAG_ONLY_STATS) {
748                         goto ret;
749                 }
750         }
751
752         /* If not in -ff mode, and printing_tcp != tcp,
753          * then the log currently does not end with output
754          * of _our syscall entry_, but with something else.
755          * We need to say which syscall's return is this.
756          *
757          * Forced reprinting via TCB_REPRINT is used only by
758          * "strace -ff -oLOG test/threaded_execve" corner case.
759          * It's the only case when -ff mode needs reprinting.
760          */
761         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
762                 tcp->flags &= ~TCB_REPRINT;
763                 printleader(tcp);
764                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
765         }
766         printing_tcp = tcp;
767
768         tcp->s_prev_ent = NULL;
769         if (res != 1) {
770                 /* There was error in one of prior ptrace ops */
771                 tprints(") ");
772                 tabto();
773                 tprints("= ? <unavailable>\n");
774                 line_ended();
775                 tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
776                 tcp->sys_func_rval = 0;
777                 free_tcb_priv_data(tcp);
778                 return res;
779         }
780         tcp->s_prev_ent = tcp->s_ent;
781
782         sys_res = 0;
783         if (tcp->qual_flg & QUAL_RAW) {
784                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
785         } else {
786         /* FIXME: not_failing_only (IOW, option -z) is broken:
787          * failure of syscall is known only after syscall return.
788          * Thus we end up with something like this on, say, ENOENT:
789          *     open("doesnt_exist", O_RDONLY <unfinished ...>
790          *     {next syscall decode}
791          * whereas the intended result is that open(...) line
792          * is not shown at all.
793          */
794                 if (not_failing_only && tcp->u_error)
795                         goto ret;       /* ignore failed syscalls */
796                 if (tcp->sys_func_rval & RVAL_DECODED)
797                         sys_res = tcp->sys_func_rval;
798                 else
799                         sys_res = tcp->s_ent->sys_func(tcp);
800         }
801
802         tprints(") ");
803         tabto();
804         u_error = tcp->u_error;
805
806         if (tcp->qual_flg & QUAL_RAW) {
807                 if (u_error) {
808                         tprintf("= -1 (errno %lu)", u_error);
809                         if (syscall_fault_injected(tcp))
810                                 tprints(" (INJECTED)");
811                 } else {
812                         tprintf("= %#lx", tcp->u_rval);
813                 }
814         }
815         else if (!(sys_res & RVAL_NONE) && u_error) {
816                 switch (u_error) {
817                 /* Blocked signals do not interrupt any syscalls.
818                  * In this case syscalls don't return ERESTARTfoo codes.
819                  *
820                  * Deadly signals set to SIG_DFL interrupt syscalls
821                  * and kill the process regardless of which of the codes below
822                  * is returned by the interrupted syscall.
823                  * In some cases, kernel forces a kernel-generated deadly
824                  * signal to be unblocked and set to SIG_DFL (and thus cause
825                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
826                  * or SIGILL. (The alternative is to leave process spinning
827                  * forever on the faulty instruction - not useful).
828                  *
829                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
830                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
831                  * but kernel will always restart them.
832                  */
833                 case ERESTARTSYS:
834                         /* Most common type of signal-interrupted syscall exit code.
835                          * The system call will be restarted with the same arguments
836                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
837                          */
838                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
839                         break;
840                 case ERESTARTNOINTR:
841                         /* Rare. For example, fork() returns this if interrupted.
842                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
843                          */
844                         tprints("= ? ERESTARTNOINTR (To be restarted)");
845                         break;
846                 case ERESTARTNOHAND:
847                         /* pause(), rt_sigsuspend() etc use this code.
848                          * SA_RESTART is ignored (assumed not set):
849                          * syscall won't restart (will return EINTR instead)
850                          * even after signal with SA_RESTART set. However,
851                          * after SIG_IGN or SIG_DFL signal it will restart
852                          * (thus the name "restart only if has no handler").
853                          */
854                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
855                         break;
856                 case ERESTART_RESTARTBLOCK:
857                         /* Syscalls like nanosleep(), poll() which can't be
858                          * restarted with their original arguments use this
859                          * code. Kernel will execute restart_syscall() instead,
860                          * which changes arguments before restarting syscall.
861                          * SA_RESTART is ignored (assumed not set) similarly
862                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
863                          * since restart data is saved in "restart block"
864                          * in task struct, and if signal handler uses a syscall
865                          * which in turn saves another such restart block,
866                          * old data is lost and restart becomes impossible)
867                          */
868                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
869                         break;
870                 default:
871                         u_error_str = err_name(u_error);
872                         if (u_error_str)
873                                 tprintf("= -1 %s (%s)",
874                                         u_error_str, strerror(u_error));
875                         else
876                                 tprintf("= -1 %lu (%s)",
877                                         u_error, strerror(u_error));
878                         break;
879                 }
880                 if (syscall_fault_injected(tcp))
881                         tprintf(" (INJECTED)");
882                 if ((sys_res & RVAL_STR) && tcp->auxstr)
883                         tprintf(" (%s)", tcp->auxstr);
884         }
885         else {
886                 if (sys_res & RVAL_NONE)
887                         tprints("= ?");
888                 else {
889                         switch (sys_res & RVAL_MASK) {
890                         case RVAL_HEX:
891 #if SUPPORTED_PERSONALITIES > 1
892                                 if (current_wordsize < sizeof(long))
893                                         tprintf("= %#x",
894                                                 (unsigned int) tcp->u_rval);
895                                 else
896 #endif
897                                         tprintf("= %#lx", tcp->u_rval);
898                                 break;
899                         case RVAL_OCTAL:
900                                 tprints("= ");
901                                 print_numeric_long_umask(tcp->u_rval);
902                                 break;
903                         case RVAL_UDECIMAL:
904 #if SUPPORTED_PERSONALITIES > 1
905                                 if (current_wordsize < sizeof(long))
906                                         tprintf("= %u",
907                                                 (unsigned int) tcp->u_rval);
908                                 else
909 #endif
910                                         tprintf("= %lu", tcp->u_rval);
911                                 break;
912                         case RVAL_DECIMAL:
913                                 tprintf("= %ld", tcp->u_rval);
914                                 break;
915                         case RVAL_FD:
916                                 if (show_fd_path) {
917                                         tprints("= ");
918                                         printfd(tcp, tcp->u_rval);
919                                 }
920                                 else
921                                         tprintf("= %ld", tcp->u_rval);
922                                 break;
923 #if HAVE_STRUCT_TCB_EXT_ARG
924                         /*
925                         case RVAL_LHEX:
926                                 tprintf("= %#llx", tcp->u_lrval);
927                                 break;
928                         case RVAL_LOCTAL:
929                                 tprintf("= %#llo", tcp->u_lrval);
930                                 break;
931                         */
932                         case RVAL_LUDECIMAL:
933                                 tprintf("= %llu", tcp->u_lrval);
934                                 break;
935                         /*
936                         case RVAL_LDECIMAL:
937                                 tprintf("= %lld", tcp->u_lrval);
938                                 break;
939                         */
940 #endif /* HAVE_STRUCT_TCB_EXT_ARG */
941                         default:
942                                 error_msg("invalid rval format");
943                                 break;
944                         }
945                 }
946                 if ((sys_res & RVAL_STR) && tcp->auxstr)
947                         tprintf(" (%s)", tcp->auxstr);
948         }
949         if (Tflag) {
950                 tv_sub(&tv, &tv, &tcp->etime);
951                 tprintf(" <%ld.%06ld>",
952                         (long) tv.tv_sec, (long) tv.tv_usec);
953         }
954         tprints("\n");
955         dumpio(tcp);
956         line_ended();
957
958 #ifdef USE_LIBUNWIND
959         if (stack_trace_enabled)
960                 unwind_print_stacktrace(tcp);
961 #endif
962
963  ret:
964         tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
965         tcp->sys_func_rval = 0;
966         free_tcb_priv_data(tcp);
967         return 0;
968 }
969
/*
 * Dispatch a syscall stop to the matching handler: the exit handler
 * when exiting(tcp) is true, the entry handler otherwise.  The chosen
 * handler's return value is passed through unchanged.
 */
int
trace_syscall(struct tcb *tcp)
{
	if (exiting(tcp))
		return trace_syscall_exiting(tcp);

	return trace_syscall_entering(tcp);
}
976
977 bool
978 is_erestart(struct tcb *tcp)
979 {
980         switch (tcp->u_error) {
981                 case ERESTARTSYS:
982                 case ERESTARTNOINTR:
983                 case ERESTARTNOHAND:
984                 case ERESTART_RESTARTBLOCK:
985                         return true;
986                 default:
987                         return false;
988         }
989 }
990
991 static unsigned long saved_u_error;
992
993 void
994 temporarily_clear_syserror(struct tcb *tcp)
995 {
996         saved_u_error = tcp->u_error;
997         tcp->u_error = 0;
998 }
999
1000 void
1001 restore_cleared_syserror(struct tcb *tcp)
1002 {
1003         tcp->u_error = saved_u_error;
1004 }
1005
1006 /*
1007  * Check the syscall return value register value for whether it is
1008  * a negated errno code indicating an error, or a success return value.
1009  */
1010 static inline bool
1011 is_negated_errno(kernel_ulong_t val)
1012 {
1013         /* Linux kernel defines MAX_ERRNO to 4095. */
1014         kernel_ulong_t max = -(kernel_long_t) 4095;
1015
1016 #if defined X86_64 || defined X32
1017         /*
1018          * current_wordsize is 4 for x32 personality
1019          * but truncation _must not_ be done in it, so
1020          * check current_personality instead.
1021          */
1022         if (current_personality == 1) {
1023                 val = (uint32_t) val;
1024                 max = (uint32_t) max;
1025         }
1026 #elif SUPPORTED_PERSONALITIES > 1 && SIZEOF_LONG > 4
1027         if (current_wordsize < sizeof(val)) {
1028                 val = (uint32_t) val;
1029                 max = (uint32_t) max;
1030         }
1031 #endif
1032
1033         return val >= max;
1034 }
1035
1036 #include "arch_regs.c"
1037
1038 #ifdef HAVE_GETRVAL2
1039 # include "arch_getrval2.c"
1040 #endif
1041
1042 void
1043 print_pc(struct tcb *tcp)
1044 {
1045 #if defined ARCH_PC_REG
1046 # define ARCH_GET_PC 0
1047 #elif defined ARCH_PC_PEEK_ADDR
1048         long pc;
1049 # define ARCH_PC_REG pc
1050 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1051 #else
1052 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1053 #endif
1054         if (get_regs_error || ARCH_GET_PC)
1055                 tprints(current_wordsize == 4 ? "[????????] "
1056                                               : "[????????????????] ");
1057         else
1058                 tprintf(current_wordsize == 4 ? "[%08lx] " : "[%016lx] ",
1059                         (unsigned long) ARCH_PC_REG);
1060 }
1061
1062 #include "getregs_old.h"
1063
1064 #undef ptrace_getregset_or_getregs
1065 #undef ptrace_setregset_or_setregs
1066 #ifdef ARCH_REGS_FOR_GETREGSET
1067
1068 # define ptrace_getregset_or_getregs ptrace_getregset
1069 static long
1070 ptrace_getregset(pid_t pid)
1071 {
1072 # ifdef ARCH_IOVEC_FOR_GETREGSET
1073         /* variable iovec */
1074         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1075         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1076                       &ARCH_IOVEC_FOR_GETREGSET);
1077 # else
1078         /* constant iovec */
1079         static struct iovec io = {
1080                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1081                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1082         };
1083         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1084
1085 # endif
1086 }
1087
1088 # ifndef HAVE_GETREGS_OLD
1089 #  define ptrace_setregset_or_setregs ptrace_setregset
1090 static int
1091 ptrace_setregset(pid_t pid)
1092 {
1093 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1094         /* variable iovec */
1095         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1096                       &ARCH_IOVEC_FOR_GETREGSET);
1097 #  else
1098         /* constant iovec */
1099         static struct iovec io = {
1100                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1101                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1102         };
1103         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1104 #  endif
1105 }
1106 # endif /* !HAVE_GETREGS_OLD */
1107
1108 #elif defined ARCH_REGS_FOR_GETREGS
1109
1110 # define ptrace_getregset_or_getregs ptrace_getregs
1111 static long
1112 ptrace_getregs(pid_t pid)
1113 {
1114 # if defined SPARC || defined SPARC64
1115         /* SPARC systems have the meaning of data and addr reversed */
1116         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1117 # else
1118         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1119 # endif
1120 }
1121
1122 # ifndef HAVE_GETREGS_OLD
1123 #  define ptrace_setregset_or_setregs ptrace_setregs
1124 static int
1125 ptrace_setregs(pid_t pid)
1126 {
1127 #  if defined SPARC || defined SPARC64
1128         /* SPARC systems have the meaning of data and addr reversed */
1129         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1130 #  else
1131         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1132 #  endif
1133 }
1134 # endif /* !HAVE_GETREGS_OLD */
1135
1136 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1137
1138 void
1139 get_regs(pid_t pid)
1140 {
1141 #undef USE_GET_SYSCALL_RESULT_REGS
1142 #ifdef ptrace_getregset_or_getregs
1143
1144 # ifdef HAVE_GETREGS_OLD
1145         /*
1146          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1147          * fallback to getregs_old.
1148          */
1149         static int use_getregs_old;
1150         if (use_getregs_old < 0) {
1151                 get_regs_error = ptrace_getregset_or_getregs(pid);
1152                 return;
1153         } else if (use_getregs_old == 0) {
1154                 get_regs_error = ptrace_getregset_or_getregs(pid);
1155                 if (get_regs_error >= 0) {
1156                         use_getregs_old = -1;
1157                         return;
1158                 }
1159                 if (errno == EPERM || errno == ESRCH)
1160                         return;
1161                 use_getregs_old = 1;
1162         }
1163         get_regs_error = getregs_old(pid);
1164 # else /* !HAVE_GETREGS_OLD */
1165         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1166         get_regs_error = ptrace_getregset_or_getregs(pid);
1167 # endif /* !HAVE_GETREGS_OLD */
1168
1169 #else /* !ptrace_getregset_or_getregs */
1170
1171 # define USE_GET_SYSCALL_RESULT_REGS 1
1172 # warning get_regs is not implemented for this architecture yet
1173         get_regs_error = 0;
1174
1175 #endif /* !ptrace_getregset_or_getregs */
1176 }
1177
1178 #ifdef ptrace_setregset_or_setregs
/*
 * Write the architecture's register buffer back into process pid
 * through whichever PTRACE_SETREGSET/PTRACE_SETREGS wrapper was
 * selected above.  Returns that wrapper's result.
 */
static int
set_regs(pid_t pid)
{
	const int rc = ptrace_setregset_or_setregs(pid);

	return rc;
}
1184 #endif /* ptrace_setregset_or_setregs */
1185
1186 struct sysent_buf {
1187         struct tcb *tcp;
1188         struct_sysent ent;
1189         char buf[sizeof("syscall_%lu") + sizeof(long) * 3];
1190 };
1191
1192 static void
1193 free_sysent_buf(void *ptr)
1194 {
1195         struct sysent_buf *s = ptr;
1196         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1197         free(ptr);
1198 }
1199
1200 /*
1201  * Returns:
1202  * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
1203  * 1: ok, continue in trace_syscall_entering().
1204  * other: error, trace_syscall_entering() should print error indicator
1205  *    ("????" etc) and bail out.
1206  */
1207 int
1208 get_scno(struct tcb *tcp)
1209 {
1210         if (get_regs_error)
1211                 return -1;
1212
1213         int rc = arch_get_scno(tcp);
1214         if (rc != 1)
1215                 return rc;
1216
1217         if (SCNO_IS_VALID(tcp->scno)) {
1218                 tcp->s_ent = &sysent[tcp->scno];
1219                 tcp->qual_flg = qual_flags(tcp->scno);
1220         } else {
1221                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1222
1223                 s->tcp = tcp;
1224                 s->ent.nargs = MAX_ARGS;
1225                 s->ent.sen = SEN_printargs;
1226                 s->ent.sys_func = printargs;
1227                 s->ent.sys_name = s->buf;
1228                 sprintf(s->buf, "syscall_%lu", shuffle_scno(tcp->scno));
1229
1230                 tcp->s_ent = &s->ent;
1231                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1232
1233                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1234
1235                 if (debug_flag)
1236                         error_msg("pid %d invalid syscall %ld", tcp->pid, tcp->scno);
1237         }
1238         return 1;
1239 }
1240
1241 #ifdef USE_GET_SYSCALL_RESULT_REGS
1242 static int get_syscall_result_regs(struct tcb *);
1243 #endif
1244
1245 /* Returns:
1246  * 1: ok, continue in trace_syscall_exiting().
1247  * -1: error, trace_syscall_exiting() should print error indicator
1248  *    ("????" etc) and bail out.
1249  */
1250 static int
1251 get_syscall_result(struct tcb *tcp)
1252 {
1253 #ifdef USE_GET_SYSCALL_RESULT_REGS
1254         if (get_syscall_result_regs(tcp))
1255                 return -1;
1256 #endif
1257         tcp->u_error = 0;
1258         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1259
1260         return 1;
1261 }
1262
1263 #include "get_scno.c"
1264 #include "set_scno.c"
1265 #include "get_syscall_args.c"
1266 #ifdef USE_GET_SYSCALL_RESULT_REGS
1267 # include "get_syscall_result.c"
1268 #endif
1269 #include "get_error.c"
1270 #include "set_error.c"
1271 #ifdef HAVE_GETREGS_OLD
1272 # include "getregs_old.c"
1273 #endif
1274
1275 const char *
1276 syscall_name(long scno)
1277 {
1278 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1279         if (current_personality == X32_PERSONALITY_NUMBER)
1280                 scno &= ~__X32_SYSCALL_BIT;
1281 #endif
1282         return SCNO_IS_VALID(scno) ? sysent[scno].sys_name: NULL;
1283 }