]> granicus.if.org Git - strace/blob - syscall.c
Move NSIG ifdefery to a separate header file
[strace] / syscall.c
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "defs.h"
35 #include "native_defs.h"
36 #include "nsig.h"
37 #include <sys/param.h>
38
39 /* for struct iovec */
40 #include <sys/uio.h>
41
42 #include "regs.h"
43 #include "ptrace.h"
44
45 #if defined(SPARC64)
46 # undef PTRACE_GETREGS
47 # define PTRACE_GETREGS PTRACE_GETREGS64
48 # undef PTRACE_SETREGS
49 # define PTRACE_SETREGS PTRACE_SETREGS64
50 #endif
51
52 #if defined SPARC64
53 # include <asm/psrcompat.h>
54 #elif defined SPARC
55 # include <asm/psr.h>
56 #endif
57
58 #ifdef IA64
59 # include <asm/rse.h>
60 #endif
61
62 #ifndef NT_PRSTATUS
63 # define NT_PRSTATUS 1
64 #endif
65
66 #include "syscall.h"
67
/*
 * Define these shorthand notations to simplify the syscallent files.
 * They exist only while the syscallent*.h tables below are being
 * included and are #undef'ed again right after the last table.
 */
#define TD TRACE_DESC
#define TF TRACE_FILE
#define TI TRACE_IPC
#define TN TRACE_NETWORK
#define TP TRACE_PROCESS
#define TS TRACE_SIGNAL
#define TM TRACE_MEMORY
#define NF SYSCALL_NEVER_FAILS
#define MA MAX_ARGS
#define SI STACKTRACE_INVALIDATE_CACHE
#define SE STACKTRACE_CAPTURE_ON_ENTER
#define CST COMPAT_SYSCALL_TYPES

/*
 * SEN(name) expands to two comma-separated initializers:
 * the SEN_name constant and SYS_FUNC_NAME(sys_name).
 */
#define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)

/* Syscall table of personality 0; its contents come from syscallent.h. */
const struct_sysent sysent0[] = {
#include "syscallent.h"
};

/* Syscall table of personality 1, present only on multi-personality builds. */
#if SUPPORTED_PERSONALITIES > 1
# include PERSONALITY1_INCLUDE_FUNCS
static const struct_sysent sysent1[] = {
# include "syscallent1.h"
};
#endif

/* Syscall table of personality 2, present only when three are supported. */
#if SUPPORTED_PERSONALITIES > 2
# include PERSONALITY2_INCLUDE_FUNCS
static const struct_sysent sysent2[] = {
# include "syscallent2.h"
};
#endif

/* Now undef them since short defines cause wicked namespace pollution. */
#undef SEN
#undef TD
#undef TF
#undef TI
#undef TN
#undef TP
#undef TS
#undef TM
#undef NF
#undef MA
#undef SI
#undef SE
#undef CST
116
117 /*
118  * `ioctlent[012].h' files are automatically generated by the auxiliary
119  * program `ioctlsort', such that the list is sorted by the `code' field.
120  * This has the side-effect of resolving the _IO.. macros into
121  * plain integers, eliminating the need to include here everything
122  * in "/usr/include".
123  */
124
/*
 * Personality 0 tables.  Unlike the tables of the other personalities
 * these three have external linkage.
 */
const char *const errnoent0[] = {
#include "errnoent.h"
};
const char *const signalent0[] = {
#include "signalent.h"
};
const struct_ioctlent ioctlent0[] = {
#include "ioctlent0.h"
};

/*
 * Personality 1 tables.  The printers0/printers1 tables are defined
 * only on builds that support more than one personality.
 */
#if SUPPORTED_PERSONALITIES > 1
static const char *const errnoent1[] = {
# include "errnoent1.h"
};
static const char *const signalent1[] = {
# include "signalent1.h"
};
static const struct_ioctlent ioctlent1[] = {
# include "ioctlent1.h"
};
# include PERSONALITY0_INCLUDE_PRINTERS_DECLS
static const struct_printers printers0 = {
# include PERSONALITY0_INCLUDE_PRINTERS_DEFS
};
# include PERSONALITY1_INCLUDE_PRINTERS_DECLS
static const struct_printers printers1 = {
# include PERSONALITY1_INCLUDE_PRINTERS_DEFS
};
#endif

/* Personality 2 tables. */
#if SUPPORTED_PERSONALITIES > 2
static const char *const errnoent2[] = {
# include "errnoent2.h"
};
static const char *const signalent2[] = {
# include "signalent2.h"
};
static const struct_ioctlent ioctlent2[] = {
# include "ioctlent2.h"
};
# include PERSONALITY2_INCLUDE_PRINTERS_DECLS
static const struct_printers printers2 = {
# include PERSONALITY2_INCLUDE_PRINTERS_DEFS
};
#endif
170
171 enum {
172         nsyscalls0 = ARRAY_SIZE(sysent0)
173 #if SUPPORTED_PERSONALITIES > 1
174         , nsyscalls1 = ARRAY_SIZE(sysent1)
175 # if SUPPORTED_PERSONALITIES > 2
176         , nsyscalls2 = ARRAY_SIZE(sysent2)
177 # endif
178 #endif
179 };
180
181 enum {
182         nerrnos0 = ARRAY_SIZE(errnoent0)
183 #if SUPPORTED_PERSONALITIES > 1
184         , nerrnos1 = ARRAY_SIZE(errnoent1)
185 # if SUPPORTED_PERSONALITIES > 2
186         , nerrnos2 = ARRAY_SIZE(errnoent2)
187 # endif
188 #endif
189 };
190
191 enum {
192         nsignals0 = ARRAY_SIZE(signalent0)
193 #if SUPPORTED_PERSONALITIES > 1
194         , nsignals1 = ARRAY_SIZE(signalent1)
195 # if SUPPORTED_PERSONALITIES > 2
196         , nsignals2 = ARRAY_SIZE(signalent2)
197 # endif
198 #endif
199 };
200
201 enum {
202         nioctlents0 = ARRAY_SIZE(ioctlent0)
203 #if SUPPORTED_PERSONALITIES > 1
204         , nioctlents1 = ARRAY_SIZE(ioctlent1)
205 # if SUPPORTED_PERSONALITIES > 2
206         , nioctlents2 = ARRAY_SIZE(ioctlent2)
207 # endif
208 #endif
209 };
210
/*
 * Tables of the currently selected personality.  They start out
 * pointing at the personality 0 tables and are repointed by
 * set_personality().  They are defined as variables here only on
 * multi-personality builds.
 * NOTE(review): single-personality builds presumably get these names
 * from a header instead - confirm.
 */
#if SUPPORTED_PERSONALITIES > 1
const struct_sysent *sysent = sysent0;
const char *const *errnoent = errnoent0;
const char *const *signalent = signalent0;
const struct_ioctlent *ioctlent = ioctlent0;
const struct_printers *printers = &printers0;
#endif

/* Sizes of the current tables, initialised to the personality 0 sizes. */
unsigned nsyscalls = nsyscalls0;
unsigned nerrnos = nerrnos0;
unsigned nsignals = nsignals0;
unsigned nioctlents = nioctlents0;
223
224 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
225         nsyscalls0,
226 #if SUPPORTED_PERSONALITIES > 1
227         nsyscalls1,
228 #endif
229 #if SUPPORTED_PERSONALITIES > 2
230         nsyscalls2,
231 #endif
232 };
233 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
234         sysent0,
235 #if SUPPORTED_PERSONALITIES > 1
236         sysent1,
237 #endif
238 #if SUPPORTED_PERSONALITIES > 2
239         sysent2,
240 #endif
241 };
242
243 #if SUPPORTED_PERSONALITIES > 1
244 unsigned current_personality;
245
246 # ifndef current_wordsize
247 unsigned current_wordsize;
248 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
249         PERSONALITY0_WORDSIZE,
250         PERSONALITY1_WORDSIZE,
251 # if SUPPORTED_PERSONALITIES > 2
252         PERSONALITY2_WORDSIZE,
253 # endif
254 };
255 # endif
256
257 # ifndef current_klongsize
258 unsigned current_klongsize;
259 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
260         PERSONALITY0_KLONGSIZE,
261         PERSONALITY1_KLONGSIZE,
262 #  if SUPPORTED_PERSONALITIES > 2
263         PERSONALITY2_KLONGSIZE,
264 #  endif
265 };
266 # endif
267
268 void
269 set_personality(int personality)
270 {
271         nsyscalls = nsyscall_vec[personality];
272         sysent = sysent_vec[personality];
273
274         switch (personality) {
275         case 0:
276                 errnoent = errnoent0;
277                 nerrnos = nerrnos0;
278                 ioctlent = ioctlent0;
279                 nioctlents = nioctlents0;
280                 signalent = signalent0;
281                 nsignals = nsignals0;
282                 printers = &printers0;
283                 break;
284
285         case 1:
286                 errnoent = errnoent1;
287                 nerrnos = nerrnos1;
288                 ioctlent = ioctlent1;
289                 nioctlents = nioctlents1;
290                 signalent = signalent1;
291                 nsignals = nsignals1;
292                 printers = &printers1;
293                 break;
294
295 # if SUPPORTED_PERSONALITIES > 2
296         case 2:
297                 errnoent = errnoent2;
298                 nerrnos = nerrnos2;
299                 ioctlent = ioctlent2;
300                 nioctlents = nioctlents2;
301                 signalent = signalent2;
302                 nsignals = nsignals2;
303                 printers = &printers2;
304                 break;
305 # endif
306         }
307
308         current_personality = personality;
309 # ifndef current_wordsize
310         current_wordsize = personality_wordsize[personality];
311 # endif
312 # ifndef current_klongsize
313         current_klongsize = personality_klongsize[personality];
314 # endif
315 }
316
317 static void
318 update_personality(struct tcb *tcp, unsigned int personality)
319 {
320         if (personality == current_personality)
321                 return;
322         set_personality(personality);
323
324         if (personality == tcp->currpers)
325                 return;
326         tcp->currpers = personality;
327
328 # undef PERSONALITY_NAMES
329 # if defined POWERPC64
330 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
331 # elif defined X86_64
332 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
333 # elif defined X32
334 #  define PERSONALITY_NAMES {"x32", "32 bit"}
335 # elif defined AARCH64
336 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
337 # elif defined TILE
338 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
339 # endif
340 # ifdef PERSONALITY_NAMES
341         if (!qflag) {
342                 static const char *const names[] = PERSONALITY_NAMES;
343                 error_msg("[ Process PID=%d runs in %s mode. ]",
344                           tcp->pid, names[personality]);
345         }
346 # endif
347 }
348 #endif
349
350 #ifdef SYS_socket_subcall
351 static void
352 decode_socket_subcall(struct tcb *tcp)
353 {
354         const int call = tcp->u_arg[0];
355
356         if (call < 1 || call >= SYS_socket_nsubcalls)
357                 return;
358
359         const kernel_ulong_t scno = SYS_socket_subcall + call;
360         const unsigned int nargs = sysent[scno].nargs;
361         uint64_t buf[nargs];
362
363         if (umoven(tcp, tcp->u_arg[1], nargs * current_wordsize, buf) < 0)
364                 return;
365
366         tcp->scno = scno;
367         tcp->qual_flg = qual_flags(scno);
368         tcp->s_ent = &sysent[scno];
369
370         unsigned int i;
371         for (i = 0; i < nargs; ++i)
372                 tcp->u_arg[i] = (sizeof(uint32_t) == current_wordsize)
373                                 ? ((uint32_t *) (void *) buf)[i] : buf[i];
374 }
375 #endif
376
377 #ifdef SYS_ipc_subcall
378 static void
379 decode_ipc_subcall(struct tcb *tcp)
380 {
381         unsigned int call = tcp->u_arg[0];
382         const unsigned int version = call >> 16;
383
384         if (version) {
385 # if defined S390 || defined S390X
386                 return;
387 # else
388 #  ifdef SPARC64
389                 if (current_wordsize == 8)
390                         return;
391 #  endif
392                 set_tcb_priv_ulong(tcp, version);
393                 call &= 0xffff;
394 # endif
395         }
396
397         switch (call) {
398                 case  1: case  2: case  3: case  4:
399                 case 11: case 12: case 13: case 14:
400                 case 21: case 22: case 23: case 24:
401                         break;
402                 default:
403                         return;
404         }
405
406         tcp->scno = SYS_ipc_subcall + call;
407         tcp->qual_flg = qual_flags(tcp->scno);
408         tcp->s_ent = &sysent[tcp->scno];
409
410         const unsigned int n = tcp->s_ent->nargs;
411         unsigned int i;
412         for (i = 0; i < n; i++)
413                 tcp->u_arg[i] = tcp->u_arg[i + 1];
414 }
415 #endif
416
417 #ifdef LINUX_MIPSO32
418 static void
419 decode_mips_subcall(struct tcb *tcp)
420 {
421         if (!scno_is_valid(tcp->u_arg[0]))
422                 return;
423         tcp->scno = tcp->u_arg[0];
424         tcp->qual_flg = qual_flags(tcp->scno);
425         tcp->s_ent = &sysent[tcp->scno];
426         memmove(&tcp->u_arg[0], &tcp->u_arg[1],
427                 sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));
428         /*
429          * Fetching the last arg of 7-arg syscalls (fadvise64_64
430          * and sync_file_range) requires additional code,
431          * see linux/mips/get_syscall_args.c
432          */
433         if (tcp->s_ent->nargs == MAX_ARGS) {
434                 if (umoven(tcp,
435                            mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
436                            sizeof(tcp->u_arg[0]),
437                            &tcp->u_arg[MAX_ARGS - 1]) < 0)
438                 tcp->u_arg[MAX_ARGS - 1] = 0;
439         }
440 }
441 #endif /* LINUX_MIPSO32 */
442
443 static void
444 dumpio(struct tcb *tcp)
445 {
446         if (syserror(tcp))
447                 return;
448
449         int fd = tcp->u_arg[0];
450         if (fd < 0)
451                 return;
452
453         if (is_number_in_set(fd, &read_set)) {
454                 switch (tcp->s_ent->sen) {
455                 case SEN_read:
456                 case SEN_pread:
457                 case SEN_recv:
458                 case SEN_recvfrom:
459                 case SEN_mq_timedreceive:
460                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
461                         return;
462                 case SEN_readv:
463                 case SEN_preadv:
464                 case SEN_preadv2:
465                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
466                                      tcp->u_rval);
467                         return;
468                 case SEN_recvmsg:
469                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
470                         return;
471                 case SEN_recvmmsg:
472                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
473                         return;
474                 }
475         }
476         if (is_number_in_set(fd, &write_set)) {
477                 switch (tcp->s_ent->sen) {
478                 case SEN_write:
479                 case SEN_pwrite:
480                 case SEN_send:
481                 case SEN_sendto:
482                 case SEN_mq_timedsend:
483                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
484                         break;
485                 case SEN_writev:
486                 case SEN_pwritev:
487                 case SEN_pwritev2:
488                 case SEN_vmsplice:
489                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], -1);
490                         break;
491                 case SEN_sendmsg:
492                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1);
493                         break;
494                 case SEN_sendmmsg:
495                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
496                         break;
497                 }
498         }
499 }
500
501 /*
502  * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
503  * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
504  */
505 static kernel_ulong_t
506 shuffle_scno(kernel_ulong_t scno)
507 {
508 #if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
509         if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
510                 return scno;
511
512         /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
513         if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
514                 return 0x000ffff0;
515         if (scno == 0x000ffff0)
516                 return ARM_FIRST_SHUFFLED_SYSCALL;
517
518 # define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
519         /*
520          * Is it ARM specific syscall?
521          * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
522          * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
523          */
524         if (scno >= 0x000f0000 &&
525             scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
526                 return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
527         }
528         if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
529                 return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
530         }
531 #endif /* ARM || AARCH64 */
532
533         return scno;
534 }
535
536 const char *
537 err_name(unsigned long err)
538 {
539         if ((err < nerrnos) && errnoent[err])
540                 return errnoent[err];
541
542         return NULL;
543 }
544
/*
 * Status of the register set; trace_syscall_exiting() treats any
 * non-zero value as "the syscall result cannot be fetched".
 */
static long get_regs_error;

/* Mark the register set as unavailable by storing -1 in get_regs_error. */
void
clear_regs(void)
{
        get_regs_error = -1;
}
552
553 static int get_syscall_args(struct tcb *);
554 static int get_syscall_result(struct tcb *);
555 static int arch_get_scno(struct tcb *tcp);
556 static int arch_set_scno(struct tcb *, kernel_ulong_t);
557 static void get_error(struct tcb *, const bool);
558 static int arch_set_error(struct tcb *);
559
/*
 * Global fault injection options, one table per personality.
 * inject_syscall_fault_entering() copies nsyscalls entries of the
 * current personality's table into a tcb the first time it is needed.
 */
struct fault_opts *fault_vec[SUPPORTED_PERSONALITIES];
561
562 static struct fault_opts *
563 tcb_fault_opts(struct tcb *tcp)
564 {
565         return (scno_in_range(tcp->scno) && tcp->fault_vec[current_personality])
566                ? &tcp->fault_vec[current_personality][tcp->scno] : NULL;
567 }
568
569
570 static long
571 inject_syscall_fault_entering(struct tcb *tcp)
572 {
573         if (!tcp->fault_vec[current_personality]) {
574                 tcp->fault_vec[current_personality] =
575                         xcalloc(nsyscalls, sizeof(**fault_vec));
576                 memcpy(tcp->fault_vec[current_personality],
577                        fault_vec[current_personality],
578                        nsyscalls * sizeof(**fault_vec));
579         }
580
581         struct fault_opts *opts = tcb_fault_opts(tcp);
582
583         if (!opts || opts->first == 0)
584                 return 0;
585
586         --opts->first;
587
588         if (opts->first != 0)
589                 return 0;
590
591         opts->first = opts->step;
592
593         if (!arch_set_scno(tcp, -1))
594                 tcp->flags |= TCB_FAULT_INJ;
595
596         return 0;
597 }
598
599 static long
600 update_syscall_fault_exiting(struct tcb *tcp)
601 {
602         struct fault_opts *opts = tcb_fault_opts(tcp);
603
604         if (opts && opts->err && tcp->u_error != opts->err) {
605                 unsigned long u_error = tcp->u_error;
606                 tcp->u_error = opts->err;
607                 if (arch_set_error(tcp))
608                         tcp->u_error = u_error;
609         }
610
611         return 0;
612 }
613
/*
 * Handle a syscall-entry stop of the tracee.
 *
 * Fetches the syscall number and arguments, resolves multiplexed
 * subcalls, applies the trace and path filters, arms fault injection
 * and prints the opening part of the syscall line.
 *
 * Returns 0 when get_scno() returned 0 and when the syscall is
 * filtered out; in the latter case TCB_INSYSCALL and TCB_FILTERED are
 * set.  Every other path goes through the `ret' label, which sets
 * TCB_INSYSCALL, stores the result in tcp->sys_func_rval, records the
 * entry time if Tflag or cflag is set, and returns that result.
 */
static int
trace_syscall_entering(struct tcb *tcp)
{
        int res, scno_good;

        /* scno_good remembers whether the syscall number itself was obtained. */
        scno_good = res = get_scno(tcp);
        if (res == 0)
                return res;
        if (res == 1)
                res = get_syscall_args(tcp);

        if (res != 1) {
                printleader(tcp);
                tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
                /*
                 * " <unavailable>" will be added later by the code which
                 * detects ptrace errors.
                 */
                goto ret;
        }

#ifdef LINUX_MIPSO32
        if (SEN_syscall == tcp->s_ent->sen)
                decode_mips_subcall(tcp);
#endif

        /* Replace socketcall/ipc multiplexers with the actual subcall. */
#if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
        switch (tcp->s_ent->sen) {
# ifdef SYS_socket_subcall
                case SEN_socketcall:
                        decode_socket_subcall(tcp);
                        break;
# endif
# ifdef SYS_ipc_subcall
                case SEN_ipc:
                        decode_ipc_subcall(tcp);
                        break;
# endif
        }
#endif

        /* Refrain from fault injection while the tracee executes strace code. */
        if (hide_log(tcp)) {
                tcp->qual_flg &= ~QUAL_FAULT;
        }

        /* An exec-family syscall ends the hidden phase. */
        switch (tcp->s_ent->sen) {
                case SEN_execve:
                case SEN_execveat:
#if defined SPARC || defined SPARC64
                case SEN_execv:
#endif
                        tcp->flags &= ~TCB_HIDE_LOG;
                        break;
        }

        /* Not selected for tracing, or path tracing is on and nothing matches. */
        if (!(tcp->qual_flg & QUAL_TRACE)
         || (tracing_paths && !pathtrace_match(tcp))
        ) {
                tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
                tcp->sys_func_rval = 0;
                return 0;
        }

        tcp->flags &= ~TCB_FILTERED;

        if (hide_log(tcp)) {
                res = 0;
                goto ret;
        }

        if (tcp->qual_flg & QUAL_FAULT)
                inject_syscall_fault_entering(tcp);

        /* Statistics-only mode prints nothing per syscall. */
        if (cflag == CFLAG_ONLY_STATS) {
                res = 0;
                goto ret;
        }

#ifdef USE_LIBUNWIND
        if (stack_trace_enabled) {
                if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
                        unwind_capture_stacktrace(tcp);
        }
#endif

        /* Print "name(" followed by raw or decoded arguments. */
        printleader(tcp);
        tprintf("%s(", tcp->s_ent->sys_name);
        if (tcp->qual_flg & QUAL_RAW)
                res = printargs(tcp);
        else
                res = tcp->s_ent->sys_func(tcp);

        fflush(tcp->outf);
 ret:
        tcp->flags |= TCB_INSYSCALL;
        tcp->sys_func_rval = res;
        /* Measure the entrance time as late as possible to avoid errors. */
        if (Tflag || cflag)
                gettimeofday(&tcp->etime, NULL);
        return res;
}
716
717 static bool
718 syscall_fault_injected(struct tcb *tcp)
719 {
720         return tcp->flags & TCB_FAULT_INJ;
721 }
722
723 static int
724 trace_syscall_exiting(struct tcb *tcp)
725 {
726         int sys_res;
727         struct timeval tv;
728         int res;
729         unsigned long u_error;
730         const char *u_error_str;
731
732         /* Measure the exit time as early as possible to avoid errors. */
733         if (Tflag || cflag)
734                 gettimeofday(&tv, NULL);
735
736 #ifdef USE_LIBUNWIND
737         if (stack_trace_enabled) {
738                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
739                         unwind_cache_invalidate(tcp);
740         }
741 #endif
742
743 #if SUPPORTED_PERSONALITIES > 1
744         update_personality(tcp, tcp->currpers);
745 #endif
746         res = (get_regs_error ? -1 : get_syscall_result(tcp));
747         if (filtered(tcp) || hide_log(tcp))
748                 goto ret;
749
750         if (syserror(tcp) && syscall_fault_injected(tcp))
751                 update_syscall_fault_exiting(tcp);
752
753         if (cflag) {
754                 count_syscall(tcp, &tv);
755                 if (cflag == CFLAG_ONLY_STATS) {
756                         goto ret;
757                 }
758         }
759
760         /* If not in -ff mode, and printing_tcp != tcp,
761          * then the log currently does not end with output
762          * of _our syscall entry_, but with something else.
763          * We need to say which syscall's return is this.
764          *
765          * Forced reprinting via TCB_REPRINT is used only by
766          * "strace -ff -oLOG test/threaded_execve" corner case.
767          * It's the only case when -ff mode needs reprinting.
768          */
769         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
770                 tcp->flags &= ~TCB_REPRINT;
771                 printleader(tcp);
772                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
773         }
774         printing_tcp = tcp;
775
776         tcp->s_prev_ent = NULL;
777         if (res != 1) {
778                 /* There was error in one of prior ptrace ops */
779                 tprints(") ");
780                 tabto();
781                 tprints("= ? <unavailable>\n");
782                 line_ended();
783                 tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
784                 tcp->sys_func_rval = 0;
785                 free_tcb_priv_data(tcp);
786                 return res;
787         }
788         tcp->s_prev_ent = tcp->s_ent;
789
790         sys_res = 0;
791         if (tcp->qual_flg & QUAL_RAW) {
792                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
793         } else {
794         /* FIXME: not_failing_only (IOW, option -z) is broken:
795          * failure of syscall is known only after syscall return.
796          * Thus we end up with something like this on, say, ENOENT:
797          *     open("doesnt_exist", O_RDONLY <unfinished ...>
798          *     {next syscall decode}
799          * whereas the intended result is that open(...) line
800          * is not shown at all.
801          */
802                 if (not_failing_only && tcp->u_error)
803                         goto ret;       /* ignore failed syscalls */
804                 if (tcp->sys_func_rval & RVAL_DECODED)
805                         sys_res = tcp->sys_func_rval;
806                 else
807                         sys_res = tcp->s_ent->sys_func(tcp);
808         }
809
810         tprints(") ");
811         tabto();
812         u_error = tcp->u_error;
813
814         if (tcp->qual_flg & QUAL_RAW) {
815                 if (u_error) {
816                         tprintf("= -1 (errno %lu)", u_error);
817                         if (syscall_fault_injected(tcp))
818                                 tprints(" (INJECTED)");
819                 } else {
820                         tprintf("= %#" PRI_klx, tcp->u_rval);
821                 }
822         }
823         else if (!(sys_res & RVAL_NONE) && u_error) {
824                 switch (u_error) {
825                 /* Blocked signals do not interrupt any syscalls.
826                  * In this case syscalls don't return ERESTARTfoo codes.
827                  *
828                  * Deadly signals set to SIG_DFL interrupt syscalls
829                  * and kill the process regardless of which of the codes below
830                  * is returned by the interrupted syscall.
831                  * In some cases, kernel forces a kernel-generated deadly
832                  * signal to be unblocked and set to SIG_DFL (and thus cause
833                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
834                  * or SIGILL. (The alternative is to leave process spinning
835                  * forever on the faulty instruction - not useful).
836                  *
837                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
838                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
839                  * but kernel will always restart them.
840                  */
841                 case ERESTARTSYS:
842                         /* Most common type of signal-interrupted syscall exit code.
843                          * The system call will be restarted with the same arguments
844                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
845                          */
846                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
847                         break;
848                 case ERESTARTNOINTR:
849                         /* Rare. For example, fork() returns this if interrupted.
850                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
851                          */
852                         tprints("= ? ERESTARTNOINTR (To be restarted)");
853                         break;
854                 case ERESTARTNOHAND:
855                         /* pause(), rt_sigsuspend() etc use this code.
856                          * SA_RESTART is ignored (assumed not set):
857                          * syscall won't restart (will return EINTR instead)
858                          * even after signal with SA_RESTART set. However,
859                          * after SIG_IGN or SIG_DFL signal it will restart
860                          * (thus the name "restart only if has no handler").
861                          */
862                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
863                         break;
864                 case ERESTART_RESTARTBLOCK:
865                         /* Syscalls like nanosleep(), poll() which can't be
866                          * restarted with their original arguments use this
867                          * code. Kernel will execute restart_syscall() instead,
868                          * which changes arguments before restarting syscall.
869                          * SA_RESTART is ignored (assumed not set) similarly
870                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
871                          * since restart data is saved in "restart block"
872                          * in task struct, and if signal handler uses a syscall
873                          * which in turn saves another such restart block,
874                          * old data is lost and restart becomes impossible)
875                          */
876                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
877                         break;
878                 default:
879                         u_error_str = err_name(u_error);
880                         if (u_error_str)
881                                 tprintf("= -1 %s (%s)",
882                                         u_error_str, strerror(u_error));
883                         else
884                                 tprintf("= -1 %lu (%s)",
885                                         u_error, strerror(u_error));
886                         break;
887                 }
888                 if (syscall_fault_injected(tcp))
889                         tprintf(" (INJECTED)");
890                 if ((sys_res & RVAL_STR) && tcp->auxstr)
891                         tprintf(" (%s)", tcp->auxstr);
892         }
893         else {
894                 if (sys_res & RVAL_NONE)
895                         tprints("= ?");
896                 else {
897                         switch (sys_res & RVAL_MASK) {
898                         case RVAL_HEX:
899 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
900                                 if (current_wordsize < sizeof(tcp->u_rval)) {
901                                         tprintf("= %#x",
902                                                 (unsigned int) tcp->u_rval);
903                                 } else
904 #endif
905                                 {
906                                         tprintf("= %#" PRI_klx, tcp->u_rval);
907                                 }
908                                 break;
909                         case RVAL_OCTAL:
910                                 tprints("= ");
911                                 print_numeric_long_umask(tcp->u_rval);
912                                 break;
913                         case RVAL_UDECIMAL:
914 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
915                                 if (current_wordsize < sizeof(tcp->u_rval)) {
916                                         tprintf("= %u",
917                                                 (unsigned int) tcp->u_rval);
918                                 } else
919 #endif
920                                 {
921                                         tprintf("= %" PRI_klu, tcp->u_rval);
922                                 }
923                                 break;
924                         case RVAL_DECIMAL:
925                                 tprintf("= %" PRI_kld, tcp->u_rval);
926                                 break;
927                         case RVAL_FD:
928                                 if (show_fd_path) {
929                                         tprints("= ");
930                                         printfd(tcp, tcp->u_rval);
931                                 }
932                                 else
933                                         tprintf("= %" PRI_kld, tcp->u_rval);
934                                 break;
935                         default:
936                                 error_msg("invalid rval format");
937                                 break;
938                         }
939                 }
940                 if ((sys_res & RVAL_STR) && tcp->auxstr)
941                         tprintf(" (%s)", tcp->auxstr);
942         }
943         if (Tflag) {
944                 tv_sub(&tv, &tv, &tcp->etime);
945                 tprintf(" <%ld.%06ld>",
946                         (long) tv.tv_sec, (long) tv.tv_usec);
947         }
948         tprints("\n");
949         dumpio(tcp);
950         line_ended();
951
952 #ifdef USE_LIBUNWIND
953         if (stack_trace_enabled)
954                 unwind_print_stacktrace(tcp);
955 #endif
956
957  ret:
958         tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
959         tcp->sys_func_rval = 0;
960         free_tcb_priv_data(tcp);
961         return 0;
962 }
963
/*
 * Handle one syscall stop of tcp: run the exit-side handler when
 * exiting(tcp) is true and the entry-side handler otherwise.
 * The chosen handler's return value is passed through unchanged.
 */
int
trace_syscall(struct tcb *tcp)
{
        if (exiting(tcp))
                return trace_syscall_exiting(tcp);

        return trace_syscall_entering(tcp);
}
970
971 bool
972 is_erestart(struct tcb *tcp)
973 {
974         switch (tcp->u_error) {
975                 case ERESTARTSYS:
976                 case ERESTARTNOINTR:
977                 case ERESTARTNOHAND:
978                 case ERESTART_RESTARTBLOCK:
979                         return true;
980                 default:
981                         return false;
982         }
983 }
984
985 static unsigned long saved_u_error;
986
987 void
988 temporarily_clear_syserror(struct tcb *tcp)
989 {
990         saved_u_error = tcp->u_error;
991         tcp->u_error = 0;
992 }
993
994 void
995 restore_cleared_syserror(struct tcb *tcp)
996 {
997         tcp->u_error = saved_u_error;
998 }
999
1000 /*
1001  * Check the syscall return value register value for whether it is
1002  * a negated errno code indicating an error, or a success return value.
1003  */
1004 static inline bool
1005 is_negated_errno(kernel_ulong_t val)
1006 {
1007         /* Linux kernel defines MAX_ERRNO to 4095. */
1008         kernel_ulong_t max = -(kernel_long_t) 4095;
1009
1010 #ifndef current_klongsize
1011         if (current_klongsize < sizeof(val)) {
1012                 val = (uint32_t) val;
1013                 max = (uint32_t) max;
1014         }
1015 #endif /* !current_klongsize */
1016
1017         return val >= max;
1018 }
1019
1020 #include "arch_regs.c"
1021
1022 #ifdef HAVE_GETRVAL2
1023 # include "arch_getrval2.c"
1024 #endif
1025
1026 void
1027 print_pc(struct tcb *tcp)
1028 {
1029 #if defined ARCH_PC_REG
1030 # define ARCH_GET_PC 0
1031 #elif defined ARCH_PC_PEEK_ADDR
1032         kernel_ulong_t pc;
1033 # define ARCH_PC_REG pc
1034 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1035 #else
1036 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1037 #endif
1038         if (get_regs_error || ARCH_GET_PC)
1039                 tprints(current_wordsize == 4 ? "[????????] "
1040                                               : "[????????????????] ");
1041         else
1042                 tprintf(current_wordsize == 4
1043                         ? "[%08" PRI_klx "] " : "[%016" PRI_klx "] ",
1044                         (kernel_ulong_t) ARCH_PC_REG);
1045 }
1046
1047 #include "getregs_old.h"
1048
1049 #undef ptrace_getregset_or_getregs
1050 #undef ptrace_setregset_or_setregs
1051 #ifdef ARCH_REGS_FOR_GETREGSET
1052
1053 # define ptrace_getregset_or_getregs ptrace_getregset
1054 static long
1055 ptrace_getregset(pid_t pid)
1056 {
1057 # ifdef ARCH_IOVEC_FOR_GETREGSET
1058         /* variable iovec */
1059         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1060         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1061                       &ARCH_IOVEC_FOR_GETREGSET);
1062 # else
1063         /* constant iovec */
1064         static struct iovec io = {
1065                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1066                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1067         };
1068         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1069
1070 # endif
1071 }
1072
1073 # ifndef HAVE_GETREGS_OLD
1074 #  define ptrace_setregset_or_setregs ptrace_setregset
1075 static int
1076 ptrace_setregset(pid_t pid)
1077 {
1078 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1079         /* variable iovec */
1080         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1081                       &ARCH_IOVEC_FOR_GETREGSET);
1082 #  else
1083         /* constant iovec */
1084         static struct iovec io = {
1085                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1086                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1087         };
1088         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1089 #  endif
1090 }
1091 # endif /* !HAVE_GETREGS_OLD */
1092
1093 #elif defined ARCH_REGS_FOR_GETREGS
1094
1095 # define ptrace_getregset_or_getregs ptrace_getregs
1096 static long
1097 ptrace_getregs(pid_t pid)
1098 {
1099 # if defined SPARC || defined SPARC64
1100         /* SPARC systems have the meaning of data and addr reversed */
1101         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1102 # else
1103         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1104 # endif
1105 }
1106
1107 # ifndef HAVE_GETREGS_OLD
1108 #  define ptrace_setregset_or_setregs ptrace_setregs
1109 static int
1110 ptrace_setregs(pid_t pid)
1111 {
1112 #  if defined SPARC || defined SPARC64
1113         /* SPARC systems have the meaning of data and addr reversed */
1114         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1115 #  else
1116         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1117 #  endif
1118 }
1119 # endif /* !HAVE_GETREGS_OLD */
1120
1121 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1122
1123 void
1124 get_regs(pid_t pid)
1125 {
1126 #undef USE_GET_SYSCALL_RESULT_REGS
1127 #ifdef ptrace_getregset_or_getregs
1128
1129 # ifdef HAVE_GETREGS_OLD
1130         /*
1131          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1132          * fallback to getregs_old.
1133          */
1134         static int use_getregs_old;
1135         if (use_getregs_old < 0) {
1136                 get_regs_error = ptrace_getregset_or_getregs(pid);
1137                 return;
1138         } else if (use_getregs_old == 0) {
1139                 get_regs_error = ptrace_getregset_or_getregs(pid);
1140                 if (get_regs_error >= 0) {
1141                         use_getregs_old = -1;
1142                         return;
1143                 }
1144                 if (errno == EPERM || errno == ESRCH)
1145                         return;
1146                 use_getregs_old = 1;
1147         }
1148         get_regs_error = getregs_old(pid);
1149 # else /* !HAVE_GETREGS_OLD */
1150         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1151         get_regs_error = ptrace_getregset_or_getregs(pid);
1152 # endif /* !HAVE_GETREGS_OLD */
1153
1154 #else /* !ptrace_getregset_or_getregs */
1155
1156 # define USE_GET_SYSCALL_RESULT_REGS 1
1157 # warning get_regs is not implemented for this architecture yet
1158         get_regs_error = 0;
1159
1160 #endif /* !ptrace_getregset_or_getregs */
1161 }
1162
#ifdef ptrace_setregset_or_setregs
/*
 * Write the cached registers back to tracee `pid` through whichever
 * wrapper ptrace_setregset_or_setregs names.  Only compiled when such a
 * wrapper exists.  Returns the wrapper's result unchanged.
 */
static int
set_regs(pid_t pid)
{
        const int rc = ptrace_setregset_or_setregs(pid);

        return rc;
}
#endif /* ptrace_setregset_or_setregs */
1170
1171 struct sysent_buf {
1172         struct tcb *tcp;
1173         struct_sysent ent;
1174         char buf[sizeof("syscall_%lu") + sizeof(kernel_ulong_t) * 3];
1175 };
1176
1177 static void
1178 free_sysent_buf(void *ptr)
1179 {
1180         struct sysent_buf *s = ptr;
1181         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1182         free(ptr);
1183 }
1184
1185 /*
1186  * Returns:
1187  * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
1188  * 1: ok, continue in trace_syscall_entering().
1189  * other: error, trace_syscall_entering() should print error indicator
1190  *    ("????" etc) and bail out.
1191  */
1192 int
1193 get_scno(struct tcb *tcp)
1194 {
1195         if (get_regs_error)
1196                 return -1;
1197
1198         int rc = arch_get_scno(tcp);
1199         if (rc != 1)
1200                 return rc;
1201
1202         if (scno_is_valid(tcp->scno)) {
1203                 tcp->s_ent = &sysent[tcp->scno];
1204                 tcp->qual_flg = qual_flags(tcp->scno);
1205         } else {
1206                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1207
1208                 s->tcp = tcp;
1209                 s->ent.nargs = MAX_ARGS;
1210                 s->ent.sen = SEN_printargs;
1211                 s->ent.sys_func = printargs;
1212                 s->ent.sys_name = s->buf;
1213                 sprintf(s->buf, "syscall_%" PRI_klu, shuffle_scno(tcp->scno));
1214
1215                 tcp->s_ent = &s->ent;
1216                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1217
1218                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1219
1220                 if (debug_flag)
1221                         error_msg("pid %d invalid syscall %" PRI_kld,
1222                                   tcp->pid, tcp->scno);
1223         }
1224         return 1;
1225 }
1226
1227 #ifdef USE_GET_SYSCALL_RESULT_REGS
1228 static int get_syscall_result_regs(struct tcb *);
1229 #endif
1230
1231 /* Returns:
1232  * 1: ok, continue in trace_syscall_exiting().
1233  * -1: error, trace_syscall_exiting() should print error indicator
1234  *    ("????" etc) and bail out.
1235  */
1236 static int
1237 get_syscall_result(struct tcb *tcp)
1238 {
1239 #ifdef USE_GET_SYSCALL_RESULT_REGS
1240         if (get_syscall_result_regs(tcp))
1241                 return -1;
1242 #endif
1243         tcp->u_error = 0;
1244         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1245
1246         return 1;
1247 }
1248
1249 #include "get_scno.c"
1250 #include "set_scno.c"
1251 #include "get_syscall_args.c"
1252 #ifdef USE_GET_SYSCALL_RESULT_REGS
1253 # include "get_syscall_result.c"
1254 #endif
1255 #include "get_error.c"
1256 #include "set_error.c"
1257 #ifdef HAVE_GETREGS_OLD
1258 # include "getregs_old.c"
1259 #endif
1260
1261 const char *
1262 syscall_name(kernel_ulong_t scno)
1263 {
1264 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1265         if (current_personality == X32_PERSONALITY_NUMBER)
1266                 scno &= ~__X32_SYSCALL_BIT;
1267 #endif
1268         return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
1269 }