]> granicus.if.org Git - strace/blob - syscall.c
Check for current_klongsize instead of current_personality where appropriate
[strace] / syscall.c
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The name of the author may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include "defs.h"
35 #include "native_defs.h"
36 #include <sys/param.h>
37 #include <signal.h>
38
39 /* for struct iovec */
40 #include <sys/uio.h>
41
42 #include "regs.h"
43 #include "ptrace.h"
44
45 #if defined(SPARC64)
46 # undef PTRACE_GETREGS
47 # define PTRACE_GETREGS PTRACE_GETREGS64
48 # undef PTRACE_SETREGS
49 # define PTRACE_SETREGS PTRACE_SETREGS64
50 #endif
51
52 #if defined SPARC64
53 # include <asm/psrcompat.h>
54 #elif defined SPARC
55 # include <asm/psr.h>
56 #endif
57
58 #ifdef IA64
59 # include <asm/rse.h>
60 #endif
61
62 #ifndef NT_PRSTATUS
63 # define NT_PRSTATUS 1
64 #endif
65
66 #ifndef NSIG
67 # warning: NSIG is not defined, using 32
68 # define NSIG 32
69 #endif
70
71 #include "syscall.h"
72
73 /* Define these shorthand notations to simplify the syscallent files. */
74 #define TD TRACE_DESC
75 #define TF TRACE_FILE
76 #define TI TRACE_IPC
77 #define TN TRACE_NETWORK
78 #define TP TRACE_PROCESS
79 #define TS TRACE_SIGNAL
80 #define TM TRACE_MEMORY
81 #define NF SYSCALL_NEVER_FAILS
82 #define MA MAX_ARGS
83 #define SI STACKTRACE_INVALIDATE_CACHE
84 #define SE STACKTRACE_CAPTURE_ON_ENTER
85
86 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
87
88 const struct_sysent sysent0[] = {
89 #include "syscallent.h"
90 };
91
92 #if SUPPORTED_PERSONALITIES > 1
93 # include PERSONALITY1_INCLUDE_FUNCS
94 static const struct_sysent sysent1[] = {
95 # include "syscallent1.h"
96 };
97 #endif
98
99 #if SUPPORTED_PERSONALITIES > 2
100 # include PERSONALITY2_INCLUDE_FUNCS
101 static const struct_sysent sysent2[] = {
102 # include "syscallent2.h"
103 };
104 #endif
105
106 /* Now undef them since short defines cause wicked namespace pollution. */
107 #undef SEN
108 #undef TD
109 #undef TF
110 #undef TI
111 #undef TN
112 #undef TP
113 #undef TS
114 #undef TM
115 #undef NF
116 #undef MA
117 #undef SI
118 #undef SE
119
120 /*
121  * `ioctlent[012].h' files are automatically generated by the auxiliary
122  * program `ioctlsort', such that the list is sorted by the `code' field.
123  * This has the side-effect of resolving the _IO.. macros into
124  * plain integers, eliminating the need to include here everything
125  * in "/usr/include".
126  */
127
128 const char *const errnoent0[] = {
129 #include "errnoent.h"
130 };
131 const char *const signalent0[] = {
132 #include "signalent.h"
133 };
134 const struct_ioctlent ioctlent0[] = {
135 #include "ioctlent0.h"
136 };
137
138 #if SUPPORTED_PERSONALITIES > 1
139 static const char *const errnoent1[] = {
140 # include "errnoent1.h"
141 };
142 static const char *const signalent1[] = {
143 # include "signalent1.h"
144 };
145 static const struct_ioctlent ioctlent1[] = {
146 # include "ioctlent1.h"
147 };
148 # include PERSONALITY0_INCLUDE_PRINTERS_DECLS
149 static const struct_printers printers0 = {
150 # include PERSONALITY0_INCLUDE_PRINTERS_DEFS
151 };
152 # include PERSONALITY1_INCLUDE_PRINTERS_DECLS
153 static const struct_printers printers1 = {
154 # include PERSONALITY1_INCLUDE_PRINTERS_DEFS
155 };
156 #endif
157
158 #if SUPPORTED_PERSONALITIES > 2
159 static const char *const errnoent2[] = {
160 # include "errnoent2.h"
161 };
162 static const char *const signalent2[] = {
163 # include "signalent2.h"
164 };
165 static const struct_ioctlent ioctlent2[] = {
166 # include "ioctlent2.h"
167 };
168 # include PERSONALITY2_INCLUDE_PRINTERS_DECLS
169 static const struct_printers printers2 = {
170 # include PERSONALITY2_INCLUDE_PRINTERS_DEFS
171 };
172 #endif
173
174 enum {
175         nsyscalls0 = ARRAY_SIZE(sysent0)
176 #if SUPPORTED_PERSONALITIES > 1
177         , nsyscalls1 = ARRAY_SIZE(sysent1)
178 # if SUPPORTED_PERSONALITIES > 2
179         , nsyscalls2 = ARRAY_SIZE(sysent2)
180 # endif
181 #endif
182 };
183
184 enum {
185         nerrnos0 = ARRAY_SIZE(errnoent0)
186 #if SUPPORTED_PERSONALITIES > 1
187         , nerrnos1 = ARRAY_SIZE(errnoent1)
188 # if SUPPORTED_PERSONALITIES > 2
189         , nerrnos2 = ARRAY_SIZE(errnoent2)
190 # endif
191 #endif
192 };
193
194 enum {
195         nsignals0 = ARRAY_SIZE(signalent0)
196 #if SUPPORTED_PERSONALITIES > 1
197         , nsignals1 = ARRAY_SIZE(signalent1)
198 # if SUPPORTED_PERSONALITIES > 2
199         , nsignals2 = ARRAY_SIZE(signalent2)
200 # endif
201 #endif
202 };
203
204 enum {
205         nioctlents0 = ARRAY_SIZE(ioctlent0)
206 #if SUPPORTED_PERSONALITIES > 1
207         , nioctlents1 = ARRAY_SIZE(ioctlent1)
208 # if SUPPORTED_PERSONALITIES > 2
209         , nioctlents2 = ARRAY_SIZE(ioctlent2)
210 # endif
211 #endif
212 };
213
214 #if SUPPORTED_PERSONALITIES > 1
215 const struct_sysent *sysent = sysent0;
216 const char *const *errnoent = errnoent0;
217 const char *const *signalent = signalent0;
218 const struct_ioctlent *ioctlent = ioctlent0;
219 const struct_printers *printers = &printers0;
220 #endif
221
222 unsigned nsyscalls = nsyscalls0;
223 unsigned nerrnos = nerrnos0;
224 unsigned nsignals = nsignals0;
225 unsigned nioctlents = nioctlents0;
226
227 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
228         nsyscalls0,
229 #if SUPPORTED_PERSONALITIES > 1
230         nsyscalls1,
231 #endif
232 #if SUPPORTED_PERSONALITIES > 2
233         nsyscalls2,
234 #endif
235 };
236 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
237         sysent0,
238 #if SUPPORTED_PERSONALITIES > 1
239         sysent1,
240 #endif
241 #if SUPPORTED_PERSONALITIES > 2
242         sysent2,
243 #endif
244 };
245
246 #if SUPPORTED_PERSONALITIES > 1
247 unsigned current_personality;
248
249 # ifndef current_wordsize
250 unsigned current_wordsize;
251 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
252         PERSONALITY0_WORDSIZE,
253         PERSONALITY1_WORDSIZE,
254 # if SUPPORTED_PERSONALITIES > 2
255         PERSONALITY2_WORDSIZE,
256 # endif
257 };
258 # endif
259
260 # ifndef current_klongsize
261 unsigned current_klongsize;
262 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
263         PERSONALITY0_KLONGSIZE,
264         PERSONALITY1_KLONGSIZE,
265 #  if SUPPORTED_PERSONALITIES > 2
266         PERSONALITY2_KLONGSIZE,
267 #  endif
268 };
269 # endif
270
271 void
272 set_personality(int personality)
273 {
274         nsyscalls = nsyscall_vec[personality];
275         sysent = sysent_vec[personality];
276
277         switch (personality) {
278         case 0:
279                 errnoent = errnoent0;
280                 nerrnos = nerrnos0;
281                 ioctlent = ioctlent0;
282                 nioctlents = nioctlents0;
283                 signalent = signalent0;
284                 nsignals = nsignals0;
285                 printers = &printers0;
286                 break;
287
288         case 1:
289                 errnoent = errnoent1;
290                 nerrnos = nerrnos1;
291                 ioctlent = ioctlent1;
292                 nioctlents = nioctlents1;
293                 signalent = signalent1;
294                 nsignals = nsignals1;
295                 printers = &printers1;
296                 break;
297
298 # if SUPPORTED_PERSONALITIES > 2
299         case 2:
300                 errnoent = errnoent2;
301                 nerrnos = nerrnos2;
302                 ioctlent = ioctlent2;
303                 nioctlents = nioctlents2;
304                 signalent = signalent2;
305                 nsignals = nsignals2;
306                 printers = &printers2;
307                 break;
308 # endif
309         }
310
311         current_personality = personality;
312 # ifndef current_wordsize
313         current_wordsize = personality_wordsize[personality];
314 # endif
315 # ifndef current_klongsize
316         current_klongsize = personality_klongsize[personality];
317 # endif
318 }
319
320 static void
321 update_personality(struct tcb *tcp, unsigned int personality)
322 {
323         if (personality == current_personality)
324                 return;
325         set_personality(personality);
326
327         if (personality == tcp->currpers)
328                 return;
329         tcp->currpers = personality;
330
331 # undef PERSONALITY_NAMES
332 # if defined POWERPC64
333 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
334 # elif defined X86_64
335 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
336 # elif defined X32
337 #  define PERSONALITY_NAMES {"x32", "32 bit"}
338 # elif defined AARCH64
339 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
340 # elif defined TILE
341 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
342 # endif
343 # ifdef PERSONALITY_NAMES
344         if (!qflag) {
345                 static const char *const names[] = PERSONALITY_NAMES;
346                 error_msg("[ Process PID=%d runs in %s mode. ]",
347                           tcp->pid, names[personality]);
348         }
349 # endif
350 }
351 #endif
352
353 #ifdef SYS_socket_subcall
354 static void
355 decode_socket_subcall(struct tcb *tcp)
356 {
357         const int call = tcp->u_arg[0];
358
359         if (call < 1 || call >= SYS_socket_nsubcalls)
360                 return;
361
362         const kernel_scno_t scno = SYS_socket_subcall + call;
363         const unsigned int nargs = sysent[scno].nargs;
364         uint64_t buf[nargs];
365
366         if (umoven(tcp, tcp->u_arg[1], nargs * current_wordsize, buf) < 0)
367                 return;
368
369         tcp->scno = scno;
370         tcp->qual_flg = qual_flags(scno);
371         tcp->s_ent = &sysent[scno];
372
373         unsigned int i;
374         for (i = 0; i < nargs; ++i)
375                 tcp->u_arg[i] = (sizeof(uint32_t) == current_wordsize)
376                                 ? ((uint32_t *) (void *) buf)[i] : buf[i];
377 }
378 #endif
379
380 #ifdef SYS_ipc_subcall
381 static void
382 decode_ipc_subcall(struct tcb *tcp)
383 {
384         unsigned int call = tcp->u_arg[0];
385         const unsigned int version = call >> 16;
386
387         if (version) {
388 # if defined S390 || defined S390X
389                 return;
390 # else
391 #  ifdef SPARC64
392                 if (current_wordsize == 8)
393                         return;
394 #  endif
395                 set_tcb_priv_ulong(tcp, version);
396                 call &= 0xffff;
397 # endif
398         }
399
400         switch (call) {
401                 case  1: case  2: case  3: case  4:
402                 case 11: case 12: case 13: case 14:
403                 case 21: case 22: case 23: case 24:
404                         break;
405                 default:
406                         return;
407         }
408
409         tcp->scno = SYS_ipc_subcall + call;
410         tcp->qual_flg = qual_flags(tcp->scno);
411         tcp->s_ent = &sysent[tcp->scno];
412
413         const unsigned int n = tcp->s_ent->nargs;
414         unsigned int i;
415         for (i = 0; i < n; i++)
416                 tcp->u_arg[i] = tcp->u_arg[i + 1];
417 }
418 #endif
419
420 #ifdef LINUX_MIPSO32
421 static void
422 decode_mips_subcall(struct tcb *tcp)
423 {
424         if (!scno_is_valid(tcp->u_arg[0]))
425                 return;
426         tcp->scno = tcp->u_arg[0];
427         tcp->qual_flg = qual_flags(tcp->scno);
428         tcp->s_ent = &sysent[tcp->scno];
429         memmove(&tcp->u_arg[0], &tcp->u_arg[1],
430                 sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));
431         /*
432          * Fetching the last arg of 7-arg syscalls (fadvise64_64
433          * and sync_file_range) requires additional code,
434          * see linux/mips/get_syscall_args.c
435          */
436         if (tcp->s_ent->nargs == MAX_ARGS) {
437                 if (umoven(tcp,
438                            mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
439                            sizeof(tcp->u_arg[0]),
440                            &tcp->u_arg[MAX_ARGS - 1]) < 0)
441                 tcp->u_arg[MAX_ARGS - 1] = 0;
442         }
443 }
444 #endif /* LINUX_MIPSO32 */
445
446 static void
447 dumpio(struct tcb *tcp)
448 {
449         if (syserror(tcp))
450                 return;
451
452         int fd = tcp->u_arg[0];
453         if (fd < 0)
454                 return;
455
456         if (is_number_in_set(fd, &read_set)) {
457                 switch (tcp->s_ent->sen) {
458                 case SEN_read:
459                 case SEN_pread:
460                 case SEN_recv:
461                 case SEN_recvfrom:
462                 case SEN_mq_timedreceive:
463                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
464                         return;
465                 case SEN_readv:
466                 case SEN_preadv:
467                 case SEN_preadv2:
468                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
469                                      tcp->u_rval);
470                         return;
471                 case SEN_recvmsg:
472                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
473                         return;
474                 case SEN_recvmmsg:
475                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
476                         return;
477                 }
478         }
479         if (is_number_in_set(fd, &write_set)) {
480                 switch (tcp->s_ent->sen) {
481                 case SEN_write:
482                 case SEN_pwrite:
483                 case SEN_send:
484                 case SEN_sendto:
485                 case SEN_mq_timedsend:
486                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
487                         break;
488                 case SEN_writev:
489                 case SEN_pwritev:
490                 case SEN_pwritev2:
491                 case SEN_vmsplice:
492                         dumpiov(tcp, tcp->u_arg[2], tcp->u_arg[1]);
493                         break;
494                 case SEN_sendmsg:
495                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1UL);
496                         break;
497                 case SEN_sendmmsg:
498                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
499                         break;
500                 }
501         }
502 }
503
504 /*
505  * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
506  * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
507  */
508 static kernel_scno_t
509 shuffle_scno(kernel_scno_t scno)
510 {
511 #if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
512         if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
513                 return scno;
514
515         /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
516         if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
517                 return 0x000ffff0;
518         if (scno == 0x000ffff0)
519                 return ARM_FIRST_SHUFFLED_SYSCALL;
520
521 # define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
522         /*
523          * Is it ARM specific syscall?
524          * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
525          * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
526          */
527         if (scno >= 0x000f0000 &&
528             scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
529                 return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
530         }
531         if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
532                 return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
533         }
534 #endif /* ARM || AARCH64 */
535
536         return scno;
537 }
538
539 const char *
540 err_name(unsigned long err)
541 {
542         if ((err < nerrnos) && errnoent[err])
543                 return errnoent[err];
544
545         return NULL;
546 }
547
/*
 * Status of the register set: trace_syscall_exiting() treats any
 * non-zero value as "registers are not available".
 */
static long get_regs_error;

/* Mark the register set as not available. */
void
clear_regs(void)
{
	get_regs_error = -1;
}
555
556 static int get_syscall_args(struct tcb *);
557 static int get_syscall_result(struct tcb *);
558 static int arch_get_scno(struct tcb *tcp);
559 static int arch_set_scno(struct tcb *, kernel_scno_t);
560 static void get_error(struct tcb *, const bool);
561 static int arch_set_error(struct tcb *);
562
563 struct fault_opts *fault_vec[SUPPORTED_PERSONALITIES];
564
565 static struct fault_opts *
566 tcb_fault_opts(struct tcb *tcp)
567 {
568         return (scno_in_range(tcp->scno) && tcp->fault_vec[current_personality])
569                ? &tcp->fault_vec[current_personality][tcp->scno] : NULL;
570 }
571
572
573 static long
574 inject_syscall_fault_entering(struct tcb *tcp)
575 {
576         if (!tcp->fault_vec[current_personality]) {
577                 tcp->fault_vec[current_personality] =
578                         xcalloc(nsyscalls, sizeof(**fault_vec));
579                 memcpy(tcp->fault_vec[current_personality],
580                        fault_vec[current_personality],
581                        nsyscalls * sizeof(**fault_vec));
582         }
583
584         struct fault_opts *opts = tcb_fault_opts(tcp);
585
586         if (!opts || opts->first == 0)
587                 return 0;
588
589         --opts->first;
590
591         if (opts->first != 0)
592                 return 0;
593
594         opts->first = opts->step;
595
596         if (!arch_set_scno(tcp, -1))
597                 tcp->flags |= TCB_FAULT_INJ;
598
599         return 0;
600 }
601
602 static long
603 update_syscall_fault_exiting(struct tcb *tcp)
604 {
605         struct fault_opts *opts = tcb_fault_opts(tcp);
606
607         if (opts && opts->err && tcp->u_error != opts->err) {
608                 unsigned long u_error = tcp->u_error;
609                 tcp->u_error = opts->err;
610                 if (arch_set_error(tcp))
611                         tcp->u_error = u_error;
612         }
613
614         return 0;
615 }
616
617 static int
618 trace_syscall_entering(struct tcb *tcp)
619 {
620         int res, scno_good;
621
622         scno_good = res = get_scno(tcp);
623         if (res == 0)
624                 return res;
625         if (res == 1)
626                 res = get_syscall_args(tcp);
627
628         if (res != 1) {
629                 printleader(tcp);
630                 tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
631                 /*
632                  * " <unavailable>" will be added later by the code which
633                  * detects ptrace errors.
634                  */
635                 goto ret;
636         }
637
638 #ifdef LINUX_MIPSO32
639         if (SEN_syscall == tcp->s_ent->sen)
640                 decode_mips_subcall(tcp);
641 #endif
642
643 #if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
644         switch (tcp->s_ent->sen) {
645 # ifdef SYS_socket_subcall
646                 case SEN_socketcall:
647                         decode_socket_subcall(tcp);
648                         break;
649 # endif
650 # ifdef SYS_ipc_subcall
651                 case SEN_ipc:
652                         decode_ipc_subcall(tcp);
653                         break;
654 # endif
655         }
656 #endif
657
658         /* Restrain from fault injection while the trace executes strace code. */
659         if (hide_log(tcp)) {
660                 tcp->qual_flg &= ~QUAL_FAULT;
661         }
662
663         switch (tcp->s_ent->sen) {
664                 case SEN_execve:
665                 case SEN_execveat:
666 #if defined SPARC || defined SPARC64
667                 case SEN_execv:
668 #endif
669                         tcp->flags &= ~TCB_HIDE_LOG;
670                         break;
671         }
672
673         if (!(tcp->qual_flg & QUAL_TRACE)
674          || (tracing_paths && !pathtrace_match(tcp))
675         ) {
676                 tcp->flags |= TCB_INSYSCALL | TCB_FILTERED;
677                 tcp->sys_func_rval = 0;
678                 return 0;
679         }
680
681         tcp->flags &= ~TCB_FILTERED;
682
683         if (hide_log(tcp)) {
684                 res = 0;
685                 goto ret;
686         }
687
688         if (tcp->qual_flg & QUAL_FAULT)
689                 inject_syscall_fault_entering(tcp);
690
691         if (cflag == CFLAG_ONLY_STATS) {
692                 res = 0;
693                 goto ret;
694         }
695
696 #ifdef USE_LIBUNWIND
697         if (stack_trace_enabled) {
698                 if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
699                         unwind_capture_stacktrace(tcp);
700         }
701 #endif
702
703         printleader(tcp);
704         tprintf("%s(", tcp->s_ent->sys_name);
705         if (tcp->qual_flg & QUAL_RAW)
706                 res = printargs(tcp);
707         else
708                 res = tcp->s_ent->sys_func(tcp);
709
710         fflush(tcp->outf);
711  ret:
712         tcp->flags |= TCB_INSYSCALL;
713         tcp->sys_func_rval = res;
714         /* Measure the entrance time as late as possible to avoid errors. */
715         if (Tflag || cflag)
716                 gettimeofday(&tcp->etime, NULL);
717         return res;
718 }
719
720 static bool
721 syscall_fault_injected(struct tcb *tcp)
722 {
723         return tcp->flags & TCB_FAULT_INJ;
724 }
725
726 static int
727 trace_syscall_exiting(struct tcb *tcp)
728 {
729         int sys_res;
730         struct timeval tv;
731         int res;
732         unsigned long u_error;
733         const char *u_error_str;
734
735         /* Measure the exit time as early as possible to avoid errors. */
736         if (Tflag || cflag)
737                 gettimeofday(&tv, NULL);
738
739 #ifdef USE_LIBUNWIND
740         if (stack_trace_enabled) {
741                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
742                         unwind_cache_invalidate(tcp);
743         }
744 #endif
745
746 #if SUPPORTED_PERSONALITIES > 1
747         update_personality(tcp, tcp->currpers);
748 #endif
749         res = (get_regs_error ? -1 : get_syscall_result(tcp));
750         if (filtered(tcp) || hide_log(tcp))
751                 goto ret;
752
753         if (syserror(tcp) && syscall_fault_injected(tcp))
754                 update_syscall_fault_exiting(tcp);
755
756         if (cflag) {
757                 count_syscall(tcp, &tv);
758                 if (cflag == CFLAG_ONLY_STATS) {
759                         goto ret;
760                 }
761         }
762
763         /* If not in -ff mode, and printing_tcp != tcp,
764          * then the log currently does not end with output
765          * of _our syscall entry_, but with something else.
766          * We need to say which syscall's return is this.
767          *
768          * Forced reprinting via TCB_REPRINT is used only by
769          * "strace -ff -oLOG test/threaded_execve" corner case.
770          * It's the only case when -ff mode needs reprinting.
771          */
772         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
773                 tcp->flags &= ~TCB_REPRINT;
774                 printleader(tcp);
775                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
776         }
777         printing_tcp = tcp;
778
779         tcp->s_prev_ent = NULL;
780         if (res != 1) {
781                 /* There was error in one of prior ptrace ops */
782                 tprints(") ");
783                 tabto();
784                 tprints("= ? <unavailable>\n");
785                 line_ended();
786                 tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
787                 tcp->sys_func_rval = 0;
788                 free_tcb_priv_data(tcp);
789                 return res;
790         }
791         tcp->s_prev_ent = tcp->s_ent;
792
793         sys_res = 0;
794         if (tcp->qual_flg & QUAL_RAW) {
795                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
796         } else {
797         /* FIXME: not_failing_only (IOW, option -z) is broken:
798          * failure of syscall is known only after syscall return.
799          * Thus we end up with something like this on, say, ENOENT:
800          *     open("doesnt_exist", O_RDONLY <unfinished ...>
801          *     {next syscall decode}
802          * whereas the intended result is that open(...) line
803          * is not shown at all.
804          */
805                 if (not_failing_only && tcp->u_error)
806                         goto ret;       /* ignore failed syscalls */
807                 if (tcp->sys_func_rval & RVAL_DECODED)
808                         sys_res = tcp->sys_func_rval;
809                 else
810                         sys_res = tcp->s_ent->sys_func(tcp);
811         }
812
813         tprints(") ");
814         tabto();
815         u_error = tcp->u_error;
816
817         if (tcp->qual_flg & QUAL_RAW) {
818                 if (u_error) {
819                         tprintf("= -1 (errno %lu)", u_error);
820                         if (syscall_fault_injected(tcp))
821                                 tprints(" (INJECTED)");
822                 } else {
823                         tprintf("= %#lx", tcp->u_rval);
824                 }
825         }
826         else if (!(sys_res & RVAL_NONE) && u_error) {
827                 switch (u_error) {
828                 /* Blocked signals do not interrupt any syscalls.
829                  * In this case syscalls don't return ERESTARTfoo codes.
830                  *
831                  * Deadly signals set to SIG_DFL interrupt syscalls
832                  * and kill the process regardless of which of the codes below
833                  * is returned by the interrupted syscall.
834                  * In some cases, kernel forces a kernel-generated deadly
835                  * signal to be unblocked and set to SIG_DFL (and thus cause
836                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
837                  * or SIGILL. (The alternative is to leave process spinning
838                  * forever on the faulty instruction - not useful).
839                  *
840                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
841                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
842                  * but kernel will always restart them.
843                  */
844                 case ERESTARTSYS:
845                         /* Most common type of signal-interrupted syscall exit code.
846                          * The system call will be restarted with the same arguments
847                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
848                          */
849                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
850                         break;
851                 case ERESTARTNOINTR:
852                         /* Rare. For example, fork() returns this if interrupted.
853                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
854                          */
855                         tprints("= ? ERESTARTNOINTR (To be restarted)");
856                         break;
857                 case ERESTARTNOHAND:
858                         /* pause(), rt_sigsuspend() etc use this code.
859                          * SA_RESTART is ignored (assumed not set):
860                          * syscall won't restart (will return EINTR instead)
861                          * even after signal with SA_RESTART set. However,
862                          * after SIG_IGN or SIG_DFL signal it will restart
863                          * (thus the name "restart only if has no handler").
864                          */
865                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
866                         break;
867                 case ERESTART_RESTARTBLOCK:
868                         /* Syscalls like nanosleep(), poll() which can't be
869                          * restarted with their original arguments use this
870                          * code. Kernel will execute restart_syscall() instead,
871                          * which changes arguments before restarting syscall.
872                          * SA_RESTART is ignored (assumed not set) similarly
873                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
874                          * since restart data is saved in "restart block"
875                          * in task struct, and if signal handler uses a syscall
876                          * which in turn saves another such restart block,
877                          * old data is lost and restart becomes impossible)
878                          */
879                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
880                         break;
881                 default:
882                         u_error_str = err_name(u_error);
883                         if (u_error_str)
884                                 tprintf("= -1 %s (%s)",
885                                         u_error_str, strerror(u_error));
886                         else
887                                 tprintf("= -1 %lu (%s)",
888                                         u_error, strerror(u_error));
889                         break;
890                 }
891                 if (syscall_fault_injected(tcp))
892                         tprintf(" (INJECTED)");
893                 if ((sys_res & RVAL_STR) && tcp->auxstr)
894                         tprintf(" (%s)", tcp->auxstr);
895         }
896         else {
897                 if (sys_res & RVAL_NONE)
898                         tprints("= ?");
899                 else {
900                         switch (sys_res & RVAL_MASK) {
901                         case RVAL_HEX:
902 #if SUPPORTED_PERSONALITIES > 1
903                                 if (current_wordsize < sizeof(long))
904                                         tprintf("= %#x",
905                                                 (unsigned int) tcp->u_rval);
906                                 else
907 #endif
908                                         tprintf("= %#lx", tcp->u_rval);
909                                 break;
910                         case RVAL_OCTAL:
911                                 tprints("= ");
912                                 print_numeric_long_umask(tcp->u_rval);
913                                 break;
914                         case RVAL_UDECIMAL:
915 #if SUPPORTED_PERSONALITIES > 1
916                                 if (current_wordsize < sizeof(long))
917                                         tprintf("= %u",
918                                                 (unsigned int) tcp->u_rval);
919                                 else
920 #endif
921                                         tprintf("= %lu", tcp->u_rval);
922                                 break;
923                         case RVAL_DECIMAL:
924                                 tprintf("= %ld", tcp->u_rval);
925                                 break;
926                         case RVAL_FD:
927                                 if (show_fd_path) {
928                                         tprints("= ");
929                                         printfd(tcp, tcp->u_rval);
930                                 }
931                                 else
932                                         tprintf("= %ld", tcp->u_rval);
933                                 break;
934 #if HAVE_STRUCT_TCB_EXT_ARG
935                         /*
936                         case RVAL_LHEX:
937                                 tprintf("= %#llx", tcp->u_lrval);
938                                 break;
939                         case RVAL_LOCTAL:
940                                 tprintf("= %#llo", tcp->u_lrval);
941                                 break;
942                         */
943                         case RVAL_LUDECIMAL:
944                                 tprintf("= %llu", tcp->u_lrval);
945                                 break;
946                         /*
947                         case RVAL_LDECIMAL:
948                                 tprintf("= %lld", tcp->u_lrval);
949                                 break;
950                         */
951 #endif /* HAVE_STRUCT_TCB_EXT_ARG */
952                         default:
953                                 error_msg("invalid rval format");
954                                 break;
955                         }
956                 }
957                 if ((sys_res & RVAL_STR) && tcp->auxstr)
958                         tprintf(" (%s)", tcp->auxstr);
959         }
960         if (Tflag) {
961                 tv_sub(&tv, &tv, &tcp->etime);
962                 tprintf(" <%ld.%06ld>",
963                         (long) tv.tv_sec, (long) tv.tv_usec);
964         }
965         tprints("\n");
966         dumpio(tcp);
967         line_ended();
968
969 #ifdef USE_LIBUNWIND
970         if (stack_trace_enabled)
971                 unwind_print_stacktrace(tcp);
972 #endif
973
974  ret:
975         tcp->flags &= ~(TCB_INSYSCALL | TCB_FAULT_INJ);
976         tcp->sys_func_rval = 0;
977         free_tcb_priv_data(tcp);
978         return 0;
979 }
980
/*
 * Dispatch a syscall stop to the matching handler: the exiting handler
 * when exiting(tcp) is true, the entering handler otherwise.
 * Returns whatever the chosen handler returns.
 */
int
trace_syscall(struct tcb *tcp)
{
	if (exiting(tcp))
		return trace_syscall_exiting(tcp);

	return trace_syscall_entering(tcp);
}
987
988 bool
989 is_erestart(struct tcb *tcp)
990 {
991         switch (tcp->u_error) {
992                 case ERESTARTSYS:
993                 case ERESTARTNOINTR:
994                 case ERESTARTNOHAND:
995                 case ERESTART_RESTARTBLOCK:
996                         return true;
997                 default:
998                         return false;
999         }
1000 }
1001
1002 static unsigned long saved_u_error;
1003
1004 void
1005 temporarily_clear_syserror(struct tcb *tcp)
1006 {
1007         saved_u_error = tcp->u_error;
1008         tcp->u_error = 0;
1009 }
1010
1011 void
1012 restore_cleared_syserror(struct tcb *tcp)
1013 {
1014         tcp->u_error = saved_u_error;
1015 }
1016
1017 /*
1018  * Check the syscall return value register value for whether it is
1019  * a negated errno code indicating an error, or a success return value.
1020  */
1021 static inline bool
1022 is_negated_errno(kernel_ulong_t val)
1023 {
1024         /* Linux kernel defines MAX_ERRNO to 4095. */
1025         kernel_ulong_t max = -(kernel_long_t) 4095;
1026
1027 #ifndef current_klongsize
1028         if (current_klongsize < sizeof(val)) {
1029                 val = (uint32_t) val;
1030                 max = (uint32_t) max;
1031         }
1032 #endif /* !current_klongsize */
1033
1034         return val >= max;
1035 }
1036
1037 #include "arch_regs.c"
1038
1039 #ifdef HAVE_GETRVAL2
1040 # include "arch_getrval2.c"
1041 #endif
1042
1043 void
1044 print_pc(struct tcb *tcp)
1045 {
1046 #if defined ARCH_PC_REG
1047 # define ARCH_GET_PC 0
1048 #elif defined ARCH_PC_PEEK_ADDR
1049         long pc;
1050 # define ARCH_PC_REG pc
1051 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1052 #else
1053 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1054 #endif
1055         if (get_regs_error || ARCH_GET_PC)
1056                 tprints(current_wordsize == 4 ? "[????????] "
1057                                               : "[????????????????] ");
1058         else
1059                 tprintf(current_wordsize == 4 ? "[%08lx] " : "[%016lx] ",
1060                         (unsigned long) ARCH_PC_REG);
1061 }
1062
1063 #include "getregs_old.h"
1064
1065 #undef ptrace_getregset_or_getregs
1066 #undef ptrace_setregset_or_setregs
1067 #ifdef ARCH_REGS_FOR_GETREGSET
1068
1069 # define ptrace_getregset_or_getregs ptrace_getregset
1070 static long
1071 ptrace_getregset(pid_t pid)
1072 {
1073 # ifdef ARCH_IOVEC_FOR_GETREGSET
1074         /* variable iovec */
1075         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1076         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1077                       &ARCH_IOVEC_FOR_GETREGSET);
1078 # else
1079         /* constant iovec */
1080         static struct iovec io = {
1081                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1082                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1083         };
1084         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1085
1086 # endif
1087 }
1088
1089 # ifndef HAVE_GETREGS_OLD
1090 #  define ptrace_setregset_or_setregs ptrace_setregset
1091 static int
1092 ptrace_setregset(pid_t pid)
1093 {
1094 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1095         /* variable iovec */
1096         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1097                       &ARCH_IOVEC_FOR_GETREGSET);
1098 #  else
1099         /* constant iovec */
1100         static struct iovec io = {
1101                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1102                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1103         };
1104         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1105 #  endif
1106 }
1107 # endif /* !HAVE_GETREGS_OLD */
1108
1109 #elif defined ARCH_REGS_FOR_GETREGS
1110
1111 # define ptrace_getregset_or_getregs ptrace_getregs
1112 static long
1113 ptrace_getregs(pid_t pid)
1114 {
1115 # if defined SPARC || defined SPARC64
1116         /* SPARC systems have the meaning of data and addr reversed */
1117         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1118 # else
1119         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1120 # endif
1121 }
1122
1123 # ifndef HAVE_GETREGS_OLD
1124 #  define ptrace_setregset_or_setregs ptrace_setregs
1125 static int
1126 ptrace_setregs(pid_t pid)
1127 {
1128 #  if defined SPARC || defined SPARC64
1129         /* SPARC systems have the meaning of data and addr reversed */
1130         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1131 #  else
1132         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1133 #  endif
1134 }
1135 # endif /* !HAVE_GETREGS_OLD */
1136
1137 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1138
1139 void
1140 get_regs(pid_t pid)
1141 {
1142 #undef USE_GET_SYSCALL_RESULT_REGS
1143 #ifdef ptrace_getregset_or_getregs
1144
1145 # ifdef HAVE_GETREGS_OLD
1146         /*
1147          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1148          * fallback to getregs_old.
1149          */
1150         static int use_getregs_old;
1151         if (use_getregs_old < 0) {
1152                 get_regs_error = ptrace_getregset_or_getregs(pid);
1153                 return;
1154         } else if (use_getregs_old == 0) {
1155                 get_regs_error = ptrace_getregset_or_getregs(pid);
1156                 if (get_regs_error >= 0) {
1157                         use_getregs_old = -1;
1158                         return;
1159                 }
1160                 if (errno == EPERM || errno == ESRCH)
1161                         return;
1162                 use_getregs_old = 1;
1163         }
1164         get_regs_error = getregs_old(pid);
1165 # else /* !HAVE_GETREGS_OLD */
1166         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1167         get_regs_error = ptrace_getregset_or_getregs(pid);
1168 # endif /* !HAVE_GETREGS_OLD */
1169
1170 #else /* !ptrace_getregset_or_getregs */
1171
1172 # define USE_GET_SYSCALL_RESULT_REGS 1
1173 # warning get_regs is not implemented for this architecture yet
1174         get_regs_error = 0;
1175
1176 #endif /* !ptrace_getregset_or_getregs */
1177 }
1178
#ifdef ptrace_setregset_or_setregs
/*
 * Write the cached registers back to process PID using whichever of
 * ptrace_setregset()/ptrace_setregs() was selected above.
 * Returns the value of that call.
 */
static int
set_regs(pid_t pid)
{
	const int rc = ptrace_setregset_or_setregs(pid);

	return rc;
}
#endif /* ptrace_setregset_or_setregs */
1186
1187 struct sysent_buf {
1188         struct tcb *tcp;
1189         struct_sysent ent;
1190         char buf[sizeof("syscall_%lu") + sizeof(long) * 3];
1191 };
1192
1193 static void
1194 free_sysent_buf(void *ptr)
1195 {
1196         struct sysent_buf *s = ptr;
1197         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1198         free(ptr);
1199 }
1200
1201 /*
1202  * Returns:
1203  * 0: "ignore this ptrace stop", bail out of trace_syscall_entering() silently.
1204  * 1: ok, continue in trace_syscall_entering().
1205  * other: error, trace_syscall_entering() should print error indicator
1206  *    ("????" etc) and bail out.
1207  */
1208 int
1209 get_scno(struct tcb *tcp)
1210 {
1211         if (get_regs_error)
1212                 return -1;
1213
1214         int rc = arch_get_scno(tcp);
1215         if (rc != 1)
1216                 return rc;
1217
1218         if (scno_is_valid(tcp->scno)) {
1219                 tcp->s_ent = &sysent[tcp->scno];
1220                 tcp->qual_flg = qual_flags(tcp->scno);
1221         } else {
1222                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1223
1224                 s->tcp = tcp;
1225                 s->ent.nargs = MAX_ARGS;
1226                 s->ent.sen = SEN_printargs;
1227                 s->ent.sys_func = printargs;
1228                 s->ent.sys_name = s->buf;
1229                 sprintf(s->buf, "syscall_%lu", shuffle_scno(tcp->scno));
1230
1231                 tcp->s_ent = &s->ent;
1232                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1233
1234                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1235
1236                 if (debug_flag)
1237                         error_msg("pid %d invalid syscall %ld", tcp->pid, tcp->scno);
1238         }
1239         return 1;
1240 }
1241
1242 #ifdef USE_GET_SYSCALL_RESULT_REGS
1243 static int get_syscall_result_regs(struct tcb *);
1244 #endif
1245
1246 /* Returns:
1247  * 1: ok, continue in trace_syscall_exiting().
1248  * -1: error, trace_syscall_exiting() should print error indicator
1249  *    ("????" etc) and bail out.
1250  */
1251 static int
1252 get_syscall_result(struct tcb *tcp)
1253 {
1254 #ifdef USE_GET_SYSCALL_RESULT_REGS
1255         if (get_syscall_result_regs(tcp))
1256                 return -1;
1257 #endif
1258         tcp->u_error = 0;
1259         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1260
1261         return 1;
1262 }
1263
1264 #include "get_scno.c"
1265 #include "set_scno.c"
1266 #include "get_syscall_args.c"
1267 #ifdef USE_GET_SYSCALL_RESULT_REGS
1268 # include "get_syscall_result.c"
1269 #endif
1270 #include "get_error.c"
1271 #include "set_error.c"
1272 #ifdef HAVE_GETREGS_OLD
1273 # include "getregs_old.c"
1274 #endif
1275
1276 const char *
1277 syscall_name(kernel_scno_t scno)
1278 {
1279 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1280         if (current_personality == X32_PERSONALITY_NUMBER)
1281                 scno &= ~__X32_SYSCALL_BIT;
1282 #endif
1283         return scno_is_valid(scno) ? sysent[scno].sys_name: NULL;
1284 }