]> granicus.if.org Git - strace/blob - syscall.c
Add macros for testing QUAL_* flags
[strace] / syscall.c
1 /*
2  * Copyright (c) 1991, 1992 Paul Kranenburg <pk@cs.few.eur.nl>
3  * Copyright (c) 1993 Branko Lankester <branko@hacktic.nl>
4  * Copyright (c) 1993, 1994, 1995, 1996 Rick Sladkey <jrs@world.std.com>
5  * Copyright (c) 1996-1999 Wichert Akkerman <wichert@cistron.nl>
6  * Copyright (c) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
7  *                     Linux for s390 port by D.J. Barrow
8  *                    <barrow_dj@mail.yahoo.com,djbarrow@de.ibm.com>
9  * Copyright (c) 1999-2017 The strace developers.
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34
35 #include "defs.h"
36 #include "native_defs.h"
37 #include "nsig.h"
38 #include "number_set.h"
39 #include <sys/param.h>
40
41 /* for struct iovec */
42 #include <sys/uio.h>
43
44 /* for __X32_SYSCALL_BIT */
45 #include <asm/unistd.h>
46
47 #include "regs.h"
48 #include "ptrace.h"
49
50 #if defined(SPARC64)
51 # undef PTRACE_GETREGS
52 # define PTRACE_GETREGS PTRACE_GETREGS64
53 # undef PTRACE_SETREGS
54 # define PTRACE_SETREGS PTRACE_SETREGS64
55 #endif
56
57 #if defined SPARC64
58 # include <asm/psrcompat.h>
59 #elif defined SPARC
60 # include <asm/psr.h>
61 #endif
62
63 #ifdef IA64
64 # include <asm/rse.h>
65 #endif
66
67 #ifndef NT_PRSTATUS
68 # define NT_PRSTATUS 1
69 #endif
70
71 #include "syscall.h"
72
73 /* Define these shorthand notations to simplify the syscallent files. */
74 #include "sysent_shorthand_defs.h"
75
76 #define SEN(syscall_name) SEN_ ## syscall_name, SYS_FUNC_NAME(sys_ ## syscall_name)
77
78 const struct_sysent sysent0[] = {
79 #include "syscallent.h"
80 };
81
82 #if SUPPORTED_PERSONALITIES > 1
83 # include PERSONALITY1_INCLUDE_FUNCS
84 static const struct_sysent sysent1[] = {
85 # include "syscallent1.h"
86 };
87 #endif
88
89 #if SUPPORTED_PERSONALITIES > 2
90 # include PERSONALITY2_INCLUDE_FUNCS
91 static const struct_sysent sysent2[] = {
92 # include "syscallent2.h"
93 };
94 #endif
95
96 /* Now undef them since short defines cause wicked namespace pollution. */
97 #include "sysent_shorthand_undefs.h"
98
99 /*
100  * `ioctlent[012].h' files are automatically generated by the auxiliary
101  * program `ioctlsort', such that the list is sorted by the `code' field.
102  * This has the side-effect of resolving the _IO.. macros into
103  * plain integers, eliminating the need to include here everything
104  * in "/usr/include".
105  */
106
107 const char *const errnoent0[] = {
108 #include "errnoent.h"
109 };
110 const char *const signalent0[] = {
111 #include "signalent.h"
112 };
113 const struct_ioctlent ioctlent0[] = {
114 #include "ioctlent0.h"
115 };
116
117 #if SUPPORTED_PERSONALITIES > 1
118 static const char *const errnoent1[] = {
119 # include "errnoent1.h"
120 };
121 static const char *const signalent1[] = {
122 # include "signalent1.h"
123 };
124 static const struct_ioctlent ioctlent1[] = {
125 # include "ioctlent1.h"
126 };
127 # include PERSONALITY0_INCLUDE_PRINTERS_DECLS
128 static const struct_printers printers0 = {
129 # include PERSONALITY0_INCLUDE_PRINTERS_DEFS
130 };
131 # include PERSONALITY1_INCLUDE_PRINTERS_DECLS
132 static const struct_printers printers1 = {
133 # include PERSONALITY1_INCLUDE_PRINTERS_DEFS
134 };
135 #endif
136
137 #if SUPPORTED_PERSONALITIES > 2
138 static const char *const errnoent2[] = {
139 # include "errnoent2.h"
140 };
141 static const char *const signalent2[] = {
142 # include "signalent2.h"
143 };
144 static const struct_ioctlent ioctlent2[] = {
145 # include "ioctlent2.h"
146 };
147 # include PERSONALITY2_INCLUDE_PRINTERS_DECLS
148 static const struct_printers printers2 = {
149 # include PERSONALITY2_INCLUDE_PRINTERS_DEFS
150 };
151 #endif
152
153 enum {
154         nsyscalls0 = ARRAY_SIZE(sysent0)
155 #if SUPPORTED_PERSONALITIES > 1
156         , nsyscalls1 = ARRAY_SIZE(sysent1)
157 # if SUPPORTED_PERSONALITIES > 2
158         , nsyscalls2 = ARRAY_SIZE(sysent2)
159 # endif
160 #endif
161 };
162
163 enum {
164         nerrnos0 = ARRAY_SIZE(errnoent0)
165 #if SUPPORTED_PERSONALITIES > 1
166         , nerrnos1 = ARRAY_SIZE(errnoent1)
167 # if SUPPORTED_PERSONALITIES > 2
168         , nerrnos2 = ARRAY_SIZE(errnoent2)
169 # endif
170 #endif
171 };
172
173 enum {
174         nsignals0 = ARRAY_SIZE(signalent0)
175 #if SUPPORTED_PERSONALITIES > 1
176         , nsignals1 = ARRAY_SIZE(signalent1)
177 # if SUPPORTED_PERSONALITIES > 2
178         , nsignals2 = ARRAY_SIZE(signalent2)
179 # endif
180 #endif
181 };
182
183 enum {
184         nioctlents0 = ARRAY_SIZE(ioctlent0)
185 #if SUPPORTED_PERSONALITIES > 1
186         , nioctlents1 = ARRAY_SIZE(ioctlent1)
187 # if SUPPORTED_PERSONALITIES > 2
188         , nioctlents2 = ARRAY_SIZE(ioctlent2)
189 # endif
190 #endif
191 };
192
193 #if SUPPORTED_PERSONALITIES > 1
194 const struct_sysent *sysent = sysent0;
195 const char *const *errnoent = errnoent0;
196 const char *const *signalent = signalent0;
197 const struct_ioctlent *ioctlent = ioctlent0;
198 const struct_printers *printers = &printers0;
199 #endif
200
201 unsigned nsyscalls = nsyscalls0;
202 unsigned nerrnos = nerrnos0;
203 unsigned nsignals = nsignals0;
204 unsigned nioctlents = nioctlents0;
205
206 const unsigned int nsyscall_vec[SUPPORTED_PERSONALITIES] = {
207         nsyscalls0,
208 #if SUPPORTED_PERSONALITIES > 1
209         nsyscalls1,
210 #endif
211 #if SUPPORTED_PERSONALITIES > 2
212         nsyscalls2,
213 #endif
214 };
215 const struct_sysent *const sysent_vec[SUPPORTED_PERSONALITIES] = {
216         sysent0,
217 #if SUPPORTED_PERSONALITIES > 1
218         sysent1,
219 #endif
220 #if SUPPORTED_PERSONALITIES > 2
221         sysent2,
222 #endif
223 };
224
225 #if SUPPORTED_PERSONALITIES > 1
226 unsigned current_personality;
227
228 # ifndef current_wordsize
229 unsigned current_wordsize;
230 static const int personality_wordsize[SUPPORTED_PERSONALITIES] = {
231         PERSONALITY0_WORDSIZE,
232         PERSONALITY1_WORDSIZE,
233 # if SUPPORTED_PERSONALITIES > 2
234         PERSONALITY2_WORDSIZE,
235 # endif
236 };
237 # endif
238
239 # ifndef current_klongsize
240 unsigned current_klongsize;
241 static const int personality_klongsize[SUPPORTED_PERSONALITIES] = {
242         PERSONALITY0_KLONGSIZE,
243         PERSONALITY1_KLONGSIZE,
244 #  if SUPPORTED_PERSONALITIES > 2
245         PERSONALITY2_KLONGSIZE,
246 #  endif
247 };
248 # endif
249
250 void
251 set_personality(int personality)
252 {
253         nsyscalls = nsyscall_vec[personality];
254         sysent = sysent_vec[personality];
255
256         switch (personality) {
257         case 0:
258                 errnoent = errnoent0;
259                 nerrnos = nerrnos0;
260                 ioctlent = ioctlent0;
261                 nioctlents = nioctlents0;
262                 signalent = signalent0;
263                 nsignals = nsignals0;
264                 printers = &printers0;
265                 break;
266
267         case 1:
268                 errnoent = errnoent1;
269                 nerrnos = nerrnos1;
270                 ioctlent = ioctlent1;
271                 nioctlents = nioctlents1;
272                 signalent = signalent1;
273                 nsignals = nsignals1;
274                 printers = &printers1;
275                 break;
276
277 # if SUPPORTED_PERSONALITIES > 2
278         case 2:
279                 errnoent = errnoent2;
280                 nerrnos = nerrnos2;
281                 ioctlent = ioctlent2;
282                 nioctlents = nioctlents2;
283                 signalent = signalent2;
284                 nsignals = nsignals2;
285                 printers = &printers2;
286                 break;
287 # endif
288         }
289
290         current_personality = personality;
291 # ifndef current_wordsize
292         current_wordsize = personality_wordsize[personality];
293 # endif
294 # ifndef current_klongsize
295         current_klongsize = personality_klongsize[personality];
296 # endif
297 }
298
299 static void
300 update_personality(struct tcb *tcp, unsigned int personality)
301 {
302         if (personality == current_personality)
303                 return;
304         set_personality(personality);
305
306         if (personality == tcp->currpers)
307                 return;
308         tcp->currpers = personality;
309
310 # undef PERSONALITY_NAMES
311 # if defined POWERPC64
312 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
313 # elif defined X86_64
314 #  define PERSONALITY_NAMES {"64 bit", "32 bit", "x32"}
315 # elif defined X32
316 #  define PERSONALITY_NAMES {"x32", "32 bit"}
317 # elif defined AARCH64
318 #  define PERSONALITY_NAMES {"64 bit", "32 bit"}
319 # elif defined TILE
320 #  define PERSONALITY_NAMES {"64-bit", "32-bit"}
321 # endif
322 # ifdef PERSONALITY_NAMES
323         if (!qflag) {
324                 static const char *const names[] = PERSONALITY_NAMES;
325                 error_msg("[ Process PID=%d runs in %s mode. ]",
326                           tcp->pid, names[personality]);
327         }
328 # endif
329 }
330 #endif
331
#ifdef SYS_socket_subcall
/*
 * Rewrite TCP so that it describes a specific socket subcall instead of
 * the multiplexing call: u_arg[0] selects the subcall and u_arg[1] is the
 * address from which umoven() fetches the subcall's argument words.
 * TCP is left untouched if the selector is out of range or the fetch fails.
 */
static void
decode_socket_subcall(struct tcb *tcp)
{
	const int subcall = tcp->u_arg[0];

	if (subcall <= 0 || subcall >= SYS_socket_nsubcalls)
		return;

	const kernel_ulong_t subcall_scno = SYS_socket_subcall + subcall;
	const unsigned int nargs = sysent[subcall_scno].nargs;
	uint64_t words[nargs];

	if (umoven(tcp, tcp->u_arg[1], nargs * current_wordsize, words) < 0)
		return;

	tcp->scno = subcall_scno;
	tcp->qual_flg = qual_flags(subcall_scno);
	tcp->s_ent = &sysent[subcall_scno];

	/*
	 * The fetched block holds nargs words of current_wordsize bytes
	 * each, so read it as 32-bit or as 64-bit items accordingly.
	 */
	if (current_wordsize == sizeof(uint32_t)) {
		const uint32_t *const narrow = (uint32_t *) (void *) words;

		for (unsigned int i = 0; i < nargs; ++i)
			tcp->u_arg[i] = narrow[i];
	} else {
		for (unsigned int i = 0; i < nargs; ++i)
			tcp->u_arg[i] = words[i];
	}
}
#endif /* SYS_socket_subcall */
358
#ifdef SYS_ipc_subcall
/*
 * Rewrite TCP so that it describes a specific ipc subcall instead of the
 * multiplexing call.  u_arg[0] carries the subcall number in its low
 * 16 bits and a version in the bits above them; the remaining arguments
 * are shifted down by one slot.  TCP is left untouched when the subcall
 * number is not one of 1-4, 11-14 or 21-24.
 */
static void
decode_ipc_subcall(struct tcb *tcp)
{
	unsigned int call = tcp->u_arg[0];
	const unsigned int version = call >> 16;

	if (version != 0) {
# if defined(S390) || defined(S390X)
		return;
# else
#  ifdef SPARC64
		if (current_wordsize == 8)
			return;
#  endif
		set_tcb_priv_ulong(tcp, version);
		call &= 0xffff;
# endif
	}

	/* Accept exactly 1..4, 11..14 and 21..24. */
	const unsigned int tens = call / 10;
	const unsigned int ones = call % 10;

	if (tens > 2 || ones < 1 || ones > 4)
		return;

	tcp->scno = SYS_ipc_subcall + call;
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Drop the selector: move the following nargs arguments down by one. */
	memmove(&tcp->u_arg[0], &tcp->u_arg[1],
		tcp->s_ent->nargs * sizeof(tcp->u_arg[0]));
}
#endif /* SYS_ipc_subcall */
398
#ifdef LINUX_MIPSO32
/*
 * Rewrite TCP so that it describes the syscall whose number is passed in
 * u_arg[0]: that number becomes tcp->scno and the remaining arguments are
 * shifted down by one slot.  Nothing is changed when u_arg[0] is not a
 * valid syscall number.
 */
static void
decode_mips_subcall(struct tcb *tcp)
{
	const kernel_ulong_t inner_scno = tcp->u_arg[0];

	if (!scno_is_valid(inner_scno))
		return;

	tcp->scno = inner_scno;
	tcp->qual_flg = qual_flags(tcp->scno);
	tcp->s_ent = &sysent[tcp->scno];

	/* Everything after the first slot moves down by one position. */
	memmove(&tcp->u_arg[0], &tcp->u_arg[1],
		sizeof(tcp->u_arg) - sizeof(tcp->u_arg[0]));

	/*
	 * Fetching the last arg of 7-arg syscalls (fadvise64_64
	 * and sync_file_range) requires additional code,
	 * see linux/mips/get_syscall_args.c
	 * If that fetch fails, the last argument is zeroed.
	 */
	if (tcp->s_ent->nargs == MAX_ARGS
	    && umoven(tcp,
		      mips_REG_SP + MAX_ARGS * sizeof(tcp->u_arg[0]),
		      sizeof(tcp->u_arg[0]),
		      &tcp->u_arg[MAX_ARGS - 1]) < 0) {
		tcp->u_arg[MAX_ARGS - 1] = 0;
	}
}
#endif /* LINUX_MIPSO32 */
424
425 static void
426 dumpio(struct tcb *tcp)
427 {
428         if (syserror(tcp))
429                 return;
430
431         int fd = tcp->u_arg[0];
432         if (fd < 0)
433                 return;
434
435         if (is_number_in_set(fd, read_set)) {
436                 switch (tcp->s_ent->sen) {
437                 case SEN_read:
438                 case SEN_pread:
439                 case SEN_recv:
440                 case SEN_recvfrom:
441                 case SEN_mq_timedreceive:
442                         dumpstr(tcp, tcp->u_arg[1], tcp->u_rval);
443                         return;
444                 case SEN_readv:
445                 case SEN_preadv:
446                 case SEN_preadv2:
447                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1],
448                                      tcp->u_rval);
449                         return;
450                 case SEN_recvmsg:
451                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], tcp->u_rval);
452                         return;
453                 case SEN_recvmmsg:
454                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
455                         return;
456                 }
457         }
458         if (is_number_in_set(fd, write_set)) {
459                 switch (tcp->s_ent->sen) {
460                 case SEN_write:
461                 case SEN_pwrite:
462                 case SEN_send:
463                 case SEN_sendto:
464                 case SEN_mq_timedsend:
465                         dumpstr(tcp, tcp->u_arg[1], tcp->u_arg[2]);
466                         break;
467                 case SEN_writev:
468                 case SEN_pwritev:
469                 case SEN_pwritev2:
470                 case SEN_vmsplice:
471                         dumpiov_upto(tcp, tcp->u_arg[2], tcp->u_arg[1], -1);
472                         break;
473                 case SEN_sendmsg:
474                         dumpiov_in_msghdr(tcp, tcp->u_arg[1], -1);
475                         break;
476                 case SEN_sendmmsg:
477                         dumpiov_in_mmsghdr(tcp, tcp->u_arg[1]);
478                         break;
479                 }
480         }
481 }
482
483 /*
484  * Shuffle syscall numbers so that we don't have huge gaps in syscall table.
485  * The shuffling should be an involution: shuffle_scno(shuffle_scno(n)) == n.
486  */
487 static kernel_ulong_t
488 shuffle_scno(kernel_ulong_t scno)
489 {
490 #if defined(ARM) || defined(AARCH64) /* So far only 32-bit ARM needs this */
491         if (scno < ARM_FIRST_SHUFFLED_SYSCALL)
492                 return scno;
493
494         /* __ARM_NR_cmpxchg? Swap with LAST_ORDINARY+1 */
495         if (scno == ARM_FIRST_SHUFFLED_SYSCALL)
496                 return 0x000ffff0;
497         if (scno == 0x000ffff0)
498                 return ARM_FIRST_SHUFFLED_SYSCALL;
499
500 # define ARM_SECOND_SHUFFLED_SYSCALL (ARM_FIRST_SHUFFLED_SYSCALL + 1)
501         /*
502          * Is it ARM specific syscall?
503          * Swap [0x000f0000, 0x000f0000 + LAST_SPECIAL] range
504          * with [SECOND_SHUFFLED, SECOND_SHUFFLED + LAST_SPECIAL] range.
505          */
506         if (scno >= 0x000f0000 &&
507             scno <= 0x000f0000 + ARM_LAST_SPECIAL_SYSCALL) {
508                 return scno - 0x000f0000 + ARM_SECOND_SHUFFLED_SYSCALL;
509         }
510         if (scno <= ARM_SECOND_SHUFFLED_SYSCALL + ARM_LAST_SPECIAL_SYSCALL) {
511                 return scno + 0x000f0000 - ARM_SECOND_SHUFFLED_SYSCALL;
512         }
513 #endif /* ARM || AARCH64 */
514
515         return scno;
516 }
517
518 const char *
519 err_name(unsigned long err)
520 {
521         if ((err < nerrnos) && errnoent[err])
522                 return errnoent[err];
523
524         return NULL;
525 }
526
/*
 * Status of the register fetch; syscall_exiting_decode() in this file
 * treats any non-zero value as a failure.
 */
static long get_regs_error;

/* Reset the register-fetch status to the error value -1. */
void
clear_regs(void)
{
	get_regs_error = -1L;
}
534
535 static void get_regs(pid_t pid);
536 static int get_syscall_args(struct tcb *);
537 static int get_syscall_result(struct tcb *);
538 static int arch_get_scno(struct tcb *tcp);
539 static int arch_set_scno(struct tcb *, kernel_ulong_t);
540 static void get_error(struct tcb *, const bool);
541 static int arch_set_error(struct tcb *);
542 static int arch_set_success(struct tcb *);
543
544 struct inject_opts *inject_vec[SUPPORTED_PERSONALITIES];
545
546 static struct inject_opts *
547 tcb_inject_opts(struct tcb *tcp)
548 {
549         return (scno_in_range(tcp->scno) && tcp->inject_vec[current_personality])
550                ? &tcp->inject_vec[current_personality][tcp->scno] : NULL;
551 }
552
553
554 static long
555 tamper_with_syscall_entering(struct tcb *tcp, unsigned int *signo)
556 {
557         if (!tcp->inject_vec[current_personality]) {
558                 tcp->inject_vec[current_personality] =
559                         xcalloc(nsyscalls, sizeof(**inject_vec));
560                 memcpy(tcp->inject_vec[current_personality],
561                        inject_vec[current_personality],
562                        nsyscalls * sizeof(**inject_vec));
563         }
564
565         struct inject_opts *opts = tcb_inject_opts(tcp);
566
567         if (!opts || opts->first == 0)
568                 return 0;
569
570         --opts->first;
571
572         if (opts->first != 0)
573                 return 0;
574
575         opts->first = opts->step;
576
577         if (opts->signo > 0)
578                 *signo = opts->signo;
579         if (opts->rval != INJECT_OPTS_RVAL_DEFAULT && !arch_set_scno(tcp, -1))
580                 tcp->flags |= TCB_TAMPERED;
581
582         return 0;
583 }
584
585 static long
586 tamper_with_syscall_exiting(struct tcb *tcp)
587 {
588         struct inject_opts *opts = tcb_inject_opts(tcp);
589
590         if (!opts)
591                 return 0;
592
593         if (opts->rval >= 0) {
594                 kernel_long_t u_rval = tcp->u_rval;
595
596                 tcp->u_rval = opts->rval;
597                 if (arch_set_success(tcp)) {
598                         tcp->u_rval = u_rval;
599                 } else {
600                         tcp->u_error = 0;
601                 }
602         } else {
603                 unsigned long new_error = -opts->rval;
604
605                 if (new_error != tcp->u_error && new_error <= MAX_ERRNO_VALUE) {
606                         unsigned long u_error = tcp->u_error;
607
608                         tcp->u_error = new_error;
609                         if (arch_set_error(tcp)) {
610                                 tcp->u_error = u_error;
611                         }
612                 }
613         }
614
615         return 0;
616 }
617
618 /*
619  * Returns:
620  * 0: "ignore this ptrace stop", bail out silently.
621  * 1: ok, decoded; call
622  *    syscall_entering_finish(tcp, syscall_entering_trace(tcp, ...)).
623  * other: error; call syscall_entering_finish(tcp, res), where res is the value
624  *    returned.
625  */
626 int
627 syscall_entering_decode(struct tcb *tcp)
628 {
629         int res = get_scno(tcp);
630         if (res == 0)
631                 return res;
632         int scno_good = res;
633         if (res != 1 || (res = get_syscall_args(tcp)) != 1) {
634                 printleader(tcp);
635                 tprintf("%s(", scno_good == 1 ? tcp->s_ent->sys_name : "????");
636                 /*
637                  * " <unavailable>" will be added later by the code which
638                  * detects ptrace errors.
639                  */
640                 return res;
641         }
642
643 #ifdef LINUX_MIPSO32
644         if (SEN_syscall == tcp->s_ent->sen)
645                 decode_mips_subcall(tcp);
646 #endif
647
648 #if defined(SYS_socket_subcall) || defined(SYS_ipc_subcall)
649         switch (tcp->s_ent->sen) {
650 # ifdef SYS_socket_subcall
651                 case SEN_socketcall:
652                         decode_socket_subcall(tcp);
653                         break;
654 # endif
655 # ifdef SYS_ipc_subcall
656                 case SEN_ipc:
657                         decode_ipc_subcall(tcp);
658                         break;
659 # endif
660         }
661 #endif
662
663         return 1;
664 }
665
666 int
667 syscall_entering_trace(struct tcb *tcp, unsigned int *sig)
668 {
669         /* Restrain from fault injection while the trace executes strace code. */
670         if (hide_log(tcp)) {
671                 tcp->qual_flg &= ~QUAL_INJECT;
672         }
673
674         switch (tcp->s_ent->sen) {
675                 case SEN_execve:
676                 case SEN_execveat:
677 #if defined SPARC || defined SPARC64
678                 case SEN_execv:
679 #endif
680                         tcp->flags &= ~TCB_HIDE_LOG;
681                         break;
682         }
683
684         if (!traced(tcp) || (tracing_paths && !pathtrace_match(tcp))) {
685                 tcp->flags |= TCB_FILTERED;
686                 return 0;
687         }
688
689         tcp->flags &= ~TCB_FILTERED;
690
691         if (hide_log(tcp)) {
692                 return 0;
693         }
694
695         if (inject(tcp))
696                 tamper_with_syscall_entering(tcp, sig);
697
698         if (cflag == CFLAG_ONLY_STATS) {
699                 return 0;
700         }
701
702 #ifdef USE_LIBUNWIND
703         if (stack_trace_enabled) {
704                 if (tcp->s_ent->sys_flags & STACKTRACE_CAPTURE_ON_ENTER)
705                         unwind_capture_stacktrace(tcp);
706         }
707 #endif
708
709         printleader(tcp);
710         tprintf("%s(", tcp->s_ent->sys_name);
711         int res = raw(tcp) ? printargs(tcp) : tcp->s_ent->sys_func(tcp);
712         fflush(tcp->outf);
713         return res;
714 }
715
716 void
717 syscall_entering_finish(struct tcb *tcp, int res)
718 {
719         tcp->flags |= TCB_INSYSCALL;
720         tcp->sys_func_rval = res;
721         /* Measure the entrance time as late as possible to avoid errors. */
722         if ((Tflag || cflag) && !filtered(tcp))
723                 gettimeofday(&tcp->etime, NULL);
724 }
725
726 static bool
727 syscall_tampered(struct tcb *tcp)
728 {
729         return tcp->flags & TCB_TAMPERED;
730 }
731
732 /* Returns:
733  * 0: "bail out".
734  * 1: ok.
735  * -1: error in one of ptrace ops.
736  *
737  * If not 0, call syscall_exiting_trace(tcp, res), where res is the return
738  *    value. Anyway, call syscall_exiting_finish(tcp) then.
739  */
740 int
741 syscall_exiting_decode(struct tcb *tcp, struct timeval *ptv)
742 {
743         /* Measure the exit time as early as possible to avoid errors. */
744         if ((Tflag || cflag) && !(filtered(tcp) || hide_log(tcp)))
745                 gettimeofday(ptv, NULL);
746
747 #ifdef USE_LIBUNWIND
748         if (stack_trace_enabled) {
749                 if (tcp->s_ent->sys_flags & STACKTRACE_INVALIDATE_CACHE)
750                         unwind_cache_invalidate(tcp);
751         }
752 #endif
753
754         if (filtered(tcp) || hide_log(tcp))
755                 return 0;
756
757         get_regs(tcp->pid);
758 #if SUPPORTED_PERSONALITIES > 1
759         update_personality(tcp, tcp->currpers);
760 #endif
761         return get_regs_error ? -1 : get_syscall_result(tcp);
762 }
763
764 int
765 syscall_exiting_trace(struct tcb *tcp, struct timeval tv, int res)
766 {
767         if (syserror(tcp) && syscall_tampered(tcp))
768                 tamper_with_syscall_exiting(tcp);
769
770         if (cflag) {
771                 count_syscall(tcp, &tv);
772                 if (cflag == CFLAG_ONLY_STATS) {
773                         return 0;
774                 }
775         }
776
777         /* If not in -ff mode, and printing_tcp != tcp,
778          * then the log currently does not end with output
779          * of _our syscall entry_, but with something else.
780          * We need to say which syscall's return is this.
781          *
782          * Forced reprinting via TCB_REPRINT is used only by
783          * "strace -ff -oLOG test/threaded_execve" corner case.
784          * It's the only case when -ff mode needs reprinting.
785          */
786         if ((followfork < 2 && printing_tcp != tcp) || (tcp->flags & TCB_REPRINT)) {
787                 tcp->flags &= ~TCB_REPRINT;
788                 printleader(tcp);
789                 tprintf("<... %s resumed> ", tcp->s_ent->sys_name);
790         }
791         printing_tcp = tcp;
792
793         tcp->s_prev_ent = NULL;
794         if (res != 1) {
795                 /* There was error in one of prior ptrace ops */
796                 tprints(") ");
797                 tabto();
798                 tprints("= ? <unavailable>\n");
799                 line_ended();
800                 return res;
801         }
802         tcp->s_prev_ent = tcp->s_ent;
803
804         int sys_res = 0;
805         if (raw(tcp)) {
806                 /* sys_res = printargs(tcp); - but it's nop on sysexit */
807         } else {
808         /* FIXME: not_failing_only (IOW, option -z) is broken:
809          * failure of syscall is known only after syscall return.
810          * Thus we end up with something like this on, say, ENOENT:
811          *     open("does_not_exist", O_RDONLY <unfinished ...>
812          *     {next syscall decode}
813          * whereas the intended result is that open(...) line
814          * is not shown at all.
815          */
816                 if (not_failing_only && tcp->u_error)
817                         return 0;       /* ignore failed syscalls */
818                 if (tcp->sys_func_rval & RVAL_DECODED)
819                         sys_res = tcp->sys_func_rval;
820                 else
821                         sys_res = tcp->s_ent->sys_func(tcp);
822         }
823
824         tprints(") ");
825         tabto();
826         unsigned long u_error = tcp->u_error;
827
828         if (raw(tcp)) {
829                 if (u_error) {
830                         tprintf("= -1 (errno %lu)", u_error);
831                 } else {
832                         tprintf("= %#" PRI_klx, tcp->u_rval);
833                 }
834                 if (syscall_tampered(tcp))
835                         tprints(" (INJECTED)");
836         } else if (!(sys_res & RVAL_NONE) && u_error) {
837                 const char *u_error_str;
838
839                 switch (u_error) {
840                 /* Blocked signals do not interrupt any syscalls.
841                  * In this case syscalls don't return ERESTARTfoo codes.
842                  *
843                  * Deadly signals set to SIG_DFL interrupt syscalls
844                  * and kill the process regardless of which of the codes below
845                  * is returned by the interrupted syscall.
846                  * In some cases, kernel forces a kernel-generated deadly
847                  * signal to be unblocked and set to SIG_DFL (and thus cause
848                  * death) if it is blocked or SIG_IGNed: for example, SIGSEGV
849                  * or SIGILL. (The alternative is to leave process spinning
850                  * forever on the faulty instruction - not useful).
851                  *
852                  * SIG_IGNed signals and non-deadly signals set to SIG_DFL
853                  * (for example, SIGCHLD, SIGWINCH) interrupt syscalls,
854                  * but kernel will always restart them.
855                  */
856                 case ERESTARTSYS:
857                         /* Most common type of signal-interrupted syscall exit code.
858                          * The system call will be restarted with the same arguments
859                          * if SA_RESTART is set; otherwise, it will fail with EINTR.
860                          */
861                         tprints("= ? ERESTARTSYS (To be restarted if SA_RESTART is set)");
862                         break;
863                 case ERESTARTNOINTR:
864                         /* Rare. For example, fork() returns this if interrupted.
865                          * SA_RESTART is ignored (assumed set): the restart is unconditional.
866                          */
867                         tprints("= ? ERESTARTNOINTR (To be restarted)");
868                         break;
869                 case ERESTARTNOHAND:
870                         /* pause(), rt_sigsuspend() etc use this code.
871                          * SA_RESTART is ignored (assumed not set):
872                          * syscall won't restart (will return EINTR instead)
873                          * even after signal with SA_RESTART set. However,
874                          * after SIG_IGN or SIG_DFL signal it will restart
875                          * (thus the name "restart only if has no handler").
876                          */
877                         tprints("= ? ERESTARTNOHAND (To be restarted if no handler)");
878                         break;
879                 case ERESTART_RESTARTBLOCK:
880                         /* Syscalls like nanosleep(), poll() which can't be
881                          * restarted with their original arguments use this
882                          * code. Kernel will execute restart_syscall() instead,
883                          * which changes arguments before restarting syscall.
884                          * SA_RESTART is ignored (assumed not set) similarly
885                          * to ERESTARTNOHAND. (Kernel can't honor SA_RESTART
886                          * since restart data is saved in "restart block"
887                          * in task struct, and if signal handler uses a syscall
888                          * which in turn saves another such restart block,
889                          * old data is lost and restart becomes impossible)
890                          */
891                         tprints("= ? ERESTART_RESTARTBLOCK (Interrupted by signal)");
892                         break;
893                 default:
894                         u_error_str = err_name(u_error);
895                         if (u_error_str)
896                                 tprintf("= -1 %s (%s)",
897                                         u_error_str, strerror(u_error));
898                         else
899                                 tprintf("= -1 %lu (%s)",
900                                         u_error, strerror(u_error));
901                         break;
902                 }
903                 if (syscall_tampered(tcp))
904                         tprints(" (INJECTED)");
905                 if ((sys_res & RVAL_STR) && tcp->auxstr)
906                         tprintf(" (%s)", tcp->auxstr);
907         } else {
908                 if (sys_res & RVAL_NONE)
909                         tprints("= ?");
910                 else {
911                         switch (sys_res & RVAL_MASK) {
912                         case RVAL_HEX:
913 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
914                                 if (current_wordsize < sizeof(tcp->u_rval)) {
915                                         tprintf("= %#x",
916                                                 (unsigned int) tcp->u_rval);
917                                 } else
918 #endif
919                                 {
920                                         tprintf("= %#" PRI_klx, tcp->u_rval);
921                                 }
922                                 break;
923                         case RVAL_OCTAL:
924                                 tprints("= ");
925                                 print_numeric_long_umask(tcp->u_rval);
926                                 break;
927                         case RVAL_UDECIMAL:
928 #if ANY_WORDSIZE_LESS_THAN_KERNEL_LONG
929                                 if (current_wordsize < sizeof(tcp->u_rval)) {
930                                         tprintf("= %u",
931                                                 (unsigned int) tcp->u_rval);
932                                 } else
933 #endif
934                                 {
935                                         tprintf("= %" PRI_klu, tcp->u_rval);
936                                 }
937                                 break;
938                         case RVAL_DECIMAL:
939                                 tprintf("= %" PRI_kld, tcp->u_rval);
940                                 break;
941                         case RVAL_FD:
942                                 if (show_fd_path) {
943                                         tprints("= ");
944                                         printfd(tcp, tcp->u_rval);
945                                 } else
946                                         tprintf("= %" PRI_kld, tcp->u_rval);
947                                 break;
948                         default:
949                                 error_msg("invalid rval format");
950                                 break;
951                         }
952                 }
953                 if ((sys_res & RVAL_STR) && tcp->auxstr)
954                         tprintf(" (%s)", tcp->auxstr);
955                 if (syscall_tampered(tcp))
956                         tprints(" (INJECTED)");
957         }
958         if (Tflag) {
959                 tv_sub(&tv, &tv, &tcp->etime);
960                 tprintf(" <%ld.%06ld>",
961                         (long) tv.tv_sec, (long) tv.tv_usec);
962         }
963         tprints("\n");
964         dumpio(tcp);
965         line_ended();
966
967 #ifdef USE_LIBUNWIND
968         if (stack_trace_enabled)
969                 unwind_print_stacktrace(tcp);
970 #endif
971         return 0;
972 }
973
974 void
975 syscall_exiting_finish(struct tcb *tcp)
976 {
977         tcp->flags &= ~(TCB_INSYSCALL | TCB_TAMPERED);
978         tcp->sys_func_rval = 0;
979         free_tcb_priv_data(tcp);
980 }
981
982 bool
983 is_erestart(struct tcb *tcp)
984 {
985         switch (tcp->u_error) {
986                 case ERESTARTSYS:
987                 case ERESTARTNOINTR:
988                 case ERESTARTNOHAND:
989                 case ERESTART_RESTARTBLOCK:
990                         return true;
991                 default:
992                         return false;
993         }
994 }
995
996 static unsigned long saved_u_error;
997
998 void
999 temporarily_clear_syserror(struct tcb *tcp)
1000 {
1001         saved_u_error = tcp->u_error;
1002         tcp->u_error = 0;
1003 }
1004
1005 void
1006 restore_cleared_syserror(struct tcb *tcp)
1007 {
1008         tcp->u_error = saved_u_error;
1009 }
1010
1011 #include "arch_regs.c"
1012
1013 #ifdef HAVE_GETRVAL2
1014 # include "arch_getrval2.c"
1015 #endif
1016
1017 void
1018 print_pc(struct tcb *tcp)
1019 {
1020 #if defined ARCH_PC_REG
1021 # define ARCH_GET_PC 0
1022 #elif defined ARCH_PC_PEEK_ADDR
1023         kernel_ulong_t pc;
1024 # define ARCH_PC_REG pc
1025 # define ARCH_GET_PC upeek(tcp->pid, ARCH_PC_PEEK_ADDR, &pc)
1026 #else
1027 # error Neither ARCH_PC_REG nor ARCH_PC_PEEK_ADDR is defined
1028 #endif
1029         get_regs(tcp->pid);
1030         if (get_regs_error || ARCH_GET_PC)
1031                 tprints(current_wordsize == 4 ? "[????????] "
1032                                               : "[????????????????] ");
1033         else
1034                 tprintf(current_wordsize == 4
1035                         ? "[%08" PRI_klx "] " : "[%016" PRI_klx "] ",
1036                         (kernel_ulong_t) ARCH_PC_REG);
1037 }
1038
1039 #include "getregs_old.h"
1040
1041 #undef ptrace_getregset_or_getregs
1042 #undef ptrace_setregset_or_setregs
1043 #ifdef ARCH_REGS_FOR_GETREGSET
1044
1045 # define ptrace_getregset_or_getregs ptrace_getregset
1046 static long
1047 ptrace_getregset(pid_t pid)
1048 {
1049 # ifdef ARCH_IOVEC_FOR_GETREGSET
1050         /* variable iovec */
1051         ARCH_IOVEC_FOR_GETREGSET.iov_len = sizeof(ARCH_REGS_FOR_GETREGSET);
1052         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS,
1053                       &ARCH_IOVEC_FOR_GETREGSET);
1054 # else
1055         /* constant iovec */
1056         static struct iovec io = {
1057                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1058                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1059         };
1060         return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &io);
1061
1062 # endif
1063 }
1064
1065 # ifndef HAVE_GETREGS_OLD
1066 #  define ptrace_setregset_or_setregs ptrace_setregset
1067 static int
1068 ptrace_setregset(pid_t pid)
1069 {
1070 #  ifdef ARCH_IOVEC_FOR_GETREGSET
1071         /* variable iovec */
1072         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS,
1073                       &ARCH_IOVEC_FOR_GETREGSET);
1074 #  else
1075         /* constant iovec */
1076         static struct iovec io = {
1077                 .iov_base = &ARCH_REGS_FOR_GETREGSET,
1078                 .iov_len = sizeof(ARCH_REGS_FOR_GETREGSET)
1079         };
1080         return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &io);
1081 #  endif
1082 }
1083 # endif /* !HAVE_GETREGS_OLD */
1084
1085 #elif defined ARCH_REGS_FOR_GETREGS
1086
1087 # define ptrace_getregset_or_getregs ptrace_getregs
1088 static long
1089 ptrace_getregs(pid_t pid)
1090 {
1091 # if defined SPARC || defined SPARC64
1092         /* SPARC systems have the meaning of data and addr reversed */
1093         return ptrace(PTRACE_GETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1094 # else
1095         return ptrace(PTRACE_GETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1096 # endif
1097 }
1098
1099 # ifndef HAVE_GETREGS_OLD
1100 #  define ptrace_setregset_or_setregs ptrace_setregs
1101 static int
1102 ptrace_setregs(pid_t pid)
1103 {
1104 #  if defined SPARC || defined SPARC64
1105         /* SPARC systems have the meaning of data and addr reversed */
1106         return ptrace(PTRACE_SETREGS, pid, (void *) &ARCH_REGS_FOR_GETREGS, 0);
1107 #  else
1108         return ptrace(PTRACE_SETREGS, pid, NULL, &ARCH_REGS_FOR_GETREGS);
1109 #  endif
1110 }
1111 # endif /* !HAVE_GETREGS_OLD */
1112
1113 #endif /* ARCH_REGS_FOR_GETREGSET || ARCH_REGS_FOR_GETREGS */
1114
1115 static void
1116 get_regs(pid_t pid)
1117 {
1118 #undef USE_GET_SYSCALL_RESULT_REGS
1119 #ifdef ptrace_getregset_or_getregs
1120
1121         if (get_regs_error != -1)
1122                 return;
1123
1124 # ifdef HAVE_GETREGS_OLD
1125         /*
1126          * Try PTRACE_GETREGSET/PTRACE_GETREGS first,
1127          * fallback to getregs_old.
1128          */
1129         static int use_getregs_old;
1130         if (use_getregs_old < 0) {
1131                 get_regs_error = ptrace_getregset_or_getregs(pid);
1132                 return;
1133         } else if (use_getregs_old == 0) {
1134                 get_regs_error = ptrace_getregset_or_getregs(pid);
1135                 if (get_regs_error >= 0) {
1136                         use_getregs_old = -1;
1137                         return;
1138                 }
1139                 if (errno == EPERM || errno == ESRCH)
1140                         return;
1141                 use_getregs_old = 1;
1142         }
1143         get_regs_error = getregs_old(pid);
1144 # else /* !HAVE_GETREGS_OLD */
1145         /* Assume that PTRACE_GETREGSET/PTRACE_GETREGS works. */
1146         get_regs_error = ptrace_getregset_or_getregs(pid);
1147 # endif /* !HAVE_GETREGS_OLD */
1148
1149 #else /* !ptrace_getregset_or_getregs */
1150
1151 # define USE_GET_SYSCALL_RESULT_REGS 1
1152 # warning get_regs is not implemented for this architecture yet
1153         get_regs_error = 0;
1154
1155 #endif /* !ptrace_getregset_or_getregs */
1156 }
1157
1158 #ifdef ptrace_setregset_or_setregs
/*
 * Write the register set back to the tracee through whichever wrapper
 * ptrace_setregset_or_setregs expands to; its result is returned unchanged.
 */
static int
set_regs(pid_t pid)
{
        const int rc = ptrace_setregset_or_setregs(pid);

        return rc;
}
1164 #endif /* ptrace_setregset_or_setregs */
1165
1166 struct sysent_buf {
1167         struct tcb *tcp;
1168         struct_sysent ent;
1169         char buf[sizeof("syscall_%lu") + sizeof(kernel_ulong_t) * 3];
1170 };
1171
1172 static void
1173 free_sysent_buf(void *ptr)
1174 {
1175         struct sysent_buf *s = ptr;
1176         s->tcp->s_prev_ent = s->tcp->s_ent = NULL;
1177         free(ptr);
1178 }
1179
1180 /*
1181  * Returns:
1182  * 0: "ignore this ptrace stop", syscall_entering_decode() should return a "bail
1183  *    out silently" code.
1184  * 1: ok, continue in syscall_entering_decode().
1185  * other: error, syscall_entering_decode() should print error indicator
1186  *    ("????" etc) and return an appropriate code.
1187  */
1188 int
1189 get_scno(struct tcb *tcp)
1190 {
1191         get_regs(tcp->pid);
1192
1193         if (get_regs_error)
1194                 return -1;
1195
1196         int rc = arch_get_scno(tcp);
1197         if (rc != 1)
1198                 return rc;
1199
1200         if (scno_is_valid(tcp->scno)) {
1201                 tcp->s_ent = &sysent[tcp->scno];
1202                 tcp->qual_flg = qual_flags(tcp->scno);
1203         } else {
1204                 struct sysent_buf *s = xcalloc(1, sizeof(*s));
1205
1206                 s->tcp = tcp;
1207                 s->ent.nargs = MAX_ARGS;
1208                 s->ent.sen = SEN_printargs;
1209                 s->ent.sys_func = printargs;
1210                 s->ent.sys_name = s->buf;
1211                 sprintf(s->buf, "syscall_%" PRI_klu, shuffle_scno(tcp->scno));
1212
1213                 tcp->s_ent = &s->ent;
1214                 tcp->qual_flg = QUAL_RAW | DEFAULT_QUAL_FLAGS;
1215
1216                 set_tcb_priv_data(tcp, s, free_sysent_buf);
1217
1218                 if (debug_flag)
1219                         error_msg("pid %d invalid syscall %" PRI_kld,
1220                                   tcp->pid, tcp->scno);
1221         }
1222         return 1;
1223 }
1224
1225 #ifdef USE_GET_SYSCALL_RESULT_REGS
1226 static int get_syscall_result_regs(struct tcb *);
1227 #endif
1228
1229 /* Returns:
1230  * 1: ok, continue in syscall_exiting_trace().
1231  * -1: error, syscall_exiting_trace() should print error indicator
1232  *    ("????" etc) and bail out.
1233  */
1234 static int
1235 get_syscall_result(struct tcb *tcp)
1236 {
1237 #ifdef USE_GET_SYSCALL_RESULT_REGS
1238         if (get_syscall_result_regs(tcp))
1239                 return -1;
1240 #endif
1241         tcp->u_error = 0;
1242         get_error(tcp, !(tcp->s_ent->sys_flags & SYSCALL_NEVER_FAILS));
1243
1244         return 1;
1245 }
1246
1247 #include "get_scno.c"
1248 #include "set_scno.c"
1249 #include "get_syscall_args.c"
1250 #ifdef USE_GET_SYSCALL_RESULT_REGS
1251 # include "get_syscall_result.c"
1252 #endif
1253 #include "get_error.c"
1254 #include "set_error.c"
1255 #ifdef HAVE_GETREGS_OLD
1256 # include "getregs_old.c"
1257 #endif
1258
1259 const char *
1260 syscall_name(kernel_ulong_t scno)
1261 {
1262 #if defined X32_PERSONALITY_NUMBER && defined __X32_SYSCALL_BIT
1263         if (current_personality == X32_PERSONALITY_NUMBER)
1264                 scno &= ~__X32_SYSCALL_BIT;
1265 #endif
1266         return scno_is_valid(scno) ? sysent[scno].sys_name : NULL;
1267 }