granicus.if.org Git - strace/commitdiff
Use process_vm_readv instead of PTRACE_PEEKDATA to read data blocks
author: Denys Vlasenko <vda.linux@googlemail.com>
Sat, 28 Jan 2012 00:46:33 +0000 (01:46 +0100)
committer: Denys Vlasenko <vda.linux@googlemail.com>
Sat, 28 Jan 2012 00:46:33 +0000 (01:46 +0100)
Currently, we use PTRACE_PEEKDATA to read things like filenames and
data passed by I/O syscalls.
PTRACE_PEEKDATA gets one word per syscall. This is VERY expensive.
For example, in order to print an fstat syscall, we need to perform
more than twenty trips into the kernel to fetch one struct stat!

Kernel 3.2 got a new syscall, process_vm_readv(), which can be used to
copy data blocks out of a process's address space.

This change uses it in umoven() and umovestr() functions if possible,
with fallback to old method if process_vm_readv() fails.
If it returns ENOSYS, we don't try to use it anymore, eliminating
overhead of trying it on older kernels.

Result of "time strace -oLOG ls -l /usr/lib >/dev/null":
Before patch: 0.372s
After patch:  0.262s

* util.c (process_vm_readv): Wrapper to call process_vm_readv syscall.
(umoven): Use process_vm_readv for block reads of tracee memory.
(umovestr): Likewise.
* linux/syscall.h: Declare new function sys_process_vm_readv.
* process.c (sys_process_vm_readv): Decoder for new syscall.
* linux/i386/syscallent.h: Add process_vm_readv, process_vm_writev syscalls.
* linux/x86_64/syscallent.h: Likewise.
* linux/powerpc/syscallent.h: Likewise.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
linux/i386/syscallent.h
linux/powerpc/syscallent.h
linux/syscall.h
linux/x86_64/syscallent.h
process.c
util.c

index 43093028daaecda0c5052f423acbdb332a0c3887..62597913aa42c2bd5ea2ff390ac672bbb37a6203 100644 (file)
        { 1,    TD,     sys_syncfs,             "syncfs"        }, /* 344 */
        { 4,    TN,     sys_sendmmsg,           "sendmmsg"      }, /* 345 */
        { 2,    TD,     sys_setns,              "setns"         }, /* 346 */
-       { 5,    0,      printargs,              "SYS_347"       }, /* 347 */
-       { 5,    0,      printargs,              "SYS_348"       }, /* 348 */
+       { 6,    0,      sys_process_vm_readv,   "process_vm_readv"      }, /* 347 */
+       { 6,    0,      printargs,              "process_vm_writev"     }, /* 348 */
        { 5,    0,      printargs,              "SYS_349"       }, /* 349 */
        { 5,    0,      printargs,              "SYS_350"       }, /* 350 */
        { 5,    0,      printargs,              "SYS_351"       }, /* 351 */
index 6c569904b4ba9edbff693570fa36b8bd4cf51ad9..697fce2ebc45ea8fe3b8645682ad5fc4813cdb8d 100644 (file)
        { 1,    TD,     sys_syncfs,             "syncfs"                }, /* 348 */
        { 4,    TN,     sys_sendmmsg,           "sendmmsg"              }, /* 349 */
        { 2,    TD,     sys_setns,              "setns"                 }, /* 350 */
-       { 5,    0,      printargs,              "SYS_351"               }, /* 351 */
-       { 5,    0,      printargs,              "SYS_352"               }, /* 352 */
+       { 6,    0,      sys_process_vm_readv,   "process_vm_readv"      }, /* 351 */
+       { 6,    0,      printargs,              "process_vm_writev"     }, /* 352 */
        { 5,    0,      printargs,              "SYS_353"               }, /* 353 */
        { 5,    0,      printargs,              "SYS_354"               }, /* 354 */
        { 5,    0,      printargs,              "SYS_355"               }, /* 355 */
index 6087d42ccb495130ba22fac35cdf8392b90d35c6..fda85ed6cf35d95e6873203784e6af32ce4bf4d2 100644 (file)
@@ -178,6 +178,7 @@ int sys_pread64();
 int sys_preadv();
 int sys_pselect6();
 int sys_ptrace();
+int sys_process_vm_readv();
 int sys_putpmsg();
 int sys_pwrite();
 int sys_pwrite64();
index 961ac0b5c93b245a585c3cf6093fd58275eb777d..7b9397330a19ce8cee2cd17ed3dbda85d0ef6221 100644 (file)
        { 4,    TN,     sys_sendmmsg,           "sendmmsg"      }, /* 307 */
        { 2,    TD,     sys_setns,              "setns"         }, /* 308 */
        { 3,    0,      sys_getcpu,             "getcpu"        }, /* 309 */
+       { 6,    0,      sys_process_vm_readv,   "process_vm_readv"      }, /* 310 */
+       { 6,    0,      printargs,              "process_vm_writev"     }, /* 311 */
index d93a19034942a82f24729f4a6e0b91a47d68ba52..552e5a5bfc7942bf4e5ef55546508a337922fad6 100644 (file)
--- a/process.c
+++ b/process.c
@@ -3456,4 +3456,30 @@ sys_getcpu(struct tcb *tcp)
        return 0;
 }
 
+/*
+ * Decoder for process_vm_readv(pid, local_iov, liovcnt,
+ *                              remote_iov, riovcnt, flags).
+ *
+ * The pid is printed when entering the syscall; the remaining arguments
+ * are printed on exit, when syserror() can tell whether the call failed.
+ * An iovec array is decoded with tprint_iov() only if the call succeeded;
+ * on error just its address is shown.  The element counts and the flags
+ * are printed in both cases, so all six arguments always appear.
+ *
+ * Always returns 0.
+ */
+int
+sys_process_vm_readv(struct tcb *tcp)
+{
+       if (entering(tcp)) {
+               /* arg 1: pid */
+               tprintf("%ld, ", tcp->u_arg[0]);
+       } else {
+               /* arg 2: local iov */
+               if (syserror(tcp)) {
+                       tprintf("%#lx", tcp->u_arg[1]);
+               } else {
+                       /* NOTE(review): tprint_iov() is assumed to print only
+                        * the array itself, not its length - confirm. */
+                       tprint_iov(tcp, tcp->u_arg[2], tcp->u_arg[1], 1);
+               }
+               /* arg 3: local iovcnt (printed on success and on error) */
+               tprintf(", %lu, ", tcp->u_arg[2]);
+               /* arg 4: remote iov */
+               if (syserror(tcp)) {
+                       tprintf("%#lx", tcp->u_arg[3]);
+               } else {
+                       tprint_iov(tcp, tcp->u_arg[4], tcp->u_arg[3], 0);
+               }
+               /* arg 5: remote iovcnt, arg 6: flags */
+               tprintf(", %lu, %lu", tcp->u_arg[4], tcp->u_arg[5]);
+       }
+       return 0;
+}
 #endif /* LINUX */
diff --git a/util.c b/util.c
index d9ec1b3cfb70efb7e5e63a15134092ccdac97568..85207cdf52088ba87feb952c7fb8a48b00f30158 100644 (file)
--- a/util.c
+++ b/util.c
@@ -769,6 +769,39 @@ dumpstr(struct tcb *tcp, long addr, int len)
        }
 }
 
+
+/* Need to do this since process_vm_readv() is not yet available in libc.
+ * When libc is updated, only the "static bool process_vm_readv_not_supported"
+ * line should remain.
+ *
+ * If the system headers do not define __NR_process_vm_readv, the syscall
+ * number is supplied here for I386, X86_64 and POWERPC; on any other
+ * architecture it stays undefined.
+ *
+ * When the number is known, process_vm_readv() is a thin wrapper that
+ * passes its arguments straight to syscall(), and
+ * process_vm_readv_not_supported starts out as 0.  Callers set the flag
+ * to 1 after the syscall fails with ENOSYS, so it is not tried again.
+ *
+ * When the number is not known, the flag starts out as 1 and
+ * process_vm_readv() is a macro that sets errno to ENOSYS and
+ * evaluates to -1.
+ */
+#if !defined(__NR_process_vm_readv)
+# if defined(I386)
+#  define __NR_process_vm_readv  347
+# elif defined(X86_64)
+#  define __NR_process_vm_readv  310
+# elif defined(POWERPC)
+#  define __NR_process_vm_readv  351
+# endif
+#endif
+
+#if defined(__NR_process_vm_readv)
+static bool process_vm_readv_not_supported = 0;
+static ssize_t process_vm_readv(pid_t pid,
+                const struct iovec *lvec,
+                unsigned long liovcnt,
+                const struct iovec *rvec,
+                unsigned long riovcnt,
+                unsigned long flags)
+{
+       return syscall(__NR_process_vm_readv, (long)pid, lvec, liovcnt, rvec, riovcnt, flags);
+}
+#else
+static bool process_vm_readv_not_supported = 1;
+# define process_vm_readv(...) (errno = ENOSYS, -1)
+#endif
+/* end of hack */
+
+
 #define PAGMASK        (~(PAGSIZ - 1))
 /*
  * move `len' bytes of data from process `pid'
@@ -786,6 +819,29 @@ umoven(struct tcb *tcp, long addr, int len, char *laddr)
                char x[sizeof(long)];
        } u;
 
+       if (!process_vm_readv_not_supported) {
+               struct iovec local[1], remote[1];
+               int r;
+
+               local[0].iov_base = laddr;
+               remote[0].iov_base = (void*)addr;
+               local[0].iov_len = remote[0].iov_len = len;
+               r = process_vm_readv(pid,
+                               local, 1,
+                               remote, 1,
+                               /*flags:*/ 0
+               );
+               if (r < 0) {
+                       if (errno == ENOSYS)
+                               process_vm_readv_not_supported = 1;
+                       else /* strange... */
+                               perror("process_vm_readv");
+                       goto vm_readv_didnt_work;
+               }
+               return r;
+       }
+ vm_readv_didnt_work:
+
 #if SUPPORTED_PERSONALITIES > 1
        if (personality_wordsize[current_personality] < sizeof(addr))
                addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
@@ -925,6 +981,54 @@ umovestr(struct tcb *tcp, long addr, int len, char *laddr)
                addr &= (1ul << 8 * personality_wordsize[current_personality]) - 1;
 #endif
 
+       if (!process_vm_readv_not_supported) {
+               struct iovec local[1], remote[1];
+
+               local[0].iov_base = laddr;
+               remote[0].iov_base = (void*)addr;
+
+               while (len > 0) {
+                       int end_in_page;
+                       int r;
+                       int chunk_len;
+
+                       /* Don't read kilobytes: most strings are short */
+                       chunk_len = len;
+                       if (chunk_len > 256)
+                               chunk_len = 256;
+                       /* Don't cross pages. I guess otherwise we can get EFAULT
+                        * and fail to notice that terminating NUL lies
+                        * in the existing (first) page.
+                        * (I hope there aren't arches with pages < 4K)
+                        */
+                       end_in_page = ((addr + chunk_len) & 4095);
+                       r = chunk_len - end_in_page;
+                       if (r > 0) /* if chunk_len > end_in_page */
+                               chunk_len = r; /* chunk_len -= end_in_page */
+
+                       local[0].iov_len = remote[0].iov_len = chunk_len;
+                       r = process_vm_readv(pid,
+                                       local, 1,
+                                       remote, 1,
+                                       /*flags:*/ 0
+                       );
+                       if (r < 0) {
+                               if (errno == ENOSYS)
+                                       process_vm_readv_not_supported = 1;
+                               else /* strange... */
+                                       perror("process_vm_readv");
+                               goto vm_readv_didnt_work;
+                       }
+                       if (memchr(local[0].iov_base, '\0', r))
+                               return 1;
+                       local[0].iov_base += r;
+                       remote[0].iov_base += r;
+                       len -= r;
+               }
+               return 0;
+       }
+ vm_readv_didnt_work:
+
        started = 0;
        if (addr & (sizeof(long) - 1)) {
                /* addr not a multiple of sizeof(long) */