From: Jim Warner Date: Thu, 24 Dec 2020 06:00:00 +0000 (-0600) Subject: library: refactor 'escape' logic for newlib essentials X-Git-Tag: v4.0.0~260 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a221b9084ae979e6fd073a83e7fbc46c44551f35;p=procps-ng library: refactor 'escape' logic for newlib essentials This new library provides callers with pure strings or string vectors. It is up to those callers to deal with potential utf8 multibyte characters and any difference between strlen and the corresponding printable widths. So, it makes no sense for the library to go to all the trouble of invoking those rather expensive 'mbrtowc' & 'wcwidth' functions to ultimately yield total 'cells'. Thus, this patch will eliminate all the code and parameters that are involved with such possible multibyte issues. [ Along the way we'll lose the ability to substitute ] [ '?' for an invalid/unprintable multibyte sequence. ] [ We will, however, replace ctrl chars with the '?'. ] [ This presents no problem for that ps program since ] [ it now duplicates all of the original escape code. ] [ And, we'll no longer be executing that code twice! ] [ As for the top program, it takes the position that ] [ it is wrong to alter kernel supplied data. So with ] [ potential invalid/unprintable stuff, he'll rely on ] [ terminal emulators to properly handle such issues! ] [ Besides, even using a proper multibyte string, not ] [ all terminals generate the proper printable width. ] [ This is especially true when it comes to an emoji. ] [ And should callers choose not to be portable to all ] [ locales by calling setlocale(LC_ALL, ""), they can ] [ expect to see lots of "?", regardless of what this ] [ library fixes in a faulty multibyte string anyway. 
] Signed-off-by: Jim Warner --- diff --git a/proc/escape.c b/proc/escape.c index 8d358403..c881ff5b 100644 --- a/proc/escape.c +++ b/proc/escape.c @@ -17,171 +17,46 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include -#include #include #include -#include /* MB_CUR_MAX */ #include -#include -#include -#include #include "escape.h" #include "readproc.h" -#define SECURE_ESCAPE_ARGS(dst, bytes, cells) do { \ +#define SECURE_ESCAPE_ARGS(dst, bytes) do { \ if ((bytes) <= 0) return 0; \ *(dst) = '\0'; \ if ((bytes) >= INT_MAX) return 0; \ - if ((cells) >= INT_MAX) return 0; \ - if ((cells) <= 0) return 0; \ } while (0) -static int escape_str_utf8 (char *dst, const char *src, int bufsize, int *maxcells) { - int my_cells = 0; - int my_bytes = 0; - mbstate_t s; +int escape_str (unsigned char *dst, const unsigned char *src, int bufsize) { + int i, n; - SECURE_ESCAPE_ARGS(dst, bufsize, *maxcells); + SECURE_ESCAPE_ARGS(dst, bufsize); - memset(&s, 0, sizeof (s)); - - for(;;) { - wchar_t wc; - int len = 0; - - if(my_cells >= *maxcells || my_bytes+1 >= bufsize) - break; - - if (!(len = mbrtowc (&wc, src, MB_CUR_MAX, &s))) - /* 'str' contains \0 */ - break; - - if (len < 0) { - /* invalid multibyte sequence -- zeroize state */ - memset (&s, 0, sizeof (s)); - *(dst++) = '?'; - src++; - my_cells++; - my_bytes++; - - } else if (len==1) { - /* non-multibyte */ - *(dst++) = isprint(*src) ? *src : '?'; - src++; - my_cells++; - my_bytes++; - - } else if (!iswprint(wc)) { - /* multibyte - no printable */ - *(dst++) = '?'; - src+=len; - my_cells++; - my_bytes++; - - } else { - /* multibyte - maybe, kinda "printable" */ - int wlen = wcwidth(wc); - // Got space? 
- if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break; - // safe multibyte - memcpy(dst, src, len); - dst += len; - src += len; - my_bytes += len; - if (wlen > 0) my_cells += wlen; - } - //fprintf(stdout, "cells: %d\n", my_cells); - } - *dst = '\0'; - - // fprintf(stderr, "maxcells: %d, my_cells; %d\n", *maxcells, my_cells); - - *maxcells -= my_cells; - return my_bytes; // bytes of text, excluding the NUL -} - - -/* sanitize a string via one-way mangle */ -int escape_str (char *dst, const char *src, int bufsize, int *maxcells) { - unsigned char c; - int my_cells = 0; - int my_bytes = 0; - const char codes[] = - "Z..............................." - "||||||||||||||||||||||||||||||||" - "||||||||||||||||||||||||||||||||" - "|||||||||||||||||||||||||||||||." - "????????????????????????????????" - "????????????????????????????????" - "????????????????????????????????" - "????????????????????????????????"; - - static int utf_init=0; - - if(utf_init==0){ - /* first call -- check if UTF stuff is usable */ - char *enc = nl_langinfo(CODESET); - utf_init = enc && strcasecmp(enc, "UTF-8")==0 ? 1 : -1; - } - if (utf_init==1 && MB_CUR_MAX>1) { - /* UTF8 locales */ - return escape_str_utf8(dst, src, bufsize, maxcells); - } - - SECURE_ESCAPE_ARGS(dst, bufsize, *maxcells); - - if(bufsize > *maxcells+1) bufsize=*maxcells+1; // FIXME: assumes 8-bit locale - - for(;;){ - if(my_cells >= *maxcells || my_bytes+1 >= bufsize) - break; - c = (unsigned char) *(src++); - if(!c) break; - if(codes[c]!='|') c=codes[c]; - my_cells++; - my_bytes++; - *(dst++) = c; + n = snprintf(dst, bufsize, "%s", src); + if (n < 0) { + *dst = '\0'; + return 0; } - *dst = '\0'; - - *maxcells -= my_cells; - return my_bytes; // bytes of text, excluding the NUL -} + if (n >= bufsize) n = bufsize-1; -///////////////////////////////////////////////// + // control chars, especially tabs, create alignment problems for ps & top ... 
+ for (i = 0; i < n; i++) + if (dst[i] < 0x20 || dst[i] == 0x7f) + dst[i] = '?'; -// escape an argv or environment string array -// -// bytes arg means sizeof(buf) -static int escape_strlist (char *dst, const char **src, size_t bytes, int *cells) { - size_t i = 0; - - for(;;){ - i += escape_str(dst+i, *src, bytes-i, cells); - if(bytes-i < 3) break; // need room for space, a character, and the NUL - src++; - if(!*src) break; // need something to print - if (*cells<=1) break; // need room for printed size of text - dst[i++] = ' '; - --*cells; - } - return i; // bytes, excluding the NUL + return n; } -/////////////////////////////////////////////////// -int escape_command (char *const outbuf, const proc_t *pp, int bytes, int *cells, unsigned flags) { +int escape_command (unsigned char *outbuf, const proc_t *pp, int bytes, unsigned flags) { int overhead = 0; int end = 0; - if(flags & ESC_ARGS){ - const char **lc = (const char**)pp->cmdline; - if(lc && *lc) return escape_strlist(outbuf, lc, bytes, cells); - } if(flags & ESC_BRACKETS){ overhead += 2; } @@ -189,16 +64,14 @@ int escape_command (char *const outbuf, const proc_t *pp, int bytes, int *cells, if(pp->state=='Z') overhead += 10; // chars in " " else flags &= ~ESC_DEFUNCT; } - if(overhead + 1 >= *cells || // if no room for even one byte of the command name - overhead + 1 >= bytes){ + if(overhead + 1 >= bytes){ // if no room for even one byte of the command name outbuf[0] = '\0'; return 0; } if(flags & ESC_BRACKETS){ outbuf[end++] = '['; } - *cells -= overhead; - end += escape_str(outbuf+end, pp->cmd, bytes-overhead, cells); + end += escape_str(outbuf+end, pp->cmd, bytes-overhead); // Hmmm, do we want "[foo] " or "[foo ]"? 
if(flags & ESC_BRACKETS){ diff --git a/proc/escape.h b/proc/escape.h index 684eba1d..b55c48b6 100644 --- a/proc/escape.h +++ b/proc/escape.h @@ -3,12 +3,11 @@ #include "readproc.h" -#define ESC_ARGS 0x1 // try to use cmdline instead of cmd #define ESC_BRACKETS 0x2 // if using cmd, put '[' and ']' around it #define ESC_DEFUNCT 0x4 // mark zombies with " " -int escape_command (char *outbuf, const proc_t *pp, int bytes, int *cells, unsigned flags); +int escape_command (unsigned char *outbuf, const proc_t *pp, int bytes, unsigned flags); -int escape_str (char *dst, const char *src, int bufsize, int *maxcells); +int escape_str (unsigned char *dst, const unsigned char *src, int bufsize); #endif diff --git a/proc/readproc.c b/proc/readproc.c index 32591316..15cf1739 100644 --- a/proc/readproc.c +++ b/proc/readproc.c @@ -825,7 +825,7 @@ static int vectorize_dash_rc (char*** vec) { static int fill_cgroup_cvt (const char* directory, proc_t *restrict p) { #define vMAX ( MAX_BUFSZ - (int)(dst - dst_buffer) ) char *src, *dst, *grp, *eob, *name; - int tot, x, whackable_int = MAX_BUFSZ, len; + int tot, x, len; *(dst = dst_buffer) = '\0'; // empty destination tot = read_unvectored(src_buffer, MAX_BUFSZ, directory, "cgroup", '\0'); @@ -841,7 +841,7 @@ static int fill_cgroup_cvt (const char* directory, proc_t *restrict p) { len = snprintf(dst, vMAX, "%s", (dst > dst_buffer) ? "," : ""); if (len < 0 || len >= vMAX) break; dst += len; - dst += escape_str(dst, grp, vMAX, &whackable_int); + dst += escape_str(dst, grp, vMAX); } if (!(p->cgroup = strdup(dst_buffer[0] ? dst_buffer : "-"))) return 1; @@ -859,12 +859,10 @@ static int fill_cgroup_cvt (const char* directory, proc_t *restrict p) { // valid proc_t.cmdline pointer. 
static int fill_cmdline_cvt (const char* directory, proc_t *restrict p) { #define uFLG ( ESC_BRACKETS | ESC_DEFUNCT ) - int whackable_int = MAX_BUFSZ; - if (read_unvectored(src_buffer, MAX_BUFSZ, directory, "cmdline", ' ')) - escape_str(dst_buffer, src_buffer, MAX_BUFSZ, &whackable_int); + escape_str(dst_buffer, src_buffer, MAX_BUFSZ); else - escape_command(dst_buffer, p, MAX_BUFSZ, &whackable_int, uFLG); + escape_command(dst_buffer, p, MAX_BUFSZ, uFLG); p->cmdline = strdup(dst_buffer[0] ? dst_buffer : "?"); if (!p->cmdline) return 1; @@ -876,11 +874,9 @@ static int fill_cmdline_cvt (const char* directory, proc_t *restrict p) { // This routine reads an 'environ' for the designated proc_t and // guarantees the caller a valid proc_t.environ pointer. static int fill_environ_cvt (const char* directory, proc_t *restrict p) { - int whackable_int = MAX_BUFSZ; - dst_buffer[0] = '\0'; if (read_unvectored(src_buffer, MAX_BUFSZ, directory, "environ", ' ')) - escape_str(dst_buffer, src_buffer, MAX_BUFSZ, &whackable_int); + escape_str(dst_buffer, src_buffer, MAX_BUFSZ); p->environ = strdup(dst_buffer[0] ? dst_buffer : "-"); if (!p->environ) return 1; diff --git a/proc/readproc.h b/proc/readproc.h index fea120a6..51e64d17 100644 --- a/proc/readproc.h +++ b/proc/readproc.h @@ -215,8 +215,6 @@ typedef struct PROCTAB { #define PROC_FILL_LUID 0x400000 // fill in proc_t luid (login user id) #define PROC_FILL_EXE 0x200000 // fill in proc_t exe path + pgm name -#define PROC_LOOSE_TASKS 0x2000 // treat threads as if they were processes - // consider only processes with one of the passed: #define PROC_PID 0x1000 // process id numbers ( 0 terminated) #define PROC_UID 0x4000 // user id numbers ( length needed )