From 356e9a0672140653b805ba8d619beb6df4e4770f Mon Sep 17 00:00:00 2001
From: Jim Warner
Date: Wed, 17 Jun 2020 08:58:50 -0500
Subject: [PATCH] library: correct that 'escape_str_utf8' guy's behavior

Thanks to Konstantin for discovering 2 problems in the
issue referenced below. That 15+ year old logic went a
little too far overboard wrestling with a utf8 string.

Henceforth, we will not treat 'x9b' as special. And we
also will handle a 'combining acute accent' correctly.

Reference(s):
https://gitlab.com/procps-ng/procps/-/issues/176

Signed-off-by: Jim Warner
---
 proc/escape.c | 38 +++++++++-----------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/proc/escape.c b/proc/escape.c
index 5188857d..acbdb03b 100644
--- a/proc/escape.c
+++ b/proc/escape.c
@@ -87,36 +87,16 @@ static int escape_str_utf8(char *restrict dst, const char *restrict src, int buf
       my_bytes++;
 
     } else {
-      /* multibyte - printable */
+      /* multibyte - maybe, kinda "printable" */
       int wlen = wcwidth(wc);
-
-      if (wlen<=0) {
-        // invisible multibyte -- we don't ignore it, because some terminal
-        // interpret it wrong and more safe is replace it with '?'
-        *(dst++) = '?';
-        src+=len;
-        my_cells++;
-        my_bytes++;
-      } else {
-        // multibyte - printable
-        // Got space?
-        if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
-        // 0x9b is control byte for some terminals
-        if (memchr(src, 0x9B, len)) {
-          // unsafe multibyte
-          *(dst++) = '?';
-          src+=len;
-          my_cells++;
-          my_bytes++;
-        } else {
-          // safe multibyte
-          memcpy(dst, src, len);
-          my_cells += wlen;
-          dst += len;
-          my_bytes += len;
-          src += len;
-        }
-      }
+      // Got space?
+      if (wlen > *maxcells-my_cells || len >= bufsize-(my_bytes+1)) break;
+      // safe multibyte
+      memcpy(dst, src, len);
+      dst += len;
+      src += len;
+      my_bytes += len;
+      if (wlen > 0) my_cells += wlen;
     }
     //fprintf(stdout, "cells: %d\n", my_cells);
   }
-- 
2.40.0