xz: Multiple fixes.

author Lasse Collin <lasse.collin@tukaani.org>

Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)

committer Lasse Collin <lasse.collin@tukaani.org>

Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)
author Lasse Collin <lasse.collin@tukaani.org>
Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)
committer Lasse Collin <lasse.collin@tukaani.org>
Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)
diff --git a/configure.ac b/configure.ac

index e93e806e678d53bef2970ca36e32fc323b0705bb..8c61b5da8f34511cd2558845e32d21bf8b0c0b0a 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -522,6 +522,7 @@ TUKLIB_PROGNAME
  TUKLIB_INTEGER
  TUKLIB_PHYSMEM
  TUKLIB_CPUCORES
+TUKLIB_MBSTR
  
  
  ###############################################################################
diff --git a/m4/tuklib_mbstr.m4 b/m4/tuklib_mbstr.m4

new file mode 100644 (file)

index 0000000..991be9b
--- /dev/null
+++ b/m4/tuklib_mbstr.m4
@@ -0,0 +1,30 @@
+#
+# SYNOPSIS
+#
+#   TUKLIB_MBSTR
+#
+# DESCRIPTION
+#
+#   Check if multibyte and wide character functionality is available
+#   for use by tuklib_mbstr_* functions. If not enough multibyte string
+#   support is available in the C library, the functions keep working
+#   with the assumption that all strings are a in single-byte character
+#   set without combining characters, e.g. US-ASCII or ISO-8859-*.
+#
+#   This .m4 file and tuklib_mbstr.h are common to all tuklib_mbstr_*
+#   functions, but each function is put into a separate .c file so
+#   that it is possible to pick only what is strictly needed.
+#
+# COPYING
+#
+#   Author: Lasse Collin
+#
+#   This file has been put into the public domain.
+#   You can do whatever you want with this file.
+#
+
+AC_DEFUN_ONCE([TUKLIB_MBSTR], [
+AC_REQUIRE([TUKLIB_COMMON])
+AC_FUNC_MBRTOWC
+AC_CHECK_FUNCS([wcwidth])
+])dnl
diff --git a/src/common/tuklib_mbstr.h b/src/common/tuklib_mbstr.h

new file mode 100644 (file)

index 0000000..9f35835
--- /dev/null
+++ b/src/common/tuklib_mbstr.h
@@ -0,0 +1,66 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_mstr.h
+/// \brief      Utility functions for handling multibyte strings
+///
+/// If not enough multibyte string support is available in the C library,
+/// these functions keep working with the assumption that all strings
+/// are in a single-byte character set without combining characters, e.g.
+/// US-ASCII or ISO-8859-*.
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef TUKLIB_MBSTR_H
+#define TUKLIB_MBSTR_H
+
+#include "tuklib_common.h"
+TUKLIB_DECLS_BEGIN
+
+#define tuklib_mbstr_width TUKLIB_SYMBOL(tuklib_mbstr_width)
+extern size_t tuklib_mbstr_width(const char *str, size_t *bytes);
+///<
+/// \brief      Get the number of columns needed for the multibyte string
+///
+/// This is somewhat similar to wcswidth() but works on multibyte strings.
+///
+/// \param      str         String whose width is to be calculated. If the
+///                         current locale uses a multibyte character set
+///                         that has shift states, the string must begin
+///                         and end in the initial shift state.
+/// \param      bytes       If this is not NULL, *bytes is set to the
+///                         value returned by strlen(str) (even if an
+///                         error occurs when calculating the width).
+///
+/// \return     On success, the number of columns needed to display the
+///             string e.g. in a terminal emulator is returned. On error,
+///             (size_t)-1 is returned. Possible errors include invalid,
+///             partial, or non-printable multibyte character in str, or
+///             that str doesn't end in the initial shift state.
+
+#define tuklib_mbstr_fw TUKLIB_SYMBOL(tuklib_mbstr_fw)
+extern int tuklib_mbstr_fw(const char *str, int columns_min);
+///<
+/// \brief      Get the field width for printf() e.g. to align table columns
+///
+/// Printing simple tables to a terminal can be done using the field field
+/// feature in the printf() format string, but it works only with single-byte
+/// character sets. To do the same with multibyte strings, tuklib_mbstr_fw()
+/// can be used to calculate appropriate field width.
+///
+/// The behavior of this function is undefined, if
+///   - str is NULL or not terminated with '\0';
+///   - columns_min <= 0; or
+///   - the calculated field width exceeds INT_MAX.
+///
+/// \return     If tuklib_mbstr_width(str, NULL) fails, -1 is returned.
+///             If str needs more columns than columns_min, zero is returned.
+///             Otherwise a positive integer is returned, which can be
+///             used as the field width, e.g. printf("%*s", fw, str).
+
+TUKLIB_DECLS_END
+#endif
diff --git a/src/common/tuklib_mbstr_fw.c b/src/common/tuklib_mbstr_fw.c

new file mode 100644 (file)

index 0000000..978a3fe
--- /dev/null
+++ b/src/common/tuklib_mbstr_fw.c
@@ -0,0 +1,31 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_mstr_fw.c
+/// \brief      Get the field width for printf() e.g. to align table columns
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_mbstr.h"
+
+
+extern int
+tuklib_mbstr_fw(const char *str, int columns_min)
+{
+       size_t len;
+       const size_t width = tuklib_mbstr_width(str, &len);
+       if (width == (size_t)-1)
+               return -1;
+
+       if (width > (size_t)columns_min)
+               return 0;
+
+       if (width < (size_t)columns_min)
+               len += (size_t)columns_min - width;
+
+       return len;
+}
diff --git a/src/common/tuklib_mbstr_width.c b/src/common/tuklib_mbstr_width.c

new file mode 100644 (file)

index 0000000..3c38990
--- /dev/null
+++ b/src/common/tuklib_mbstr_width.c
@@ -0,0 +1,64 @@
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file       tuklib_mstr_width.c
+/// \brief      Calculate width of a multibyte string
+//
+//  Author:     Lasse Collin
+//
+//  This file has been put into the public domain.
+//  You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#include "tuklib_mbstr.h"
+
+#if defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+#      include <wchar.h>
+#endif
+
+
+extern size_t
+tuklib_mbstr_width(const char *str, size_t *bytes)
+{
+       const size_t len = strlen(str);
+       if (bytes != NULL)
+               *bytes = len;
+
+#if !(defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH))
+       // In single-byte mode, the width of the string is the same
+       // as its length.
+       return len;
+
+#else
+       mbstate_t state;
+       memset(&state, 0, sizeof(state));
+
+       size_t width = 0;
+       size_t i = 0;
+
+       // Convert one multibyte character at a time to wchar_t
+       // and get its width using wcwidth().
+       while (i < len) {
+               wchar_t wc;
+               const size_t ret = mbrtowc(&wc, str + i, len - i, &state);
+               if (ret < 1 || ret > len)
+                       return (size_t)-1;
+
+               i += ret;
+
+               const int wc_width = wcwidth(wc);
+               if (wc_width < 0)
+                       return (size_t)-1;
+
+               width += wc_width;
+       }
+
+       // Require that the string ends in the initial shift state.
+       // This way the caller can be combine the string with other
+       // strings without needing to worry about the shift states.
+       if (!mbsinit(&state))
+               return (size_t)-1;
+
+       return width;
+#endif
+}
diff --git a/src/xz/Makefile.am b/src/xz/Makefile.am

index 4dbe0f2604ef821325137039136291eba220c3e8..da716dca638a12a575ec2705653c12663dc6c887 100644 (file)
--- a/src/xz/Makefile.am
+++ b/src/xz/Makefile.am
@@ -34,7 +34,9 @@ xz_SOURCES = \
         $(top_srcdir)/src/common/tuklib_open_stdxxx.c \
         $(top_srcdir)/src/common/tuklib_progname.c \
         $(top_srcdir)/src/common/tuklib_exit.c \
-       $(top_srcdir)/src/common/tuklib_cpucores.c
+       $(top_srcdir)/src/common/tuklib_cpucores.c \
+       $(top_srcdir)/src/common/tuklib_mbstr_width.c \
+       $(top_srcdir)/src/common/tuklib_mbstr_fw.c
  
  if COND_W32
  xz_SOURCES += xz_w32res.rc
diff --git a/src/xz/list.c b/src/xz/list.c

index e136cc2ec57b40202fa04c3cb4e65518287638cf..bd4aee4e4dbbce8b9d3414260a478a50000b4e59 100644 (file)
--- a/src/xz/list.c
+++ b/src/xz/list.c
@@ -49,31 +49,43 @@ typedef struct {
         uint64_t memusage;
  
         /// The filter chain of this Block in human-readable form
-       const char *filter_chain;
+       char filter_chain[FILTERS_STR_SIZE];
  
  } block_header_info;
  
  
  /// Check ID to string mapping
  static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
-       "None",
+       // TRANSLATORS: Indicates that there is no integrity check.
+       // This string is used in tables, so the width must not
+       // exceed ten columns with a fixed-width font.
+       N_("None"),
         "CRC32",
-       "Unknown-2",
-       "Unknown-3",
+       // TRANSLATORS: Indicates that integrity check name is not known,
+       // but the Check ID is known (here 2). This and other "Unknown-N"
+       // strings are used in tables, so the width must not exceed ten
+       // columns with a fixed-width font. It's OK to omit the dash if
+       // you need space for one extra letter.
+       N_("Unknown-2"),
+       N_("Unknown-3"),
         "CRC64",
-       "Unknown-5",
-       "Unknown-6",
-       "Unknown-7",
-       "Unknown-8",
-       "Unknown-9",
+       N_("Unknown-5"),
+       N_("Unknown-6"),
+       N_("Unknown-7"),
+       N_("Unknown-8"),
+       N_("Unknown-9"),
         "SHA-256",
-       "Unknown-11",
-       "Unknown-12",
-       "Unknown-13",
-       "Unknown-14",
-       "Unknown-15",
+       N_("Unknown-11"),
+       N_("Unknown-12"),
+       N_("Unknown-13"),
+       N_("Unknown-14"),
+       N_("Unknown-15"),
  };
  
+/// Buffer size for get_check_names(). This may be a bit ridiculous,
+/// but at least it's enough if some language needs many multibyte chars.
+#define CHECKS_STR_SIZE 1024
+
  
  /// Value of the Check field as hexadecimal string.
  /// This is set by parse_check_value().
@@ -442,7 +454,7 @@ parse_block_header(file_pair *pair, const lzma_index_iter *iter,
                 xfi->memusage_max = bhi->memusage;
  
         // Convert the filter chain to human readable form.
-       bhi->filter_chain = message_filters_to_str(filters, false);
+       message_filters_to_str(bhi->filter_chain, filters, false);
  
         // Free the memory allocated by lzma_block_header_decode().
         for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
@@ -533,7 +545,7 @@ parse_details(file_pair *pair, const lzma_index_iter *iter,
  
  /// \brief      Get the compression ratio
  ///
-/// This has slightly different format than that is used by in message.c.
+/// This has slightly different format than that is used in message.c.
  static const char *
  get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
  {
@@ -545,7 +557,7 @@ get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
         if (ratio > 9.999)
                 return "---";
  
-       static char buf[6];
+       static char buf[16];
         snprintf(buf, sizeof(buf), "%.3f", ratio);
         return buf;
  }
@@ -553,19 +565,22 @@ get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
  
  /// \brief      Get a comma-separated list of Check names
  ///
+/// The check names are translated with gettext except when in robot mode.
+///
+/// \param      buf     Buffer to hold the resulting string
  /// \param      checks  Bit mask of Checks to print
  /// \param      space_after_comma
  ///                     It's better to not use spaces in table-like listings,
  ///                     but in more verbose formats a space after a comma
  ///                     is good for readability.
-static const char *
-get_check_names(uint32_t checks, bool space_after_comma)
+static void
+get_check_names(char buf[CHECKS_STR_SIZE],
+               uint32_t checks, bool space_after_comma)
  {
         assert(checks != 0);
  
-       static char buf[sizeof(check_names)];
         char *pos = buf;
-       size_t left = sizeof(buf);
+       size_t left = CHECKS_STR_SIZE;
  
         const char *sep = space_after_comma ? ", " : ",";
         bool comma = false;
@@ -573,12 +588,14 @@ get_check_names(uint32_t checks, bool space_after_comma)
         for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
                 if (checks & (UINT32_C(1) << i)) {
                         my_snprintf(&pos, &left, "%s%s",
-                                       comma ? sep : "", check_names[i]);
+                                       comma ? sep : "",
+                                       opt_robot ? check_names[i]
+                                               : _(check_names[i]));
                         comma = true;
                 }
         }
  
-       return buf;
+       return;
  }
  
  
@@ -596,18 +613,29 @@ print_info_basic(const xz_file_info *xfi, file_pair *pair)
                                 "Check   Filename"));
         }
  
-       printf("%5s %7s  %11s  %11s  %5s  %-7s %s\n",
-                       uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
-                       uint64_to_str(lzma_index_block_count(xfi->idx), 1),
-                       uint64_to_nicestr(lzma_index_file_size(xfi->idx),
-                               NICESTR_B, NICESTR_TIB, false, 2),
-                       uint64_to_nicestr(
-                               lzma_index_uncompressed_size(xfi->idx),
-                               NICESTR_B, NICESTR_TIB, false, 3),
-                       get_ratio(lzma_index_file_size(xfi->idx),
-                               lzma_index_uncompressed_size(xfi->idx)),
-                       get_check_names(lzma_index_checks(xfi->idx), false),
-                       pair->src_name);
+       char checks[CHECKS_STR_SIZE];
+       get_check_names(checks, lzma_index_checks(xfi->idx), false);
+
+       const char *cols[7] = {
+               uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
+               uint64_to_str(lzma_index_block_count(xfi->idx), 1),
+               uint64_to_nicestr(lzma_index_file_size(xfi->idx),
+                       NICESTR_B, NICESTR_TIB, false, 2),
+               uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx),
+                       NICESTR_B, NICESTR_TIB, false, 3),
+               get_ratio(lzma_index_file_size(xfi->idx),
+                       lzma_index_uncompressed_size(xfi->idx)),
+               checks,
+               pair->src_name,
+       };
+       printf("%*s %*s  %*s  %*s  %*s  %-*s %s\n",
+                       tuklib_mbstr_fw(cols[0], 5), cols[0],
+                       tuklib_mbstr_fw(cols[1], 7), cols[1],
+                       tuklib_mbstr_fw(cols[2], 11), cols[2],
+                       tuklib_mbstr_fw(cols[3], 11), cols[3],
+                       tuklib_mbstr_fw(cols[4], 5), cols[4],
+                       tuklib_mbstr_fw(cols[5], 7), cols[5],
+                       cols[6]);
  
         return false;
  }
@@ -618,6 +646,9 @@ print_adv_helper(uint64_t stream_count, uint64_t block_count,
                 uint64_t compressed_size, uint64_t uncompressed_size,
                 uint32_t checks, uint64_t stream_padding)
  {
+       char checks_str[CHECKS_STR_SIZE];
+       get_check_names(checks_str, checks, true);
+
         printf(_("  Streams:            %s\n"),
                         uint64_to_str(stream_count, 0));
         printf(_("  Blocks:             %s\n"),
@@ -630,8 +661,7 @@ print_adv_helper(uint64_t stream_count, uint64_t block_count,
                                 NICESTR_B, NICESTR_TIB, true, 0));
         printf(_("  Ratio:              %s\n"),
                         get_ratio(compressed_size, uncompressed_size));
-       printf(_("  Check:              %s\n"),
-                       get_check_names(checks, true));
+       printf(_("  Check:              %s\n"), checks_str);
         printf(_("  Stream padding:     %s\n"),
                         uint64_to_nicestr(stream_padding,
                                 NICESTR_B, NICESTR_TIB, true, 0));
@@ -669,21 +699,32 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
         lzma_index_iter_init(&iter, xfi->idx);
  
         while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
-               printf("    %6s %9s %15s %15s ",
-                               uint64_to_str(iter.stream.number, 0),
-                               uint64_to_str(iter.stream.block_count, 1),
-                               uint64_to_str(
-                                       iter.stream.compressed_offset, 2),
-                               uint64_to_str(
-                                       iter.stream.uncompressed_offset, 3));
-               printf("%15s %15s  %5s  %-10s %7s\n",
-                               uint64_to_str(iter.stream.compressed_size, 0),
-                               uint64_to_str(
-                                       iter.stream.uncompressed_size, 1),
-                               get_ratio(iter.stream.compressed_size,
-                                       iter.stream.uncompressed_size),
-                               check_names[iter.stream.flags->check],
-                               uint64_to_str(iter.stream.padding, 2));
+               const char *cols1[4] = {
+                       uint64_to_str(iter.stream.number, 0),
+                       uint64_to_str(iter.stream.block_count, 1),
+                       uint64_to_str(iter.stream.compressed_offset, 2),
+                       uint64_to_str(iter.stream.uncompressed_offset, 3),
+               };
+               printf("    %*s %*s %*s %*s ",
+                               tuklib_mbstr_fw(cols1[0], 6), cols1[0],
+                               tuklib_mbstr_fw(cols1[1], 9), cols1[1],
+                               tuklib_mbstr_fw(cols1[2], 15), cols1[2],
+                               tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
+
+               const char *cols2[5] = {
+                       uint64_to_str(iter.stream.compressed_size, 0),
+                       uint64_to_str(iter.stream.uncompressed_size, 1),
+                       get_ratio(iter.stream.compressed_size,
+                               iter.stream.uncompressed_size),
+                       _(check_names[iter.stream.flags->check]),
+                       uint64_to_str(iter.stream.padding, 2),
+               };
+               printf("%*s %*s  %*s  %-*s %*s\n",
+                               tuklib_mbstr_fw(cols2[0], 15), cols2[0],
+                               tuklib_mbstr_fw(cols2[1], 15), cols2[1],
+                               tuklib_mbstr_fw(cols2[2], 5), cols2[2],
+                               tuklib_mbstr_fw(cols2[3], 10), cols2[3],
+                               tuklib_mbstr_fw(cols2[4], 7), cols2[4]);
  
                 // Update the maximum Check size.
                 if (lzma_check_size(iter.stream.flags->check) > check_max)
@@ -730,41 +771,63 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
                         if (detailed && parse_details(pair, &iter, &bhi, xfi))
                                         return true;
  
-                       printf("    %6s %9s %15s %15s ",
+                       const char *cols1[4] = {
                                 uint64_to_str(iter.stream.number, 0),
                                 uint64_to_str(
                                         iter.block.number_in_stream, 1),
                                 uint64_to_str(
                                         iter.block.compressed_file_offset, 2),
                                 uint64_to_str(
-                                       iter.block.uncompressed_file_offset,
-                                       3));
-                       printf("%15s %15s  %5s  %-*s",
+                                       iter.block.uncompressed_file_offset, 3)
+                       };
+                       printf("    %*s %*s %*s %*s ",
+                               tuklib_mbstr_fw(cols1[0], 6), cols1[0],
+                               tuklib_mbstr_fw(cols1[1], 9), cols1[1],
+                               tuklib_mbstr_fw(cols1[2], 15), cols1[2],
+                               tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
+
+                       const char *cols2[4] = {
                                 uint64_to_str(iter.block.total_size, 0),
                                 uint64_to_str(iter.block.uncompressed_size,
                                                 1),
                                 get_ratio(iter.block.total_size,
                                         iter.block.uncompressed_size),
-                               detailed ? 11 : 1,
-                               check_names[iter.stream.flags->check]);
+                               _(check_names[iter.stream.flags->check])
+                       };
+                       printf("%*s %*s  %*s  %-*s",
+                               tuklib_mbstr_fw(cols2[0], 15), cols2[0],
+                               tuklib_mbstr_fw(cols2[1], 15), cols2[1],
+                               tuklib_mbstr_fw(cols2[2], 5), cols2[2],
+                               tuklib_mbstr_fw(cols2[3], detailed ? 11 : 1),
+                                       cols2[3]);
  
                         if (detailed) {
-                               // Show MiB for memory usage, because it
-                               // is the only size which is not in bytes.
                                 const lzma_vli compressed_size
                                                 = iter.block.unpadded_size
                                                 - bhi.header_size
                                                 - lzma_check_size(
                                                 iter.stream.flags->check);
-                               printf("%-*s  %6s  %-5s %15s %7s MiB  %s",
-                                       checkval_width, check_value,
+
+                               const char *cols3[6] = {
+                                       check_value,
                                         uint64_to_str(bhi.header_size, 0),
                                         bhi.flags,
                                         uint64_to_str(compressed_size, 1),
                                         uint64_to_str(
                                                 round_up_to_mib(bhi.memusage),
                                                 2),
-                                       bhi.filter_chain);
+                                       bhi.filter_chain
+                               };
+                               // Show MiB for memory usage, because it
+                               // is the only size which is not in bytes.
+                               printf("%-*s  %*s  %-5s %*s %*s MiB  %s",
+                                       checkval_width, cols3[0],
+                                       tuklib_mbstr_fw(cols3[1], 6), cols3[1],
+                                       cols3[2],
+                                       tuklib_mbstr_fw(cols3[3], 15),
+                                               cols3[3],
+                                       tuklib_mbstr_fw(cols3[4], 7), cols3[4],
+                                       cols3[5]);
                         }
  
                         putchar('\n');
@@ -785,6 +848,9 @@ print_info_adv(xz_file_info *xfi, file_pair *pair)
  static bool
  print_info_robot(xz_file_info *xfi, file_pair *pair)
  {
+       char checks[CHECKS_STR_SIZE];
+       get_check_names(checks, lzma_index_checks(xfi->idx), false);
+
         printf("name\t%s\n", pair->src_name);
  
         printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
@@ -795,7 +861,7 @@ print_info_robot(xz_file_info *xfi, file_pair *pair)
                         lzma_index_uncompressed_size(xfi->idx),
                         get_ratio(lzma_index_file_size(xfi->idx),
                                 lzma_index_uncompressed_size(xfi->idx)),
-                       get_check_names(lzma_index_checks(xfi->idx), false),
+                       checks,
                         xfi->stream_padding);
  
         if (message_verbosity_get() >= V_VERBOSE) {
@@ -893,6 +959,10 @@ print_totals_basic(void)
         line[sizeof(line) - 1] = '\0';
         puts(line);
  
+       // Get the check names.
+       char checks[CHECKS_STR_SIZE];
+       get_check_names(checks, totals.checks, false);
+
         // Print the totals except the file count, which needs
         // special handling.
         printf("%5s %7s  %11s  %11s  %5s  %-7s ",
@@ -904,7 +974,7 @@ print_totals_basic(void)
                                 NICESTR_B, NICESTR_TIB, false, 3),
                         get_ratio(totals.compressed_size,
                                 totals.uncompressed_size),
-                       get_check_names(totals.checks, false));
+                       checks);
  
         // Since we print totals only when there are at least two files,
         // the English message will always use "%s files". But some other
@@ -947,6 +1017,9 @@ print_totals_adv(void)
  static void
  print_totals_robot(void)
  {
+       char checks[CHECKS_STR_SIZE];
+       get_check_names(checks, totals.checks, false);
+
         printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
                         "\t%s\t%s\t%" PRIu64 "\t%" PRIu64,
                         totals.streams,
@@ -955,7 +1028,7 @@ print_totals_robot(void)
                         totals.uncompressed_size,
                         get_ratio(totals.compressed_size,
                                 totals.uncompressed_size),
-                       get_check_names(totals.checks, false),
+                       checks,
                         totals.stream_padding,
                         totals.files);
  
diff --git a/src/xz/message.c b/src/xz/message.c

index 6dfa4aacfcfee7e5c955d973465c86d0942fc32e..9c2dfbd8327fafd637a1a77e45e19e96df3ebefd 100644 (file)
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -321,7 +321,8 @@ progress_percentage(uint64_t in_pos)
         double percentage = (double)(in_pos) / (double)(expected_in_size)
                         * 99.9;
  
-       static char buf[sizeof("99.9 %")];
+       // Use big enough buffer to hold e.g. a multibyte decimal point.
+       static char buf[16];
         snprintf(buf, sizeof(buf), "%.1f %%", percentage);
  
         return buf;
@@ -333,12 +334,8 @@ progress_percentage(uint64_t in_pos)
  static const char *
  progress_sizes(uint64_t compressed_pos, uint64_t uncompressed_pos, bool final)
  {
-       // This is enough to hold sizes up to about 99 TiB if thousand
-       // separator is used, or about 1 PiB without thousand separator.
-       // After that the progress indicator will look a bit silly, since
-       // the compression ratio no longer fits with three decimal places.
-       static char buf[36];
-
+       // Use big enough buffer to hold e.g. a multibyte thousand separators.
+       static char buf[128];
         char *pos = buf;
         size_t left = sizeof(buf);
  
@@ -402,7 +399,8 @@ progress_speed(uint64_t uncompressed_pos, uint64_t elapsed)
         //  - 9.9 KiB/s
         //  - 99 KiB/s
         //  - 999 KiB/s
-       static char buf[sizeof("999 GiB/s")];
+       // Use big enough buffer to hold e.g. a multibyte decimal point.
+       static char buf[16];
         snprintf(buf, sizeof(buf), "%.*f %s",
                         speed > 9.9 ? 0 : 1, speed, unit[unit_index]);
         return buf;
@@ -588,12 +586,19 @@ message_progress_update(void)
         // Print the actual progress message. The idea is that there is at
         // least three spaces between the fields in typical situations, but
         // even in rare situations there is at least one space.
-       fprintf(stderr, "\r %6s %35s   %9s %10s   %10s\r",
+       const char *cols[5] = {
                 progress_percentage(in_pos),
                 progress_sizes(compressed_pos, uncompressed_pos, false),
                 progress_speed(uncompressed_pos, elapsed),
                 progress_time(elapsed),
-               progress_remaining(in_pos, elapsed));
+               progress_remaining(in_pos, elapsed),
+       };
+       fprintf(stderr, "\r %*s %*s   %*s %10s   %10s\r",
+                       tuklib_mbstr_fw(cols[0], 6), cols[0],
+                       tuklib_mbstr_fw(cols[1], 35), cols[1],
+                       tuklib_mbstr_fw(cols[2], 9), cols[2],
+                       cols[3],
+                       cols[4]);
  
  #ifdef SIGALRM
         // Updating the progress info was finished. Reset
@@ -663,12 +668,19 @@ progress_flush(bool finished)
         // statistics are printed in the same format as the progress
         // indicator itself.
         if (progress_automatic) {
-               fprintf(stderr, "\r %6s %35s   %9s %10s   %10s\n",
+               const char *cols[5] = {
                         finished ? "100 %" : progress_percentage(in_pos),
                         progress_sizes(compressed_pos, uncompressed_pos, true),
                         progress_speed(uncompressed_pos, elapsed),
                         progress_time(elapsed),
-                       finished ? "" : progress_remaining(in_pos, elapsed));
+                       finished ? "" : progress_remaining(in_pos, elapsed),
+               };
+               fprintf(stderr, "\r %*s %*s   %*s %10s   %10s\n",
+                               tuklib_mbstr_fw(cols[0], 6), cols[0],
+                               tuklib_mbstr_fw(cols[1], 35), cols[1],
+                               tuklib_mbstr_fw(cols[2], 9), cols[2],
+                               cols[3],
+                               cols[4]);
         } else {
                 // The filename is always printed.
                 fprintf(stderr, "%s: ", filename);
@@ -848,8 +860,10 @@ message_mem_needed(enum message_verbosity v, uint64_t memusage)
         // the user might need to +1 MiB to get high enough limit.)
         memusage = round_up_to_mib(memusage);
  
+       // With US-ASCII:
         // 2^64 with thousand separators + " MiB" suffix + '\0' = 26 + 4 + 1
-       char memlimitstr[32];
+       // But there may be multibyte chars so reserve enough space.
+       char memlimitstr[128];
  
         // Show the memory usage limit as MiB unless it is less than 1 MiB.
         // This way it's easy to notice errors where one has typed
@@ -895,13 +909,12 @@ uint32_to_optstr(uint32_t num)
  }
  
  
-extern const char *
-message_filters_to_str(const lzma_filter *filters, bool all_known)
+extern void
+message_filters_to_str(char buf[FILTERS_STR_SIZE],
+               const lzma_filter *filters, bool all_known)
  {
-       static char buf[512];
-
         char *pos = buf;
-       size_t left = sizeof(buf);
+       size_t left = FILTERS_STR_SIZE;
  
         for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i) {
                 // Add the dashes for the filter option. A space is
@@ -1025,7 +1038,7 @@ message_filters_to_str(const lzma_filter *filters, bool all_known)
                 }
         }
  
-       return buf;
+       return;
  }
  
  
@@ -1035,8 +1048,9 @@ message_filters_show(enum message_verbosity v, const lzma_filter *filters)
         if (v > verbosity)
                 return;
  
-       fprintf(stderr, _("%s: Filter chain: %s\n"), progname,
-                       message_filters_to_str(filters, true));
+       char buf[FILTERS_STR_SIZE];
+       message_filters_to_str(buf, filters, true);
+       fprintf(stderr, _("%s: Filter chain: %s\n"), progname, buf);
         return;
  }
  
diff --git a/src/xz/message.h b/src/xz/message.h

index dd5fa4d4806c4b8c5e09c484088a7be6b897b6b5..37e608216886f6400049bab3ac01db63442c6655 100644 (file)
--- a/src/xz/message.h
+++ b/src/xz/message.h
@@ -86,15 +86,19 @@ extern const char *message_strm(lzma_ret code);
  extern void message_mem_needed(enum message_verbosity v, uint64_t memusage);
  
  
+/// Buffer size for message_filters_to_str()
+#define FILTERS_STR_SIZE 512
+
+
  /// \brief      Get the filter chain as a string
  ///
+/// \param      buf         Pointer to caller allocated buffer to hold
+///                         the filter chain string
  /// \param      filters     Pointer to the filter chain
  /// \param      all_known   If true, all filter options are printed.
  ///                         If false, only the options that get stored
  ///                         into .xz headers are printed.
-///
-/// \return     Pointer to a statically allocated buffer.
-extern const char *message_filters_to_str(
+extern void message_filters_to_str(char buf[FILTERS_STR_SIZE],
                 const lzma_filter *filters, bool all_known);
  
  
diff --git a/src/xz/private.h b/src/xz/private.h

index 15136bfe878b8061fc0c7c73c2a342c4b66e8e20..6b01e51354e1cfcbc526b060dd92a8c5bf67037b 100644 (file)
--- a/src/xz/private.h
+++ b/src/xz/private.h
@@ -25,6 +25,7 @@
  #include "tuklib_gettext.h"
  #include "tuklib_progname.h"
  #include "tuklib_exit.h"
+#include "tuklib_mbstr.h"
  
  #if defined(_WIN32) && !defined(__CYGWIN__)
  #      define WIN32_LEAN_AND_MEAN
diff --git a/src/xz/util.c b/src/xz/util.c

index 19f5eee36667eafab34b3c95fc7eba55fbbcccb5..987b4430253959f9c4f24fe6097883a2d47a052d 100644 (file)
--- a/src/xz/util.c
+++ b/src/xz/util.c
@@ -14,6 +14,13 @@
  #include <stdarg.h>
  
  
+/// Buffers for uint64_to_str() and uint64_to_nicestr()
+static char bufs[4][128];
+
+/// Thousand separator support in uint64_to_str() and uint64_to_nicestr()
+static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
+
+
  extern void *
  xrealloc(void *ptr, size_t size)
  {
@@ -125,22 +132,28 @@ round_up_to_mib(uint64_t n)
  }
  
  
-extern const char *
-uint64_to_str(uint64_t value, uint32_t slot)
+/// Check if thousand separator is supported. Run-time checking is easiest,
+/// because it seems to be sometimes lacking even on POSIXish system.
+static void
+check_thousand_sep(uint32_t slot)
  {
-       // 2^64 with thousand separators is 26 bytes plus trailing '\0'.
-       static char bufs[4][32];
-
-       assert(slot < ARRAY_SIZE(bufs));
-
-       static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
         if (thousand == UNKNOWN) {
                 bufs[slot][0] = '\0';
-               snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64,
-                               UINT64_C(1));
+               snprintf(bufs[slot], sizeof(bufs[slot]), "%'u", 1U);
                 thousand = bufs[slot][0] == '1' ? WORKS : BROKEN;
         }
  
+       return;
+}
+
+
+extern const char *
+uint64_to_str(uint64_t value, uint32_t slot)
+{
+       assert(slot < ARRAY_SIZE(bufs));
+
+       check_thousand_sep(slot);
+
         if (thousand == WORKS)
                 snprintf(bufs[slot], sizeof(bufs[slot]), "%'" PRIu64, value);
         else
@@ -157,14 +170,21 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min,
  {
         assert(unit_min <= unit_max);
         assert(unit_max <= NICESTR_TIB);
+       assert(slot < ARRAY_SIZE(bufs));
+
+       check_thousand_sep(slot);
  
         enum nicestr_unit unit = NICESTR_B;
-       const char *str;
+       char *pos = bufs[slot];
+       size_t left = sizeof(bufs[slot]);
  
         if ((unit_min == NICESTR_B && value < 10000)
                         || unit_max == NICESTR_B) {
                 // The value is shown as bytes.
-               str = uint64_to_str(value, slot);
+               if (thousand == WORKS)
+                       my_snprintf(&pos, &left, "%'u", (unsigned int)value);
+               else
+                       my_snprintf(&pos, &left, "%u", (unsigned int)value);
         } else {
                 // Scale the value to a nicer unit. Unless unit_min and
                 // unit_max limit us, we will show at most five significant
@@ -175,49 +195,23 @@ uint64_to_nicestr(uint64_t value, enum nicestr_unit unit_min,
                         ++unit;
                 } while (unit < unit_min || (d > 9999.9 && unit < unit_max));
  
-               str = double_to_str(d);
+               if (thousand == WORKS)
+                       my_snprintf(&pos, &left, "%'.1f", d);
+               else
+                       my_snprintf(&pos, &left, "%.1f", d);
         }
  
         static const char suffix[5][4] = { "B", "KiB", "MiB", "GiB", "TiB" };
+       my_snprintf(&pos, &left, " %s", suffix[unit]);
  
-       // Minimum buffer size:
-       // 26   2^64 with thousand separators
-       //  4   " KiB"
-       //  2   " ("
-       // 26   2^64 with thousand separators
-       //  3   " B)"
-       //  1   '\0'
-       // 62   Total
-       static char buf[4][64];
-       char *pos = buf[slot];
-       size_t left = sizeof(buf[slot]);
-       my_snprintf(&pos, &left, "%s %s", str, suffix[unit]);
-
-       if (always_also_bytes && value >= 10000)
-               snprintf(pos, left, " (%s B)", uint64_to_str(value, slot));
-
-       return buf[slot];
-}
-
-
-extern const char *
-double_to_str(double value)
-{
-       static char buf[64];
-
-       static enum { UNKNOWN, WORKS, BROKEN } thousand = UNKNOWN;
-       if (thousand == UNKNOWN) {
-               buf[0] = '\0';
-               snprintf(buf, sizeof(buf), "%'.1f", 2.0);
-               thousand = buf[0] == '2' ? WORKS : BROKEN;
+       if (always_also_bytes && value >= 10000) {
+               if (thousand == WORKS)
+                       snprintf(pos, left, " (%'" PRIu64 " B)", value);
+               else
+                       snprintf(pos, left, " (%" PRIu64 " B)", value);
         }
  
-       if (thousand == WORKS)
-               snprintf(buf, sizeof(buf), "%'.1f", value);
-       else
-               snprintf(buf, sizeof(buf), "%.1f", value);
-
-       return buf;
+       return bufs[slot];
  }
  
  
@@ -231,7 +225,10 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...)
  
         // If an error occurred, we want the caller to think that the whole
         // buffer was used. This way no more data will be written to the
-       // buffer. We don't need better error handling here.
+       // buffer. We don't need better error handling here, although it
+       // is possible that the result looks garbage on the terminal if
+       // e.g. an UTF-8 character gets split. That shouldn't (easily)
+       // happen though, because the buffers used have some extra room.
         if (len < 0 || (size_t)(len) >= *left) {
                 *left = 0;
         } else {
@@ -243,45 +240,6 @@ my_snprintf(char **pos, size_t *left, const char *fmt, ...)
  }
  
  
-/*
-/// \brief      Simple quoting to get rid of ASCII control characters
-///
-/// This is not so cool and locale-dependent, but should be good enough
-/// At least we don't print any control characters on the terminal.
-///
-extern char *
-str_quote(const char *str)
-{
-       size_t dest_len = 0;
-       bool has_ctrl = false;
-
-       while (str[dest_len] != '\0')
-               if (*(unsigned char *)(str + dest_len++) < 0x20)
-                       has_ctrl = true;
-
-       char *dest = malloc(dest_len + 1);
-       if (dest != NULL) {
-               if (has_ctrl) {
-                       for (size_t i = 0; i < dest_len; ++i)
-                               if (*(unsigned char *)(str + i) < 0x20)
-                                       dest[i] = '?';
-                               else
-                                       dest[i] = str[i];
-
-                       dest[dest_len] = '\0';
-
-               } else {
-                       // Usually there are no control characters,
-                       // so we can optimize.
-                       memcpy(dest, str, dest_len + 1);
-               }
-       }
-
-       return dest;
-}
-*/
-
-
  extern bool
  is_empty_filename(const char *filename)
  {
diff --git a/src/xz/util.h b/src/xz/util.h

index 2e08b4a868ecd0d128b4e39178c59545b102df13..fea8cc6695dce31af1fbd39be44e46acb7e50edc 100644 (file)
--- a/src/xz/util.h
+++ b/src/xz/util.h
@@ -96,13 +96,6 @@ extern const char *uint64_to_nicestr(uint64_t value,
                 bool always_also_bytes, uint32_t slot);
  
  
-/// \brief      Convert double to a string with one decimal place
-///
-/// This is like uint64_to_str() except that this converts a double and
-/// uses exactly one decimal place.
-extern const char *double_to_str(double value);
-
-
  /// \brief      Wrapper for snprintf() to help constructing a string in pieces
  ///
  /// A maximum of *left bytes is written starting from *pos. *pos and *left
author	Lasse Collin <lasse.collin@tukaani.org>
	Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)
committer	Lasse Collin <lasse.collin@tukaani.org>
	Fri, 10 Sep 2010 07:30:33 +0000 (10:30 +0300)
configure.ac		patch \| blob \| history
m4/tuklib_mbstr.m4	[new file with mode: 0644]	patch \| blob
src/common/tuklib_mbstr.h	[new file with mode: 0644]	patch \| blob
src/common/tuklib_mbstr_fw.c	[new file with mode: 0644]	patch \| blob
src/common/tuklib_mbstr_width.c	[new file with mode: 0644]	patch \| blob
src/xz/Makefile.am		patch \| blob \| history
src/xz/list.c		patch \| blob \| history
src/xz/message.c		patch \| blob \| history
src/xz/message.h		patch \| blob \| history
src/xz/private.h		patch \| blob \| history
src/xz/util.c		patch \| blob \| history
src/xz/util.h		patch \| blob \| history