]> granicus.if.org Git - xz/commitdiff
Miscellaneous LZ and LZMA encoder cleanups
authorLasse Collin <lasse.collin@tukaani.org>
Wed, 17 Sep 2008 19:11:39 +0000 (22:11 +0300)
committerLasse Collin <lasse.collin@tukaani.org>
Wed, 17 Sep 2008 19:11:39 +0000 (22:11 +0300)
src/liblzma/api/lzma/lzma.h
src/liblzma/lz/lz_encoder.c
src/liblzma/lzma/Makefile.am
src/liblzma/lzma/lzma_encoder.c
src/liblzma/lzma/lzma_encoder_features.c [deleted file]

index 8979b2d8ae4a240cd5238e94e57dbd912bacb38a..c4d5dbfadb8d86cb18eed5b97937190647354519 100644 (file)
  * Usually hash chains are faster than binary trees.
  */
 typedef enum {
-       LZMA_MF_INVALID = -1,
-               /**<
-                * \brief       Invalid match finder ID
-                *
-                * Used as array terminator in lzma_available_match_finders.
-                */
-
        LZMA_MF_HC3     = 0x03,
                /**<
                 * \brief       Hash Chain with 3 bytes hashing
@@ -121,13 +114,6 @@ extern lzma_bool lzma_mf_is_supported(lzma_match_finder match_finder)
  * finder.
  */
 typedef enum {
-       LZMA_MODE_INVALID = -1,
-               /**<
-                * \brief       Invalid mode
-                *
-                * Used as array terminator in lzma_available_modes.
-                */
-
        LZMA_MODE_FAST = 0,
                /**<
                 * \brief       Fast compression
index 29a19a5d9e1f172bcc76ec66fa5fb48ac2d8f3a1..159080eef35efcc8bb753960312120e4e975b904 100644 (file)
@@ -288,8 +288,12 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
                return true;
        }
 
-       // Calculate the sizes of mf->hash and mf->son.
+       // Calculate the sizes of mf->hash and mf->son and check that
+       // find_len_max is big enough for the selected match finder.
        const uint32_t hash_bytes = lz_options->match_finder & 0x0F;
+       if (hash_bytes > mf->find_len_max)
+               return true;
+
        const bool is_bt = (lz_options->match_finder & 0x10) != 0;
        uint32_t hs;
 
@@ -351,7 +355,7 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator,
        // Maximum number of match finder cycles
        mf->loops = lz_options->match_finder_cycles;
        if (mf->loops == 0) {
-               mf->loops = 16 + (lz_options->find_len_max / 2);
+               mf->loops = 16 + (mf->find_len_max / 2);
                if (!is_bt)
                        mf->loops /= 2;
        }
index 7aeceb6335c2154235b01b3e281e71bb4d477f72..bcc1fdc649a78ce1614eecb64bdbf5659f590dd2 100644 (file)
@@ -31,7 +31,6 @@ liblzma2_la_SOURCES += \
        lzma_encoder.c \
        lzma_encoder_presets.c \
        lzma_encoder_private.h \
-       lzma_encoder_features.c \
        lzma_encoder_optimum_fast.c \
        lzma_encoder_optimum_normal.c
 
index 5d73bfbce90c7d1f51fd1309fe308b2c01b0a6e0..02b7d19adf6cad4e037efdea2e5d9182c1601d9a 100644 (file)
@@ -422,16 +422,11 @@ lzma_encode(lzma_coder *restrict coder, lzma_mf *restrict mf,
 // Initialization //
 ////////////////////
 
-static bool
+static void
 set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
 {
-       if (!is_lclppb_valid(options)
-                       || options->fast_bytes < LZMA_FAST_BYTES_MIN
-                       || options->fast_bytes > LZMA_FAST_BYTES_MAX)
-               return true;
-
-       // FIXME validation
-
+       // LZ encoder initialization does the validation, also when just
+       // calculating memory usage, so we don't need to validate here.
        lz_options->before_size = OPTS;
        lz_options->dictionary_size = options->dictionary_size;
        lz_options->after_size = LOOP_INPUT_MAX;
@@ -441,8 +436,6 @@ set_lz_options(lzma_lz_options *lz_options, const lzma_options_lzma *options)
        lz_options->match_finder_cycles = options->match_finder_cycles;
        lz_options->preset_dictionary = options->preset_dictionary;
        lz_options->preset_dictionary_size = options->preset_dictionary_size;
-
-       return false;
 }
 
 
@@ -476,8 +469,7 @@ lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
 
        coder->pos_mask = (1U << options->pos_bits) - 1;
        coder->literal_context_bits = options->literal_context_bits;
-       coder->literal_pos_mask = (1 << options->literal_pos_bits) - 1;
-
+       coder->literal_pos_mask = (1U << options->literal_pos_bits) - 1;
 
        // Range coder
        rc_reset(&coder->rc);
@@ -519,7 +511,19 @@ lzma_lzma_encoder_reset(lzma_coder *coder, const lzma_options_lzma *options)
        length_encoder_reset(&coder->rep_len_encoder,
                        1U << options->pos_bits, coder->fast_mode);
 
-       // FIXME: Too big or too small won't work when resetting in the middle of LZMA2.
+       // Price counts are incremented every time appropriate probabilities
+       // are changed. price counts are set to zero when the price tables
+       // are updated, which is done when the appropriate price counts have
+       // big enough value, and lzma_mf.read_ahead == 0 which happens at
+       // least every OPTS (a few thousand) possible price count increments.
+       //
+       // By resetting price counts to UINT32_MAX / 2, we make sure that the
+       // price tables will be initialized before they will be used (since
+       // the value is definitely big enough), and that it is OK to increment
+       // price counts without risk of integer overflow (since UINT32_MAX / 2
+       // is small enough). The current code doesn't increment price counts
+       // before initializing price tables, but it maybe done in future if
+       // we add support for saving the state between LZMA2 chunks.
        coder->match_price_count = UINT32_MAX / 2;
        coder->align_price_count = UINT32_MAX / 2;
 
@@ -540,10 +544,10 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
 
        lzma_coder *coder = *coder_ptr;
 
-       // Validate options that aren't validated elsewhere.
-       if (!is_lclppb_valid(options)
-                       || options->fast_bytes < LZMA_FAST_BYTES_MIN
-                       || options->fast_bytes > LZMA_FAST_BYTES_MAX)
+       // Validate some of the options. LZ encoder validates fast_bytes too
+       // but we need a valid value here earlier.
+       if (!is_lclppb_valid(options) || options->fast_bytes < MATCH_LEN_MIN
+                       || options->fast_bytes > MATCH_LEN_MAX)
                return LZMA_OPTIONS_ERROR;
 
        // Set compression mode.
@@ -581,9 +585,7 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator,
 
        lzma_lzma_encoder_reset(coder, options);
 
-       // LZ encoder options FIXME validation
-       if (set_lz_options(lz_options, options))
-               return LZMA_OPTIONS_ERROR;
+       set_lz_options(lz_options, options);
 
        return LZMA_OK;
 }
@@ -603,25 +605,6 @@ extern lzma_ret
 lzma_lzma_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
                const lzma_filter_info *filters)
 {
-       // Initialization call chain:
-       //
-       //    lzma_lzma_encoder_init()
-       //      `-- lzma_lz_encoder_init()
-       //            `-- lzma_encoder_init()
-       //                  `-- lzma_encoder_init2()
-       //
-       // The above complexity is to let LZ encoder store the pointer to
-       // the LZMA encoder structure. Encoding call tree:
-       //
-       //    lz_encode()
-       //      |-- fill_window()
-       //      |     `-- Next coder in the chain, if any
-       //      `-- lzma_encode()
-       //            |-- lzma_dict_find()
-       //            `-- lzma_dict_skip()
-       //
-       // FIXME ^
-       //
        return lzma_lz_encoder_init(
                        next, allocator, filters, &lzma_encoder_init);
 }
@@ -631,8 +614,7 @@ extern uint64_t
 lzma_lzma_encoder_memusage(const void *options)
 {
        lzma_lz_options lz_options;
-       if (set_lz_options(&lz_options, options))
-               return UINT64_MAX;
+       set_lz_options(&lz_options, options);
 
        const uint64_t lz_memusage = lzma_lz_encoder_memusage(&lz_options);
        if (lz_memusage == UINT64_MAX)
diff --git a/src/liblzma/lzma/lzma_encoder_features.c b/src/liblzma/lzma/lzma_encoder_features.c
deleted file mode 100644 (file)
index 9fecee4..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-//
-/// \file       lzma_encoder_features.c
-/// \brief      Information about features enabled at compile time
-//
-//  Copyright (C) 2007 Lasse Collin
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2.1 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#include "common.h"
-
-
-static lzma_mode modes[] = {
-       LZMA_MODE_FAST,
-       LZMA_MODE_NORMAL,
-       LZMA_MODE_INVALID
-};
-
-
-LZMA_API const lzma_mode *const lzma_available_modes = modes;
-
-
-static lzma_match_finder match_finders[] = {
-#ifdef HAVE_MF_HC3
-       LZMA_MF_HC3,
-#endif
-
-#ifdef HAVE_MF_HC4
-       LZMA_MF_HC4,
-#endif
-
-#ifdef HAVE_MF_BT2
-       LZMA_MF_BT2,
-#endif
-
-#ifdef HAVE_MF_BT3
-       LZMA_MF_BT3,
-#endif
-
-#ifdef HAVE_MF_BT4
-       LZMA_MF_BT4,
-#endif
-
-       LZMA_MF_INVALID
-};
-
-
-LZMA_API const lzma_match_finder *const lzma_available_match_finders
-               = match_finders;