From: Lasse Collin Date: Tue, 27 Jan 2009 16:36:05 +0000 (+0200) Subject: Added initial support for preset dictionary for raw LZMA1 X-Git-Tag: v4.999.8beta~23 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f76e39cf930f888d460b443d18f977ebedea8b2a;p=xz Added initial support for preset dictionary for raw LZMA1 and LZMA2. It is not supported by the .xz format or the xz command line tool yet. --- diff --git a/src/liblzma/lz/lz_decoder.c b/src/liblzma/lz/lz_decoder.c index 5ad5c966..99430c9f 100644 --- a/src/liblzma/lz/lz_decoder.c +++ b/src/liblzma/lz/lz_decoder.c @@ -210,7 +210,7 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, lzma_allocator *allocator, const void *options, - size_t *dict_size)) + lzma_lz_options *lz_options)) { // Allocate the base structure if it isn't already allocated. if (next->coder == NULL) { @@ -229,17 +229,17 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, // Allocate and initialize the LZ-based decoder. It will also give // us the dictionary size. - size_t dict_size; + lzma_lz_options lz_options; return_if_error(lz_init(&next->coder->lz, allocator, - filters[0].options, &dict_size)); + filters[0].options, &lz_options)); // If the dictionary size is very small, increase it to 4096 bytes. // This is to prevent constant wrapping of the dictionary, which // would slow things down. The downside is that since we don't check // separately for the real dictionary size, we may happily accept // corrupt files. - if (dict_size < 4096) - dict_size = 4096; + if (lz_options.dict_size < 4096) + lz_options.dict_size = 4096; // Make dictionary size a multipe of 16. Some LZ-based decoders like // LZMA use the lowest bits lzma_dict.pos to know the alignment of the @@ -248,23 +248,38 @@ lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, // recommended to give aligned buffers to liblzma. // // Avoid integer overflow. - if (dict_size > SIZE_MAX - 15) + if (lz_options.dict_size > SIZE_MAX - 15) return LZMA_MEM_ERROR; - dict_size = (dict_size + 15) & ~((size_t)(15)); + lz_options.dict_size = (lz_options.dict_size + 15) & ~((size_t)(15)); // Allocate and initialize the dictionary. - if (next->coder->dict.size != dict_size) { + if (next->coder->dict.size != lz_options.dict_size) { lzma_free(next->coder->dict.buf, allocator); - next->coder->dict.buf = lzma_alloc(dict_size, allocator); + next->coder->dict.buf + = lzma_alloc(lz_options.dict_size, allocator); if (next->coder->dict.buf == NULL) return LZMA_MEM_ERROR; - next->coder->dict.size = dict_size; + next->coder->dict.size = lz_options.dict_size; } lz_decoder_reset(next->coder); + // Use the preset dictionary if it was given to us. + if (lz_options.preset_dict != NULL + && lz_options.preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, copy only the tail. + const size_t copy_size = MIN(lz_options.preset_dict_size, + lz_options.dict_size); + const size_t offset = lz_options.preset_dict_size - copy_size; + memcpy(next->coder->dict.buf, lz_options.preset_dict + offset, + copy_size); + next->coder->dict.pos = copy_size; + next->coder->dict.full = copy_size; + } + // Miscellaneous initializations next->coder->next_finished = false; next->coder->this_finished = false; diff --git a/src/liblzma/lz/lz_decoder.h b/src/liblzma/lz/lz_decoder.h index 5ac44057..9041d0bd 100644 --- a/src/liblzma/lz/lz_decoder.h +++ b/src/liblzma/lz/lz_decoder.h @@ -51,6 +51,13 @@ typedef struct { } lzma_dict; +typedef struct { + size_t dict_size; + const uint8_t *preset_dict; + size_t preset_dict_size; +} lzma_lz_options; + + typedef struct { /// Data specific to the LZ-based decoder lzma_coder *coder; @@ -86,7 +93,7 @@ extern lzma_ret lzma_lz_decoder_init(lzma_next_coder *next, lzma_allocator *allocator, const lzma_filter_info *filters, lzma_ret (*lz_init)(lzma_lz_decoder *lz, lzma_allocator *allocator, const void *options, - size_t *dict_size)); + lzma_lz_options *lz_options)); extern uint64_t lzma_lz_decoder_memusage(size_t dictionary_size); diff --git a/src/liblzma/lz/lz_encoder.c b/src/liblzma/lz/lz_encoder.c index 7bd6d03e..bd379533 100644 --- a/src/liblzma/lz/lz_encoder.c +++ b/src/liblzma/lz/lz_encoder.c @@ -363,7 +363,8 @@ lz_encoder_prepare(lzma_mf *mf, lzma_allocator *allocator, static bool -lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator) +lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator, + const lzma_lz_options *lz_options) { // Allocate the history buffer. if (mf->buffer == NULL) { @@ -421,6 +422,19 @@ lz_encoder_init(lzma_mf *mf, lzma_allocator *allocator) // we avoid wasting RAM and improve initialization speed a lot. //memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t)); + // Handle preset dictionary. + if (lz_options->preset_dict != NULL + && lz_options->preset_dict_size > 0) { + // If the preset dictionary is bigger than the actual + // dictionary, use only the tail. + mf->write_pos = MIN(lz_options->preset_dict_size, mf->size); + memcpy(mf->buffer, lz_options->preset_dict + + lz_options->preset_dict_size - mf->write_pos, + mf->write_pos); + mf->action = LZMA_SYNC_FLUSH; + mf->skip(mf, mf->write_pos); + } + mf->action = LZMA_RUN; return false; @@ -509,7 +523,7 @@ lzma_lz_encoder_init(lzma_next_coder *next, lzma_allocator *allocator, // Allocate new buffers if needed, and do the rest of // the initialization. - if (lz_encoder_init(&next->coder->mf, allocator)) + if (lz_encoder_init(&next->coder->mf, allocator, &lz_options)) return LZMA_MEM_ERROR; // Initialize the next filter in the chain, if any. diff --git a/src/liblzma/lzma/lzma2_decoder.c b/src/liblzma/lzma/lzma2_decoder.c index ff90803b..fe925a59 100644 --- a/src/liblzma/lzma/lzma2_decoder.c +++ b/src/liblzma/lzma/lzma2_decoder.c @@ -230,7 +230,7 @@ lzma2_decoder_end(lzma_coder *coder, lzma_allocator *allocator) static lzma_ret lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, - const void *options, size_t *dict_size) + const void *opt, lzma_lz_options *lz_options) { if (lz->coder == NULL) { lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -243,12 +243,15 @@ lzma2_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, lz->coder->lzma = LZMA_LZ_DECODER_INIT; } + const lzma_options_lzma *options = opt; + lz->coder->sequence = SEQ_CONTROL; lz->coder->need_properties = true; - lz->coder->need_dictionary_reset = true; + lz->coder->need_dictionary_reset = options->preset_dict == NULL + || options->preset_dict_size == 0; return lzma_lzma_decoder_create(&lz->coder->lzma, - allocator, options, dict_size); + allocator, options, lz_options); } diff --git a/src/liblzma/lzma/lzma2_encoder.c b/src/liblzma/lzma/lzma2_encoder.c index 81b6f973..cc676d5e 100644 --- a/src/liblzma/lzma/lzma2_encoder.c +++ b/src/liblzma/lzma/lzma2_encoder.c @@ -318,15 +318,17 @@ lzma2_encoder_init(lzma_lz_encoder *lz, lzma_allocator *allocator, lz->coder->lzma = NULL; } - lz->coder->sequence = SEQ_INIT; - lz->coder->need_properties = true; - lz->coder->need_state_reset = false; - lz->coder->need_dictionary_reset = true; - lz->coder->opt_cur = *(const lzma_options_lzma *)(options); lz->coder->opt_new = lz->coder->opt_cur.persistent ? options : NULL; + lz->coder->sequence = SEQ_INIT; + lz->coder->need_properties = true; + lz->coder->need_state_reset = false; + lz->coder->need_dictionary_reset + = lz->coder->opt_cur.preset_dict == NULL + || lz->coder->opt_cur.preset_dict_size == 0; + // Initialize LZMA encoder return_if_error(lzma_lzma_encoder_create(&lz->coder->lzma, allocator, &lz->coder->opt_cur, lz_options)); diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 03e3251a..80a9fd6c 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -941,7 +941,7 @@ lzma_decoder_reset(lzma_coder *coder, const void *opt) extern lzma_ret lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, - const void *opt, size_t *dict_size) + const void *opt, lzma_lz_options *lz_options) { if (lz->coder == NULL) { lz->coder = lzma_alloc(sizeof(lzma_coder), allocator); @@ -956,7 +956,9 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, // All dictionary sizes are OK here. LZ decoder will take care of // the special cases. const lzma_options_lzma *options = opt; - *dict_size = options->dict_size; + lz_options->dict_size = options->dict_size; + lz_options->preset_dict = options->preset_dict; + lz_options->preset_dict_size = options->preset_dict_size; return LZMA_OK; } @@ -967,13 +969,13 @@ lzma_lzma_decoder_create(lzma_lz_decoder *lz, lzma_allocator *allocator, /// the LZ initialization). static lzma_ret lzma_decoder_init(lzma_lz_decoder *lz, lzma_allocator *allocator, - const void *options, size_t *dict_size) + const void *options, lzma_lz_options *lz_options) { if (!is_lclppb_valid(options)) return LZMA_PROG_ERROR; return_if_error(lzma_lzma_decoder_create( - lz, allocator, options, dict_size)); + lz, allocator, options, lz_options)); lzma_decoder_reset(lz->coder, options); lzma_decoder_uncompressed(lz->coder, LZMA_VLI_UNKNOWN); diff --git a/src/liblzma/lzma/lzma_decoder.h b/src/liblzma/lzma/lzma_decoder.h index 133d2608..15844052 100644 --- a/src/liblzma/lzma/lzma_decoder.h +++ b/src/liblzma/lzma/lzma_decoder.h @@ -48,7 +48,7 @@ extern bool lzma_lzma_lclppb_decode( /// LZMA2 decoders. extern lzma_ret lzma_lzma_decoder_create( lzma_lz_decoder *lz, lzma_allocator *allocator, - const void *opt, size_t *dict_size); + const void *opt, lzma_lz_options *lz_options); /// Gets memory usage without validating lc/lp/pb. This is used by LZMA2 /// decoder, because raw LZMA2 decoding doesn't need lc/lp/pb. diff --git a/src/liblzma/lzma/lzma_encoder.c b/src/liblzma/lzma/lzma_encoder.c index 79bb8f9b..cf7637d2 100644 --- a/src/liblzma/lzma/lzma_encoder.c +++ b/src/liblzma/lzma/lzma_encoder.c @@ -274,6 +274,8 @@ encode_symbol(lzma_coder *coder, lzma_mf *mf, static bool encode_init(lzma_coder *coder, lzma_mf *mf) { + assert(mf_position(mf) == 0); + if (mf->read_pos == mf->read_limit) { if (mf->action == LZMA_RUN) return false; // We cannot do anything. @@ -594,7 +596,12 @@ lzma_lzma_encoder_create(lzma_coder **coder_ptr, lzma_allocator *allocator, return LZMA_OPTIONS_ERROR; } - coder->is_initialized = false; + // We don't need to write the first byte as literal if there is + // a non-empty preset dictionary. encode_init() wouldn't even work + // if there is a non-empty preset dictionary, because encode_init() + // assumes that position is zero and previous byte is also zero. + coder->is_initialized = options->preset_dict != NULL + && options->preset_dict_size > 0; coder->is_flushed = false; set_lz_options(lz_options, options);