From: Lasse Collin Date: Tue, 11 Mar 2008 13:35:34 +0000 (+0200) Subject: Apply a minor speed optimization to LZMA decoder. X-Git-Tag: v4.999.3alpha~21 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfde3b24a5ae25ce53c854762b6148952386b025;p=xz Apply a minor speed optimization to LZMA decoder. --- diff --git a/src/liblzma/lzma/lzma_decoder.c b/src/liblzma/lzma/lzma_decoder.c index 9e1226f7..fce9594a 100644 --- a/src/liblzma/lzma/lzma_decoder.c +++ b/src/liblzma/lzma/lzma_decoder.c @@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder, coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0)); uint32_t symbol = 1; - if (!is_char_state(state)) { - // Decode literal with match byte. + if (is_char_state(state)) { + // Decode literal without match byte. + do { + if_bit_0(subcoder[symbol]) { + update_bit_0_dummy(); + symbol <<= 1; + } else { + update_bit_1_dummy(); + symbol = (symbol << 1) | 1; + } + } while (symbol < 0x100); - assert(rep0 != UINT32_MAX); + } else { + // Decode literal with match byte. uint32_t match_byte = lz_get_byte(coder->lz, rep0); + uint32_t subcoder_offset = 0x100; do { match_byte <<= 1; - const uint32_t match_bit = match_byte & 0x100; - const uint32_t subcoder_index = 0x100 + match_bit + symbol; + const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t subcoder_index + = subcoder_offset + match_bit + symbol; if_bit_0(subcoder[subcoder_index]) { update_bit_0_dummy(); symbol <<= 1; - if (match_bit != 0) - break; + subcoder_offset &= ~match_bit; } else { update_bit_1_dummy(); symbol = (symbol << 1) | 1; - if (match_bit == 0) - break; + subcoder_offset &= match_bit; } } while (symbol < 0x100); } - // Decode literal without match byte. This is also - // the tail of the with-match-byte function. - while (symbol < 0x100) { - if_bit_0(subcoder[symbol]) { - update_bit_0_dummy(); - symbol <<= 1; - } else { - update_bit_1_dummy(); - symbol = (symbol << 1) | 1; - } - } - break; } @@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in, now_pos, lz_get_byte(coder->lz, 0)); uint32_t symbol = 1; - if (!is_char_state(state)) { - // Decode literal with match byte. + if (is_char_state(state)) { + // Decode literal without match byte. + do { + if_bit_0(subcoder[symbol]) { + update_bit_0(subcoder[symbol]); + symbol <<= 1; + } else { + update_bit_1(subcoder[symbol]); + symbol = (symbol << 1) | 1; + } + } while (symbol < 0x100); - assert(rep0 != UINT32_MAX); + } else { + // Decode literal with match byte. + // + // The usage of subcoder_offset allows omitting some + // branches, which should give tiny speed improvement on + // some CPUs. subcoder_offset gets set to zero if match_bit + // didn't match. uint32_t match_byte = lz_get_byte(coder->lz, rep0); + uint32_t subcoder_offset = 0x100; do { match_byte <<= 1; - const uint32_t match_bit = match_byte & 0x100; - const uint32_t subcoder_index = 0x100 + match_bit + symbol; + const uint32_t match_bit = match_byte & subcoder_offset; + const uint32_t subcoder_index + = subcoder_offset + match_bit + symbol; if_bit_0(subcoder[subcoder_index]) { update_bit_0(subcoder[subcoder_index]); symbol <<= 1; - if (match_bit != 0) - break; + subcoder_offset &= ~match_bit; } else { update_bit_1(subcoder[subcoder_index]); symbol = (symbol << 1) | 1; - if (match_bit == 0) - break; + subcoder_offset &= match_bit; } } while (symbol < 0x100); } - // Decode literal without match byte. This is also - // the tail of the with-match-byte function. - while (symbol < 0x100) { - if_bit_0(subcoder[symbol]) { - update_bit_0(subcoder[symbol]); - symbol <<= 1; - } else { - update_bit_1(subcoder[symbol]); - symbol = (symbol << 1) | 1; - } - } - // Put the decoded byte to the dictionary, update the // decoder state, and start a new decoding loop. coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);