xz: Add --block-size=SIZE.

author Lasse Collin <lasse.collin@tukaani.org>

Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)

committer Lasse Collin <lasse.collin@tukaani.org>

Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)
author Lasse Collin <lasse.collin@tukaani.org>
Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)
committer Lasse Collin <lasse.collin@tukaani.org>
Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)
diff --git a/src/xz/args.c b/src/xz/args.c

index 1a357b243557c3df2e5b06a1c781e5feba3a6214..f207e7f445940f5d46395acb3226db24ad05f767 100644 (file)
--- a/src/xz/args.c
+++ b/src/xz/args.c
@@ -72,6 +72,7 @@ parse_real(args_info *args, int argc, char **argv)
                 OPT_NO_SPARSE,
                 OPT_FILES,
                 OPT_FILES0,
+               OPT_BLOCK_SIZE,
                 OPT_MEM_COMPRESS,
                 OPT_MEM_DECOMPRESS,
                 OPT_NO_ADJUST,
@@ -105,6 +106,7 @@ parse_real(args_info *args, int argc, char **argv)
                 // Basic compression settings
                 { "format",       required_argument, NULL,  'F' },
                 { "check",        required_argument, NULL,  'C' },
+               { "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
                 { "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
                 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
                 { "memlimit",     required_argument, NULL,  'M' },
@@ -370,6 +372,11 @@ parse_real(args_info *args, int argc, char **argv)
                         break;
                 }
  
+               case OPT_BLOCK_SIZE:
+                       opt_block_size = str_to_uint64("block-size", optarg,
+                                       0, LZMA_VLI_MAX);
+                       break;
+
                 case OPT_SINGLE_STREAM:
                         opt_single_stream = true;
                         break;
diff --git a/src/xz/coder.c b/src/xz/coder.c

index 266482eb459a1c7501fdbf0de4ab1015c5f7c665..5e5ed04cdfa32c7a18a1527f56048f07ae17119a 100644 (file)
--- a/src/xz/coder.c
+++ b/src/xz/coder.c
@@ -25,6 +25,7 @@ enum operation_mode opt_mode = MODE_COMPRESS;
  enum format_type opt_format = FORMAT_AUTO;
  bool opt_auto_adjust = true;
  bool opt_single_stream = false;
+uint64_t opt_block_size = 0;
  
  
  /// Stream used to communicate with liblzma
@@ -461,8 +462,8 @@ coder_normal(file_pair *pair)
         // Encoder needs to know when we have given all the input to it.
         // The decoders need to know it too when we are using
         // LZMA_CONCATENATED. We need to check for src_eof here, because
-       // the first input chunk has been already read, and that may
-       // have been the only chunk we will read.
+       // the first input chunk has been already read if decompressing,
+       // and that may have been the only chunk we will read.
         lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
  
         lzma_ret ret;
@@ -470,6 +471,16 @@ coder_normal(file_pair *pair)
         // Assume that something goes wrong.
         bool success = false;
  
+       // block_remaining indicates how many input bytes to encode until
+       // finishing the current .xz Block. The Block size is set with
+       // --block-size=SIZE. It has an effect only when compressing
+       // to the .xz format. If block_remaining == UINT64_MAX, only
+       // a single block is created.
+       uint64_t block_remaining = UINT64_MAX;
+       if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ
+                       && opt_block_size > 0)
+               block_remaining = opt_block_size;
+
         strm.next_out = out_buf.u8;
         strm.avail_out = IO_BUFFER_SIZE;
  
@@ -478,14 +489,23 @@ coder_normal(file_pair *pair)
                 // end of file yet.
                 if (strm.avail_in == 0 && !pair->src_eof) {
                         strm.next_in = in_buf.u8;
-                       strm.avail_in = io_read(
-                                       pair, &in_buf, IO_BUFFER_SIZE);
+                       strm.avail_in = io_read(pair, &in_buf,
+                                       my_min(block_remaining,
+                                               IO_BUFFER_SIZE));
  
                         if (strm.avail_in == SIZE_MAX)
                                 break;
  
-                       if (pair->src_eof)
+                       if (pair->src_eof) {
                                 action = LZMA_FINISH;
+
+                       } else if (block_remaining != UINT64_MAX) {
+                               // Start a new Block after every
+                               // opt_block_size bytes of input.
+                               block_remaining -= strm.avail_in;
+                               if (block_remaining == 0)
+                                       action = LZMA_FULL_FLUSH;
+                       }
                 }
  
                 // Let liblzma do the actual work.
@@ -501,7 +521,12 @@ coder_normal(file_pair *pair)
                         strm.avail_out = IO_BUFFER_SIZE;
                 }
  
-               if (ret != LZMA_OK) {
+               if (ret == LZMA_STREAM_END && action == LZMA_FULL_FLUSH) {
+                       // Start a new Block.
+                       action = LZMA_RUN;
+                       block_remaining = opt_block_size;
+
+               } else if (ret != LZMA_OK) {
                         // Determine if the return value indicates that we
                         // won't continue coding.
                         const bool stop = ret != LZMA_NO_CHECK
@@ -627,10 +652,15 @@ coder_run(const char *filename)
         // Assume that something goes wrong.
         bool success = false;
  
-       // Read the first chunk of input data. This is needed to detect
-       // the input file type (for now, only for decompression).
-       strm.next_in = in_buf.u8;
-       strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+       if (opt_mode == MODE_COMPRESS) {
+               strm.next_in = NULL;
+               strm.avail_in = 0;
+       } else {
+               // Read the first chunk of input data. This is needed
+               // to detect the input file type.
+               strm.next_in = in_buf.u8;
+               strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
+       }
  
         if (strm.avail_in != SIZE_MAX) {
                 // Initialize the coder. This will detect the file format
diff --git a/src/xz/coder.h b/src/xz/coder.h

index d95319e5c4c835846bbc2e5140636d936b14400d..7edca039910a30659a7c3ed283454d92066e308f 100644 (file)
--- a/src/xz/coder.h
+++ b/src/xz/coder.h
@@ -44,6 +44,9 @@ extern bool opt_auto_adjust;
  /// If true, stop after decoding the first stream.
  extern bool opt_single_stream;
  
+/// If non-zero, start a new .xz Block after every opt_block_size bytes
+/// of input. This has an effect only when compressing to the .xz format.
+extern uint64_t opt_block_size;
  
  /// Set the integrity check type used when compressing
  extern void coder_set_check(lzma_check check);
diff --git a/src/xz/message.c b/src/xz/message.c

index 2a928107343267a0c36f4d12c533cbeba503ec20..249e934f80c8293f4f231992a8c13a1e07bd1c36 100644 (file)
--- a/src/xz/message.c
+++ b/src/xz/message.c
@@ -1128,6 +1128,10 @@ message_help(bool long_help)
  "                      `auto' (default), `xz', `lzma', and `raw'\n"
  "  -C, --check=CHECK   integrity check type: `none' (use with caution),\n"
  "                      `crc32', `crc64' (default), or `sha256'"));
+               puts(_(
+"      --block-size=SIZE\n"
+"                      when compressing to the .xz format, start a new block\n"
+"                      after every SIZE bytes of input; 0=disabled (default)"));
         }
  
         puts(_(
author	Lasse Collin <lasse.collin@tukaani.org>
	Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)
committer	Lasse Collin <lasse.collin@tukaani.org>
	Fri, 18 Mar 2011 17:10:30 +0000 (19:10 +0200)
src/xz/args.c		patch \| blob \| history
src/xz/coder.c		patch \| blob \| history
src/xz/coder.h		patch \| blob \| history
src/xz/message.c		patch \| blob \| history