From 1bbebbe9ef7ca0c717f94d85d5f2283e046574af Mon Sep 17 00:00:00 2001 From: Reuben Thomas Date: Mon, 22 Jan 2018 22:22:09 +0000 Subject: [PATCH] Remove --sequence=files Assume that any reasonable target OS has virtual memory (on which holding the data in memory has much the same performance implications as using files). --- doc/recode.texi | 20 +++++++------------- src/main.c | 15 ++++----------- src/recodext.h | 1 - src/task.c | 34 +++++----------------------------- tests/Recode.pyx | 2 -- tests/t50_methods.py | 16 ---------------- 6 files changed, 16 insertions(+), 72 deletions(-) diff --git a/doc/recode.texi b/doc/recode.texi index ea77970..dc6c34a 100644 --- a/doc/recode.texi +++ b/doc/recode.texi @@ -1439,21 +1439,18 @@ behaviour, which is also explained below. @cindex memory sequencing When the recoding requires a combination of two or more elementary recoding steps, this option forces many passes over the data, using -in-memory buffers to hold all intermediary results. -@c This should be the default behaviour when -@c files to be recoded are @emph{small} enough. +in-memory buffers to hold all intermediate results. +If this option is selected in filter +mode, that is, when the program reads standard input and writes standard +output, it might take longer for programs further down the pipe chain to +start receiving some recoded data. @item -i @itemx --sequence=files @opindex -i @cindex file sequencing -When the recoding requires a combination of two or more elementary -recoding steps, this option forces many passes over the data, using -intermediate files between passes. This is the default behaviour when -files are recoded over themselves. If this option is selected in filter -mode, that is, when the program reads standard input and writes standard -output, it might take longer for programs further down the pipe chain to -start receiving some recoded data. +This option is accepted for backwards compatibility, and acts like +@samp{--sequence=memory}. @item -p @itemx --sequence=pipe @@ -2405,9 +2402,6 @@ the header file @file{recodext.h}. Other possible values are: @item RECODE_SEQUENCE_IN_MEMORY @vindex RECODE_SEQUENCE_IN_MEMORY Keep intermediate recodings in memory. -@item RECODE_SEQUENCE_WITH_FILES -@vindex RECODE_SEQUENCE_WITH_FILES -Do not fork, use intermediate files. @item RECODE_SEQUENCE_WITH_PIPE @vindex RECODE_SEQUENCE_WITH_PIPE Fork processes connected with @code{pipe(2)}. diff --git a/src/main.c b/src/main.c index cd286a6..72a88a3 100644 --- a/src/main.c +++ b/src/main.c @@ -227,7 +227,6 @@ Operation modes:\n\ -q, --quiet, --silent inhibit messages about irreversible recodings\n\ -f, --force force recodings even when not reversible\n\ -t, --touch touch the recoded files after replacement\n\ - -i, --sequence=files use intermediate files for sequencing passes\n\ --sequence=memory use memory buffers for sequencing passes\n\ "), stdout); @@ -393,11 +392,8 @@ main (int argc, char *const *argv) usage (EXIT_FAILURE, 0); case 0: - task_option.strategy = RECODE_SEQUENCE_IN_MEMORY; - break; - case 1: - task_option.strategy = RECODE_SEQUENCE_WITH_FILES; + task_option.strategy = RECODE_SEQUENCE_IN_MEMORY; break; case 2: @@ -501,7 +497,7 @@ main (int argc, char *const *argv) break; case 'i': - task_option.strategy = RECODE_SEQUENCE_WITH_FILES; + task_option.strategy = RECODE_SEQUENCE_IN_MEMORY; break; case 'k': @@ -789,13 +785,10 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), if (optind < argc) { /* When reading and writing files, unless the user selected - otherwise, avoid forking and use intermediate files. */ - - /* FIXME: On a file per file basis, force recoding to be done in - memory whenever files are small, which is the usual case. */ + otherwise, avoid forking and use memory. */ if (task->strategy == RECODE_STRATEGY_UNDECIDED) - task->strategy = RECODE_SEQUENCE_WITH_FILES; + task->strategy = RECODE_SEQUENCE_IN_MEMORY; /* In case files are recoded over themselves and there is no recoding step at all, do not even try to touch the files. */ diff --git a/src/recodext.h b/src/recodext.h index dbd91b5..100c5f4 100644 --- a/src/recodext.h +++ b/src/recodext.h @@ -415,7 +415,6 @@ enum recode_sequence_strategy { RECODE_STRATEGY_UNDECIDED, /* sequencing strategy is undecided yet */ RECODE_SEQUENCE_IN_MEMORY, /* keep intermediate recodings in memory */ - RECODE_SEQUENCE_WITH_FILES, /* do not fork, use intermediate files */ RECODE_SEQUENCE_WITH_PIPE /* fork processes connected with `pipe(2)' */ }; diff --git a/src/task.c b/src/task.c index 0ba57ce..04a15df 100644 --- a/src/task.c +++ b/src/task.c @@ -260,7 +260,7 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) child_process = -1; subtask->output = task->output; - if (strategy == RECODE_SEQUENCE_IN_MEMORY || strategy == RECODE_SEQUENCE_WITH_FILES) + if (strategy == RECODE_SEQUENCE_IN_MEMORY) if (sequence_index > 0) { /* Select the input text for this step. */ @@ -269,14 +269,6 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) subtask->input.cursor = input.buffer; subtask->input.limit = input.cursor; subtask->input.file = input.file; - - if (strategy == RECODE_SEQUENCE_WITH_FILES && - fseek (subtask->input.file, 0L, SEEK_SET) != 0) - { - recode_perror (NULL, "fseek (%s)", subtask->input.name); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - goto exit; - } } /* Select the output text for this step. */ @@ -290,15 +282,6 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) subtask->output.cursor = subtask->output.buffer; break; - case RECODE_SEQUENCE_WITH_FILES: - if (subtask->output.file = tmpfile (), subtask->output.file == NULL) - { - recode_perror (NULL, "tmpfile ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - goto exit; - } - break; - #if HAVE_PIPE case RECODE_SEQUENCE_WITH_PIPE: /* Create all subprocesses, from the first to the last, and @@ -401,7 +384,7 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) break; /* parent process: escape from loop */ } - if (strategy != RECODE_SEQUENCE_WITH_PIPE) + if (strategy == RECODE_SEQUENCE_IN_MEMORY) { /* Post-step clean up. */ @@ -572,21 +555,14 @@ recode_perform_task (RECODE_TASK task) { case RECODE_SEQUENCE_WITH_PIPE: #if !HAVE_PIPE - strategy = RECODE_SEQUENCE_WITH_FILES; + strategy = RECODE_SEQUENCE_IN_MEMORY; #endif case RECODE_SEQUENCE_IN_MEMORY: - case RECODE_SEQUENCE_WITH_FILES: break; - case RECODE_STRATEGY_UNDECIDED: default: /* This should not happen, but if it does, try to recover. */ - /* Let's use only memory if either end is memory, or only temporary - files if both ends are files. This is a crude choice, FIXME! */ - if ((task->input.name || task->input.file) - && (task->output.name || task->output.file)) - strategy = RECODE_SEQUENCE_WITH_FILES; - else - strategy = RECODE_SEQUENCE_IN_MEMORY; + case RECODE_STRATEGY_UNDECIDED: + strategy = RECODE_SEQUENCE_IN_MEMORY; break; } diff --git a/tests/Recode.pyx b/tests/Recode.pyx index 11d8541..8cf0fb7 100644 --- a/tests/Recode.pyx +++ b/tests/Recode.pyx @@ -170,7 +170,6 @@ cdef extern from "common.h": enum recode_sequence_strategy: RECODE_STRATEGY_UNDECIDED RECODE_SEQUENCE_IN_MEMORY - RECODE_SEQUENCE_WITH_FILES RECODE_SEQUENCE_WITH_PIPE enum recode_swap_input: @@ -488,7 +487,6 @@ EXPLODE_STEP = RECODE_EXPLODE_STEP STRATEGY_UNDECIDED = RECODE_STRATEGY_UNDECIDED SEQUENCE_IN_MEMORY = RECODE_SEQUENCE_IN_MEMORY -SEQUENCE_WITH_FILES = RECODE_SEQUENCE_WITH_FILES SEQUENCE_WITH_PIPE = RECODE_SEQUENCE_WITH_PIPE SWAP_UNDECIDED = RECODE_SWAP_UNDECIDED diff --git a/tests/t50_methods.py b/tests/t50_methods.py index 23f5044..6345c73 100644 --- a/tests/t50_methods.py +++ b/tests/t50_methods.py @@ -11,64 +11,48 @@ def test_1(): # No step at all. yield validate, 'texte..texte', 'memory', 'filter' yield validate, 'texte..texte', 'memory', 'squash' - yield validate, 'texte..texte', 'files', 'filter' - yield validate, 'texte..texte', 'files', 'squash' yield validate, 'texte..texte', 'pipe', 'filter' yield validate, 'texte..texte', 'pipe', 'squash' # One single step. yield validate, 'latin1..ibmpc/', 'memory', 'filter' yield validate, 'latin1..ibmpc/', 'memory', 'squash' - yield validate, 'latin1..ibmpc/', 'files', 'filter' - yield validate, 'latin1..ibmpc/', 'files', 'squash' yield validate, 'latin1..ibmpc/', 'pipe', 'filter' yield validate, 'latin1..ibmpc/', 'pipe', 'squash' # One single step and a surface yield validate, 'latin1..ibmpc', 'memory', 'filter' yield validate, 'latin1..ibmpc', 'memory', 'squash' - yield validate, 'latin1..ibmpc', 'files', 'filter' - yield validate, 'latin1..ibmpc', 'files', 'squash' yield validate, 'latin1..ibmpc', 'pipe', 'filter' yield validate, 'latin1..ibmpc', 'pipe', 'squash' # One single step. yield validate, 'texte..latin1', 'memory', 'filter' yield validate, 'texte..latin1', 'memory', 'squash' - yield validate, 'texte..latin1', 'files', 'filter' - yield validate, 'texte..latin1', 'files', 'squash' yield validate, 'texte..latin1', 'pipe', 'filter' yield validate, 'texte..latin1', 'pipe', 'squash' # Two single steps. yield validate, 'texte..bangbang', 'memory', 'filter' yield validate, 'texte..bangbang', 'memory', 'squash' - yield validate, 'texte..bangbang', 'files', 'filter' - yield validate, 'texte..bangbang', 'files', 'squash' yield validate, 'texte..bangbang', 'pipe', 'filter' yield validate, 'texte..bangbang', 'pipe', 'squash' # Two single steps and a surface. yield validate, 'texte..ibmpc', 'memory', 'filter' yield validate, 'texte..ibmpc', 'memory', 'squash' - yield validate, 'texte..ibmpc', 'files', 'filter' - yield validate, 'texte..ibmpc', 'files', 'squash' yield validate, 'texte..ibmpc', 'pipe', 'filter' yield validate, 'texte..ibmpc', 'pipe', 'squash' # Three single steps. yield validate, 'texte..iconqnx', 'memory', 'filter' yield validate, 'texte..iconqnx', 'memory', 'squash' - yield validate, 'texte..iconqnx', 'files', 'filter' - yield validate, 'texte..iconqnx', 'files', 'squash' yield validate, 'texte..iconqnx', 'pipe', 'filter' yield validate, 'texte..iconqnx', 'pipe', 'squash' # Four single steps, optimized into three (with iconv) or two (without). yield validate, 'ascii-bs..ebcdic', 'memory', 'filter' yield validate, 'ascii-bs..ebcdic', 'memory', 'squash' - yield validate, 'ascii-bs..ebcdic', 'files', 'filter' - yield validate, 'ascii-bs..ebcdic', 'files', 'squash' yield validate, 'ascii-bs..ebcdic', 'pipe', 'filter' yield validate, 'ascii-bs..ebcdic', 'pipe', 'squash' -- 2.40.0