From: Reuben Thomas Date: Tue, 30 Jan 2018 21:40:53 +0000 (+0000) Subject: Remove conversion methods X-Git-Tag: v3.7~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=76d7efd2d6c9dfa81270401a346c27c52a9e7f11;p=recode Remove conversion methods See NEWS for explanation and rationale. Also reorder NEWS items for 3.7 better. --- diff --git a/NEWS b/NEWS index 6a6ca59..6bf2f20 100644 --- a/NEWS +++ b/NEWS @@ -12,15 +12,22 @@ Version 3.7 :Author: François Pinard, 2008-03; Reuben Thomas, 2018-01. ++ Converters for BibTeX (from Vincent Danjean) and the ANSEL and ISO 5426 + character sets (from Wolfram Schneider) have been added. ++ The conversion strategies (whether to use pipes, memory or files) are + no longer available. Now it is reasonable to assume virtual memory, so + files and memory have similar performance characteristics (in + particular, the memory method is not limited by physical memory.) + Further, tests showed that even for runs on little data, the pipes + method has minimal performance impact (none was measured). This is + not a surprise, as for one-step recodings, the commonest case, no + forking is needed. The command-line options -i, -p and + --sequence=STRATEGY are ignored for backwards compatibility. + Recode does not include libiconv anymore, but uses an external iconv library if one was available at installation time. The -x: option to the program, or a new flag to the library recode_new_outer function, inhibits the initialisation and usage of iconv. -+ Converters for BibTeX (from Vincent Danjean) and the ANSEL and ISO 5426 -character sets (from Wolfram Schneider) have been added. -+ The option --sequence=file (short option -i) is now an (undocumented) -synonym for --sequence=memory. -+ Many internal changes, for correcting reported bugs. ++ Many bug fixes. + Long ago, I renamed GNU recode to Free recode: the permission for using the GNU prefix mandated a level of obedience to the FSF that once went overboard, in my opinion. After that change, I realized diff --git a/doc/recode.texi b/doc/recode.texi index 53b35d0..fd91eaf 100644 --- a/doc/recode.texi +++ b/doc/recode.texi @@ -1420,11 +1420,8 @@ four different interconnected tasks, logically equivalent to: @var{step1} <@var{input} | @var{step2} | @var{step3} | @var{step4} >@var{output} @end example -By default, the splitting is simulated by using memory buffers. The -@samp{--sequence=@var{strategy}} options gives you control over the flow -methods, by replacing @var{strategy} with @samp{memory} or @samp{pipe}. -On systems where the pipes method is not available, strategy selection -is ignored. +On systems where the pipes method is not available, the steps are +performed in series. @table @samp @item --sequence=memory @@ -2323,23 +2320,6 @@ task level function with no output buffer at all to start with, in which case all three fields should have @code{NULL} as a value. This is the situation immediately after a call to @code{recode_new_task}. -@item strategy -@vindex strategy -@vindex RECODE_STRATEGY_UNDECIDED -This field, which is of type @code{enum recode_sequence_strategy}, tells -how various recoding steps (passes) will be interconnected. Its initial -value is @code{RECODE_STRATEGY_UNDECIDED}, which is a constant defined in -the header file @file{recodext.h}. Other possible values are: - -@table @code -@item RECODE_SEQUENCE_IN_MEMORY -@vindex RECODE_SEQUENCE_IN_MEMORY -Keep intermediate recodings in memory. -@item RECODE_SEQUENCE_WITH_PIPE -@vindex RECODE_SEQUENCE_WITH_PIPE -Fork processes connected with pipes. -@end table - @item byte_order_mark @vindex byte_order_mark This field, which is preset to @code{true}, indicates that a byte order diff --git a/src/main.c b/src/main.c index fd60a41..0b058af 100644 --- a/src/main.c +++ b/src/main.c @@ -231,20 +231,9 @@ Operation modes:\n\ -q, --quiet, --silent inhibit messages about irreversible recodings\n\ -f, --force force recodings even when not reversible\n\ -t, --touch touch the recoded files after replacement\n\ - --sequence=memory use memory buffers for sequencing passes\n\ + -i, -p, --sequence=STRATEGY ignored for backwards compatibility\n\ "), stdout); -#if HAVE_PIPE - fputs (_("\ - -p, --sequence=pipe use pipe machinery for sequencing passes\n\ -"), - stdout); -#else - fputs (_("\ - -p, --sequence=pipe same as -i (on this system)\n\ -"), - stdout); -#endif fputs (_("\ \n\ Fine tuning:\n\ @@ -285,7 +274,6 @@ with an empty surface name means no surfaces at all. See the manual.\n"), stdout); fputs (_("\ \n\ -If none of -i and -p are given, presume -p if no FILE, else -i.\n\ Each FILE is recoded over itself, destroying the original. If no\n\ FILE is specified, then act as a filter and recode stdin to stdout.\n"), stdout); @@ -369,7 +357,6 @@ main (int argc, char *const *argv) memset (&task_option, 0, sizeof (struct recode_task)); request_option.diaeresis_char = '"'; - task_option.strategy = RECODE_STRATEGY_UNDECIDED; task_option.fail_level = RECODE_AMBIGUOUS_OUTPUT; task_option.abort_level = RECODE_AMBIGUOUS_OUTPUT; @@ -397,13 +384,10 @@ main (int argc, char *const *argv) usage (EXIT_FAILURE, 0); break; + /* Ignore for backwards compatibility with version 3.6. */ case 0: case 1: - task_option.strategy = RECODE_SEQUENCE_IN_MEMORY; - break; - case 2: - task_option.strategy = RECODE_SEQUENCE_WITH_PIPE; break; default: @@ -507,7 +491,7 @@ main (int argc, char *const *argv) break; case 'i': - task_option.strategy = RECODE_SEQUENCE_IN_MEMORY; + /* Ignore for backwards compatibility with version 3.6. */ break; case 'k': @@ -551,7 +535,7 @@ main (int argc, char *const *argv) break; case 'p': - task_option.strategy = RECODE_SEQUENCE_WITH_PIPE; + /* Ignore for backwards compatibility with version 3.6. */ break; case 'q': @@ -789,7 +773,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), RECODE_TASK task; task = recode_new_task (request); - task->strategy = task_option.strategy; task->fail_level = task_option.fail_level; task->abort_level = task_option.fail_level; @@ -798,12 +781,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), if (optind < argc) { - /* When reading and writing files, unless the user selected - otherwise, avoid forking and use memory. */ - - if (task->strategy == RECODE_STRATEGY_UNDECIDED) - task->strategy = RECODE_SEQUENCE_IN_MEMORY; - /* In case files are recoded over themselves and there is no recoding step at all, do not even try to touch the files. */ @@ -917,12 +894,6 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), } else { - /* When reading stdin and writing stdout, unless the user selected - otherwise, fork processes interconnected with pipes. */ - - if (task->strategy == RECODE_STRATEGY_UNDECIDED) - task->strategy = RECODE_SEQUENCE_WITH_PIPE; - task->input.name = ""; task->output.name = ""; if (!(*processor) (task)) diff --git a/src/mixed.c b/src/mixed.c index 3a8d6cb..8e21b16 100644 --- a/src/mixed.c +++ b/src/mixed.c @@ -40,7 +40,6 @@ open_mixed (struct mixed *mixed, RECODE_TASK task) { mixed->input_name = task->input.name; mixed->output_name = task->output.name; - task->strategy = RECODE_SEQUENCE_IN_MEMORY; task->byte_order_mark = false; /* Open both files ourselves. */ diff --git a/src/recodext.h b/src/recodext.h index 28ab992..be7a965 100644 --- a/src/recodext.h +++ b/src/recodext.h @@ -405,15 +405,6 @@ struct recode_read_write_text char *limit; }; -/* Tells how various passes are interconnected. */ - -enum recode_sequence_strategy - { - RECODE_STRATEGY_UNDECIDED, /* sequencing strategy is undecided yet */ - RECODE_SEQUENCE_IN_MEMORY, /* keep intermediate recodings in memory */ - RECODE_SEQUENCE_WITH_PIPE /* fork processes connected with `pipe(2)' */ - }; - /* Tells how to swap the incoming pair of bytes, while reading UCS-2. */ enum recode_swap_input @@ -466,9 +457,6 @@ struct recode_task struct recode_read_only_text input; struct recode_read_write_text output; - /* Tells how various recoding steps (passes) will be interconnected. */ - enum recode_sequence_strategy strategy : 3; - /* Produce a byte order mark on UCS-2 output, insist for it on input. */ bool byte_order_mark : 1; diff --git a/src/request.c b/src/request.c index bc7d71d..41284f2 100644 --- a/src/request.c +++ b/src/request.c @@ -1209,7 +1209,6 @@ recode_buffer_to_buffer (RECODE_CONST_REQUEST request, task->output.cursor = *output_buffer_pointer; task->output.limit = *output_buffer_pointer + *output_allocated_pointer; - task->strategy = RECODE_SEQUENCE_IN_MEMORY; success = recode_perform_task (task) && guarantee_nul_terminator (task); *output_buffer_pointer = task->output.buffer; *output_length_pointer = task->output.cursor - task->output.buffer; @@ -1236,7 +1235,6 @@ recode_buffer_to_file (RECODE_CONST_REQUEST request, task->input.limit = input_buffer + input_length; task->output.file = output_file; - task->strategy = RECODE_SEQUENCE_IN_MEMORY; success = recode_perform_task (task); recode_delete_task (task); @@ -1261,7 +1259,6 @@ recode_file_to_buffer (RECODE_CONST_REQUEST request, task->output.cursor = *output_buffer_pointer; task->output.limit = *output_buffer_pointer + *output_allocated_pointer; - task->strategy = RECODE_SEQUENCE_IN_MEMORY; success = recode_perform_task (task) && guarantee_nul_terminator (task); *output_buffer_pointer = task->output.buffer; *output_length_pointer = task->output.cursor - task->output.buffer; diff --git a/src/task.c b/src/task.c index 487ea65..5b70ca7 100644 --- a/src/task.c +++ b/src/task.c @@ -222,7 +222,7 @@ transform_byte_to_variable (RECODE_SUBTASK subtask) `---------------------------------------------------------------------*/ static bool -perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) +perform_sequence (RECODE_TASK task) { RECODE_CONST_REQUEST request = task->request; struct recode_subtask subtask_block; @@ -284,64 +284,61 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) subtask->output.cursor = subtask->output.buffer; #if HAVE_PIPE - if (strategy == RECODE_SEQUENCE_WITH_PIPE) - { - /* Create all subprocesses, from the first to the last, and - interconnect them. */ + /* Create all subprocesses, from the first to the last, and + interconnect them. */ - if (pipe (pipe_pair) < 0) - { - recode_perror (NULL, "pipe ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - SUBTASK_RETURN (subtask); - } - xset_binary_mode (pipe_pair[0], O_BINARY); - xset_binary_mode (pipe_pair[1], O_BINARY); - if (child_process = fork (), child_process < 0) - { - recode_perror (NULL, "fork ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - SUBTASK_RETURN (subtask); - } - if (child_process == 0) - { - /* The child executes its recoding step, reading from the - current input file and writing to the pipe; then it exits. */ - - if (close (pipe_pair[0]) < 0) - { - recode_perror (NULL, "close ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - } - if (subtask->output.file = fdopen (pipe_pair[1], "w"), - subtask->output.file == NULL) - { - recode_perror (NULL, "fdopen ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - } - } - else - { - /* The parent saves the read end of the pipe for the next step. */ - - if (input.file = fdopen (pipe_pair[0], "r"), - input.file == NULL) - { - recode_perror (NULL, "fdopen ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - close (pipe_pair[0]); - close (pipe_pair[1]); - SUBTASK_RETURN (subtask); - } - if (close (pipe_pair[1]) < 0) - { - recode_perror (NULL, "close ()"); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - close (pipe_pair[0]); - fclose (input.file); - SUBTASK_RETURN (subtask); - } - } + if (pipe (pipe_pair) < 0) + { + recode_perror (NULL, "pipe ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + SUBTASK_RETURN (subtask); + } + xset_binary_mode (pipe_pair[0], O_BINARY); + xset_binary_mode (pipe_pair[1], O_BINARY); + if (child_process = fork (), child_process < 0) + { + recode_perror (NULL, "fork ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + SUBTASK_RETURN (subtask); + } + if (child_process == 0) + { + /* The child executes its recoding step, reading from the + current input file and writing to the pipe; then it exits. */ + + if (close (pipe_pair[0]) < 0) + { + recode_perror (NULL, "close ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + } + if (subtask->output.file = fdopen (pipe_pair[1], "w"), + subtask->output.file == NULL) + { + recode_perror (NULL, "fdopen ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + } + } + else + { + /* The parent saves the read end of the pipe for the next step. */ + + if (input.file = fdopen (pipe_pair[0], "r"), + input.file == NULL) + { + recode_perror (NULL, "fdopen ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + close (pipe_pair[0]); + close (pipe_pair[1]); + SUBTASK_RETURN (subtask); + } + if (close (pipe_pair[1]) < 0) + { + recode_perror (NULL, "close ()"); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + close (pipe_pair[0]); + fclose (input.file); + SUBTASK_RETURN (subtask); + } } #endif } @@ -379,9 +376,9 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) subtask->step = step; (*step->transform_routine) (subtask); - if (strategy == RECODE_SEQUENCE_WITH_PIPE) - break; /* child/top-level process: escape from loop */ - +#if HAVE_PIPE + break; /* child/top-level process: escape from loop */ +#else /* Post-step clean up for memory sequence. */ if (subtask->input.file) @@ -406,6 +403,7 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) output = input; input = subtask->output; } +#endif } if (sequence_index + 1 == (unsigned)request->sequence_length) @@ -428,61 +426,56 @@ perform_sequence (RECODE_TASK task, enum recode_sequence_strategy strategy) } #if HAVE_PIPE - if (strategy == RECODE_SEQUENCE_WITH_PIPE) - { - /* Child process exits here. */ + /* Child process exits here. */ - if (child_process == 0) - exit (task->error_so_far < task->fail_level ? EXIT_SUCCESS - : EXIT_FAILURE); - else - { - /* Wait on all children, mainly to avoid synchronisation problems on - output file contents, but also to reduce the number of zombie - processes in case the user recodes many files at once. */ + if (child_process == 0) + exit (task->error_so_far < task->fail_level ? EXIT_SUCCESS + : EXIT_FAILURE); + else + { + /* Wait on all children, mainly to avoid synchronisation problems on + output file contents, but also to reduce the number of zombie + processes in case the user recodes many files at once. */ - while (wait (&wait_status) > 0) - { - /* Diagnose and abort on any abnormally terminating child. */ + while (wait (&wait_status) > 0) + { + /* Diagnose and abort on any abnormally terminating child. */ - if (!(WIFEXITED (wait_status) - || (WIFSIGNALED (wait_status) - && WTERMSIG (wait_status) == SIGPIPE))) - { - recode_error (NULL, _("Child process wait status is 0x%0.2x"), - wait_status); - recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); - SUBTASK_RETURN (subtask); - } + if (!(WIFEXITED (wait_status) + || (WIFSIGNALED (wait_status) + && WTERMSIG (wait_status) == SIGPIPE))) + { + recode_error (NULL, _("Child process wait status is 0x%0.2x"), + wait_status); + recode_if_nogo (RECODE_SYSTEM_ERROR, subtask); + SUBTASK_RETURN (subtask); + } - /* Check for a nonzero exit from the terminating child. */ - - if (WIFEXITED (wait_status) - ? WEXITSTATUS (wait_status) != 0 - : WTERMSIG (wait_status) != 0) - /* FIXME: It is not very clear what happened in sub-processes. */ - if (task->error_so_far < task->fail_level) - { - task->error_so_far = task->fail_level; - task->error_at_step = request->sequence_array + (unsigned)request->sequence_length - 1; - } - } + /* Check for a nonzero exit from the terminating child. */ + + if (WIFEXITED (wait_status) + ? WEXITSTATUS (wait_status) != 0 + : WTERMSIG (wait_status) != 0) + /* FIXME: It is not very clear what happened in sub-processes. */ + if (task->error_so_far < task->fail_level) + { + task->error_so_far = task->fail_level; + task->error_at_step = request->sequence_array + (unsigned)request->sequence_length - 1; + } + } - if (recode_interrupted) - /* FIXME: It is not very clear what happened in sub-processes. */ - if (task->error_so_far < task->fail_level) - { - task->error_so_far = task->fail_level; - task->error_at_step = request->sequence_array + (unsigned)request->sequence_length - 1; - } - } + if (recode_interrupted) + /* FIXME: It is not very clear what happened in sub-processes. */ + if (task->error_so_far < task->fail_level) + { + task->error_so_far = task->fail_level; + task->error_at_step = request->sequence_array + (unsigned)request->sequence_length - 1; + } } - else +#else + free (input.buffer); + free (output.buffer); #endif - { - free (input.buffer); - free (output.buffer); - } task->output = subtask->output; SUBTASK_RETURN (subtask); @@ -507,7 +500,6 @@ recode_new_task (RECODE_CONST_REQUEST request) return NULL; task->request = request; - task->strategy = RECODE_STRATEGY_UNDECIDED; task->fail_level = RECODE_NOT_CANONICAL; task->abort_level = RECODE_USER_ERROR; task->error_so_far = RECODE_NO_ERROR; @@ -525,39 +517,15 @@ recode_delete_task (RECODE_TASK task) } /*------------------------------------------------------------------------. -| Execute the conversion sequence for a recoding TASK, using the selected | -| strategy whenever more than one conversion step is needed. If no | -| conversion are needed, merely copy the input onto the output. Returns | -| zero if the recoding has been found to be non-reversible. Tell what | -| goes on if VERBOSE. | +| Execute the conversion sequence for a recoding TASK. If no conversions | +| are needed, merely copy the input onto the output. | +| Returns zero if the recoding has been found to be non-reversible. | +| Tell what goes on if VERBOSE. | `------------------------------------------------------------------------*/ -/* If some sequencing strategies are missing, this routine automatically - uses fallback strategies. */ - bool recode_perform_task (RECODE_TASK task) { - /* Leave task->strategy alone, as the same task may be used many - times differently, and the fact the strategy is undecided is a - clue we want to preserve between calls. */ - enum recode_sequence_strategy strategy = task->strategy; - - switch (strategy) - { - case RECODE_SEQUENCE_WITH_PIPE: -#if !HAVE_PIPE - strategy = RECODE_SEQUENCE_IN_MEMORY; -#endif - case RECODE_SEQUENCE_IN_MEMORY: - break; - - default: /* This should not happen, but if it does, try to recover. */ - case RECODE_STRATEGY_UNDECIDED: - strategy = RECODE_SEQUENCE_IN_MEMORY; - break; - } - /* Switch stdin and stdout to binary mode unless they are ttys, as this has nasty side-effects on several DOSish systems. For example, the Ctrl-Z character is no longer interpreted as EOF, and thus the poor user cannot @@ -570,5 +538,5 @@ recode_perform_task (RECODE_TASK task) if (task->output.name && !*task->output.name && !isatty (fileno (stdout))) xset_binary_mode (fileno (stdout), O_BINARY); - return perform_sequence (task, strategy); + return perform_sequence (task); } diff --git a/tests/Recode.pyx b/tests/Recode.pyx index 04444da..96dfaa9 100644 --- a/tests/Recode.pyx +++ b/tests/Recode.pyx @@ -165,11 +165,6 @@ cdef extern from "common.h": char *cursor char *limit - enum recode_sequence_strategy: - RECODE_STRATEGY_UNDECIDED - RECODE_SEQUENCE_IN_MEMORY - RECODE_SEQUENCE_WITH_PIPE - enum recode_swap_input: RECODE_SWAP_UNDECIDED RECODE_SWAP_NO @@ -190,7 +185,6 @@ cdef extern from "common.h": RECODE_CONST_REQUEST request recode_read_only_text input recode_read_write_text output - recode_sequence_strategy strategy bool byte_order_mark recode_swap_input swap_input recode_error_ fail_level @@ -483,10 +477,6 @@ COMBINE_EXPLODE = RECODE_COMBINE_EXPLODE COMBINE_STEP = RECODE_COMBINE_STEP EXPLODE_STEP = RECODE_EXPLODE_STEP -STRATEGY_UNDECIDED = RECODE_STRATEGY_UNDECIDED -SEQUENCE_IN_MEMORY = RECODE_SEQUENCE_IN_MEMORY -SEQUENCE_WITH_PIPE = RECODE_SEQUENCE_WITH_PIPE - SWAP_UNDECIDED = RECODE_SWAP_UNDECIDED SWAP_NO = RECODE_SWAP_NO SWAP_YES = RECODE_SWAP_YES