From: François Pinard Date: Sun, 24 Feb 2008 04:23:29 +0000 (-0500) Subject: Integrate bigauto to test suite X-Git-Tag: v3.7-beta1~12 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=274643820a520b7b4bbf9151f1c3798f73395458;p=recode Integrate bigauto to test suite --- diff --git a/python/Recode.pyx b/python/Recode.pyx index f69e11b..798f17c 100644 --- a/python/Recode.pyx +++ b/python/Recode.pyx @@ -13,25 +13,233 @@ cdef extern from "config.h": cdef extern from "common.h": - ## From "recode.h" + ## Symbols. - # Published (opaque) typedefs. + enum recode_symbol_type: + RECODE_NO_SYMBOL_TYPE + RECODE_CHARSET + RECODE_DATA_SURFACE + RECODE_TREE_SURFACE + + enum recode_data_type: + RECODE_NO_CHARSET_DATA + RECODE_STRIP_DATA + RECODE_EXPLODE_DATA - struct recode_outer: - pass - struct recode_request: - pass - struct recode_task: - pass struct recode_symbol: - pass - ctypedef recode_outer *RECODE_OUTER + recode_symbol *next + unsigned ordinal + char *name + recode_data_type data_type + void *data + void *resurfacer # really: recode_single * + void *unsurfacer # really: recode_single * + recode_symbol_type type + bool ignore + ctypedef recode_symbol *RECODE_SYMBOL + ctypedef recode_symbol *RECODE_CONST_SYMBOL + + struct recode_surface_list: + RECODE_CONST_SYMBOL surface + recode_surface_list *next + + struct recode_alias: + char *name + RECODE_SYMBOL symbol + recode_surface_list *implied_surfaces + ctypedef recode_alias *RECODE_ALIAS + ctypedef recode_alias *RECODE_CONST_ALIAS + + ## Steps. 
+ + enum recode_size: + RECODE_1 + RECODE_2 + RECODE_4 + RECODE_N + + struct recode_quality: + recode_size in_size + recode_size out_size + bool reversible + bool slower + bool faster + + struct recode_option_list: + char *option + recode_option_list *next + ctypedef recode_option_list *RECODE_OPTION_LIST + ctypedef recode_option_list *RECODE_CONST_OPTION_LIST + + #typedef bool (*Recode_init)(RECODE_STEP, RECODE_CONST_REQUEST, + # RECODE_CONST_OPTION_LIST, + # RECODE_CONST_OPTION_LIST) + #typedef bool (*Recode_term)(RECODE_STEP, RECODE_CONST_REQUEST) + #typedef bool (*Recode_transform)(RECODE_SUBTASK) + #typedef bool (*Recode_fallback)(RECODE_SUBTASK, unsigned) + + struct recode_single: + recode_single *next + RECODE_SYMBOL before + RECODE_SYMBOL after + short conversion_cost + void *initial_step_table + recode_quality quality + #Recode_init init_routine + #Recode_transform transform_routine + #Recode_fallback fallback_routine + ctypedef recode_single *RECODE_SINGLE + + enum recode_step_type: + RECODE_NO_STEP_TABLE + RECODE_BYTE_TO_BYTE + RECODE_BYTE_TO_STRING + RECODE_UCS2_TO_BYTE + RECODE_UCS2_TO_STRING + RECODE_STRING_TO_UCS2 + RECODE_COMBINE_EXPLODE + RECODE_COMBINE_STEP + RECODE_EXPLODE_STEP + + struct recode_step: + RECODE_SYMBOL before + RECODE_SYMBOL after + recode_quality quality + recode_step_type step_type + void *step_table + void *local + #Recode_transform transform_routine + #Recode_fallback fallback_routine + #Recode_term term_routine + ctypedef recode_step *RECODE_STEP + ctypedef recode_step *RECODE_CONST_STEP + + ## Requests. 
+ + struct recode_request: + void *outer # really: RECODE_OUTER + bool verbose_flag + char diaeresis_char + bool make_header_flag + bool diacritics_only + bool ascii_graphics + RECODE_STEP sequence_array + size_t sequence_allocated + short sequence_length ctypedef recode_request *RECODE_REQUEST - ctypedef recode_task *RECODE_TASK ctypedef recode_request *RECODE_CONST_REQUEST - ctypedef recode_symbol *RECODE_CONST_SYMBOL - # Description of list formats. + ## Tasks. + + struct recode_read_only_text: + char *name + FILE *file + char *buffer + char *cursor + char *limit + + struct recode_read_write_text: + char *name + FILE *file + char *buffer + char *cursor + char *limit + + enum recode_sequence_strategy: + RECODE_STRATEGY_UNDECIDED + RECODE_SEQUENCE_IN_MEMORY + RECODE_SEQUENCE_WITH_FILES + RECODE_SEQUENCE_WITH_PIPE + + enum recode_swap_input: + RECODE_SWAP_UNDECIDED + RECODE_SWAP_NO + RECODE_SWAP_YES + + enum recode_error: + RECODE_NO_ERROR + RECODE_NOT_CANONICAL + RECODE_AMBIGUOUS_OUTPUT + RECODE_UNTRANSLATABLE + RECODE_INVALID_INPUT + RECODE_SYSTEM_ERROR + RECODE_USER_ERROR + RECODE_INTERNAL_ERROR + RECODE_MAXIMUM_ERROR + + struct recode_task: + RECODE_CONST_REQUEST request + recode_read_only_text input + recode_read_write_text output + recode_sequence_strategy strategy + bool byte_order_mark + recode_swap_input swap_input + recode_error fail_level + recode_error abort_level + recode_error error_so_far + RECODE_CONST_STEP error_at_step + ctypedef recode_task *RECODE_TASK + ctypedef recode_task *RECODE_CONST_TASK + + struct recode_subtask: + RECODE_TASK task + RECODE_CONST_STEP step + recode_read_only_text input + recode_read_write_text output + unsigned newline_count + unsigned character_count + ctypedef recode_subtask *RECODE_SUBTASK + + int get_byte(RECODE_SUBTASK) + void put_byte(int, RECODE_SUBTASK) + void SET_SUBTASK_ERROR(recode_error, RECODE_SUBTASK) + bool SUBTASK_RETURN(RECODE_SUBTASK) + void RETURN_IF_NOGO(recode_error, RECODE_SUBTASK) + void 
GOT_CHARACTER(RECODE_SUBTASK) + void GOT_NEWLINE(RECODE_SUBTASK) + + ## Outers. + + struct recode_known_pair: + unsigned char left + unsigned char right + + struct recode_outer: + bool auto_abort + bool auto_reversibility + # charset.c: + recode_known_pair *pair_restriction + unsigned pair_restrictions + void *alias_table + RECODE_SYMBOL symbol_list + unsigned number_of_symbols + char **argmatch_charset_array + char **argmatch_surface_array + char **realname_charset_array + char **realname_surface_array + # recode.c: + recode_single *single_list + unsigned number_of_singles + unsigned char *one_to_same + RECODE_SYMBOL data_symbol + RECODE_SYMBOL tree_symbol + RECODE_SYMBOL ucs2_charset + RECODE_SYMBOL libiconv_pivot + RECODE_SYMBOL crlf_surface + RECODE_SYMBOL cr_surface + recode_quality quality_byte_reversible + recode_quality quality_byte_to_byte + recode_quality quality_byte_to_ucs2 + recode_quality quality_byte_to_variable + recode_quality quality_ucs2_to_byte + recode_quality quality_ucs2_to_variable + recode_quality quality_variable_to_byte + recode_quality quality_variable_to_ucs2 + recode_quality quality_variable_to_variable + ctypedef recode_outer *RECODE_OUTER + ctypedef recode_outer *RECODE_CONST_OUTER + + ## Miscellaneous. enum recode_list_format: RECODE_NO_FORMAT @@ -40,14 +248,163 @@ cdef extern from "common.h": RECODE_HEXADECIMAL_FORMAT RECODE_FULL_FORMAT - # Description of programming languages. - enum recode_programming_language: RECODE_NO_LANGUAGE RECODE_LANGUAGE_C RECODE_LANGUAGE_PERL - # Recode library at OUTER level. + enum: + NUL = c'\0' + STRIP_SIZE = 8 + + ctypedef unsigned short recode_ucs2 + + struct strip_data: + recode_ucs2 *pool + short offset[256 / STRIP_SIZE] + + struct ucs2_to_byte: + recode_ucs2 code + unsigned char byte + + struct ucs2_to_string: + recode_ucs2 code + unsigned short flags + char *string + + ## Per module declarations. 
+ + # recode.c + + ##define ALLOC_SIZE(Variable, Size, Type) \ + # (Variable = (Type *) recode_malloc (outer, (Size)), Variable) + + ##define ALLOC(Variable, Count, Type) \ + # ALLOC_SIZE (Variable, (Count) * sizeof (Type), Type) + + ##define REALLOC(Variable, Count, Type) \ + # (Variable = (Type *) recode_realloc (outer, Variable, \ + # (Count) * sizeof(Type)), \ + # Variable) + + #void recode_error(RECODE_OUTER, char *, ...) + #void recode_perror(RECODE_OUTER, char *, ...) + void *recode_malloc(RECODE_OUTER, size_t) + void *recode_realloc(RECODE_OUTER, void *, size_t) + + unsigned char *invert_table(RECODE_OUTER, unsigned char *) + bool complete_pairs(RECODE_OUTER, RECODE_STEP, + recode_known_pair *, unsigned, bool, bool) + bool transform_byte_to_ucs2(RECODE_SUBTASK) + bool init_ucs2_to_byte(RECODE_STEP, RECODE_CONST_REQUEST, + RECODE_CONST_OPTION_LIST, RECODE_CONST_OPTION_LIST) + bool transform_ucs2_to_byte(RECODE_SUBTASK) + + # charname.c and fr-charname.c + + char *ucs2_to_charname(int) + char *ucs2_to_french_charname(int) + + # charset.c + + enum alias_find_type: + SYMBOL_CREATE_CHARSET + SYMBOL_CREATE_DATA_SURFACE + SYMBOL_CREATE_TREE_SURFACE + ALIAS_FIND_AS_CHARSET + ALIAS_FIND_AS_SURFACE + ALIAS_FIND_AS_EITHER + + int code_to_ucs2 (RECODE_CONST_SYMBOL, unsigned) + bool prepare_for_aliases(RECODE_OUTER) + RECODE_ALIAS declare_alias(RECODE_OUTER, char *, char *) + bool declare_implied_surface(RECODE_OUTER, RECODE_ALIAS, + RECODE_CONST_SYMBOL) + bool make_argmatch_arrays(RECODE_OUTER) + RECODE_ALIAS find_alias(RECODE_OUTER, char *, alias_find_type) + bool find_and_report_subsets(RECODE_OUTER) + bool decode_known_pairs(RECODE_OUTER, char *) + + # combine.c + + #enum: # a few #define's, in fact + # DONE + # ELSE # ELSE is reserved in Pyrex + + bool init_explode(RECODE_STEP, RECODE_CONST_REQUEST, + RECODE_CONST_OPTION_LIST, RECODE_CONST_OPTION_LIST) + bool explode_byte_byte(RECODE_SUBTASK) + bool explode_ucs2_byte(RECODE_SUBTASK) + bool 
explode_byte_ucs2(RECODE_SUBTASK) + bool explode_ucs2_ucs2(RECODE_SUBTASK) + + bool init_combine(RECODE_STEP, RECODE_CONST_REQUEST, + RECODE_CONST_OPTION_LIST, RECODE_CONST_OPTION_LIST) + bool combine_byte_byte(RECODE_SUBTASK) + bool combine_ucs2_byte(RECODE_SUBTASK) + bool combine_byte_ucs2(RECODE_SUBTASK) + bool combine_ucs2_ucs2(RECODE_SUBTASK) + + # freeze.c + + void recode_freeze_tables(RECODE_OUTER) + + # libiconv.c + + bool transform_with_libiconv(RECODE_SUBTASK) + + # mixed.c + + bool transform_c_source(RECODE_TASK) + bool transform_po_source(RECODE_TASK) + + # outer.c + + bool reversibility(RECODE_SUBTASK, unsigned) + #RECODE_SINGLE declare_single + # (RECODE_OUTER, char *, char *, + # struct recode_quality, + # bool (*) (RECODE_STEP, RECODE_CONST_REQUEST, + # RECODE_CONST_OPTION_LIST, RECODE_CONST_OPTION_LIST), + # bool (*) (RECODE_SUBTASK)) + bool declare_libiconv(RECODE_OUTER, char *) + bool declare_explode_data(RECODE_OUTER, unsigned short *, char *, char *) + bool declare_strip_data(RECODE_OUTER, strip_data *, char *) + + # pool.c + + extern recode_ucs2 ucs2_data_pool[] + + # request.c + + char *edit_sequence(RECODE_REQUEST, bool) + + # rfc1345.c + + char *ucs2_to_rfc1345(recode_ucs2) + + # task.c + + int get_byte_helper(RECODE_SUBTASK) + void put_byte_helper(int, RECODE_SUBTASK) + bool recode_if_nogo(recode_error, RECODE_SUBTASK) + bool transform_byte_to_byte(RECODE_SUBTASK) + bool transform_byte_to_variable(RECODE_SUBTASK) + + # ucs.c + + enum: # a few #define's, in fact + REPLACEMENT_CHARACTER + NOT_A_CHARACTER + BYTE_ORDER_MARK + BYTE_ORDER_MARK_SWAPPED + + bool get_ucs2(unsigned *, RECODE_SUBTASK) + bool get_ucs4(unsigned *, RECODE_SUBTASK) + bool put_ucs2(unsigned, RECODE_SUBTASK) + bool put_ucs4(unsigned, RECODE_SUBTASK) + + ## Recode library at OUTER level. 
RECODE_OUTER recode_new_outer(bool) bool recode_delete_outer(RECODE_OUTER) @@ -82,781 +439,56 @@ cdef extern from "common.h": bool recode_delete_task(RECODE_TASK) bool recode_perform_task(RECODE_TASK) - ## From "recodext.h" - - #/* Internal typedefs, to supplement those in "recode.h". */ - # - #typedef struct recode_symbol * RECODE_SYMBOL; - #typedef struct recode_option_list * RECODE_OPTION_LIST; - #typedef struct recode_single * RECODE_SINGLE; - #typedef struct recode_step * RECODE_STEP; - #typedef struct recode_alias * RECODE_ALIAS; - #typedef struct recode_subtask * RECODE_SUBTASK; - # - #typedef const struct recode_option_list * RECODE_CONST_OPTION_LIST; - #typedef const struct recode_outer * RECODE_CONST_OUTER; - #typedef const struct recode_step * RECODE_CONST_STEP; - #typedef const struct recode_alias * RECODE_CONST_ALIAS; - #typedef const struct recode_task * RECODE_CONST_TASK; - # - #/*---------------------------------------------------------. - #| Maintain maximum of ERROR and current error in SUBTASK. | - #`---------------------------------------------------------*/ - # - ##define SET_SUBTASK_ERROR(Error, Subtask) \ - # recode_if_nogo (Error, Subtask) - # - #/*--------------------------------------------------------------------------. - #| Return from SUBTASK with `false', if the failure level has been reached. | - #`--------------------------------------------------------------------------*/ - # - ##define SUBTASK_RETURN(Subtask) \ - # return (Subtask)->task->error_so_far < (Subtask)->task->fail_level - # - #/*-------------------------------------------------------------------------. - #| Maintain maximum of ERROR and current error in SUBTASK. If the abort | - #| level has been reached, then return immediately as with SUBTASK_RETURN. 
| - #`-------------------------------------------------------------------------*/ - # - ##define RETURN_IF_NOGO(Error, Subtask) \ - # do { \ - # if (recode_if_nogo (Error, Subtask)) \ - # SUBTASK_RETURN (Subtask); \ - # } while (false) - # - #/* Various structure declarations. */ - # - #/*-----------------------------------------. - #| Outer variables for the recode library. | - #`-----------------------------------------*/ - # - #/* Error codes, in increasing severity. */ - # - #enum recode_error - # { - # RECODE_NO_ERROR, /* no error so far */ - # RECODE_NOT_CANONICAL, /* input is not exact, but equivalent */ - # RECODE_AMBIGUOUS_OUTPUT, /* output will be misleading */ - # RECODE_UNTRANSLATABLE, /* input is getting lost, while valid */ - # RECODE_INVALID_INPUT, /* input is getting lost, but was invalid */ - # RECODE_SYSTEM_ERROR, /* system returned input/output failure */ - # RECODE_USER_ERROR, /* library is being misused */ - # RECODE_INTERNAL_ERROR, /* programming botch in the library */ - # RECODE_MAXIMUM_ERROR /* impossible value (should be kept last) */ - # }; - # - #/* Structure for relating alias names to charsets and surfaces. */ - # - #struct recode_alias - # { - # const char *name; /* charset, surface or alias name */ - # RECODE_SYMBOL symbol; /* associated symbol */ - # /* If a charset, list of surfaces usually applied by default. */ - # struct recode_surface_list *implied_surfaces; - # }; - # - #/* The sole purpose of qualities is for later attributing step costs. 
*/ - # - #enum recode_size - # { - # RECODE_1, /* roughly one byte per character */ - # RECODE_2, /* roughly two bytes per character */ - # RECODE_4, /* roughly four bytes per character */ - # RECODE_N /* variable number of bytes per character */ - # }; - # - #struct recode_quality - # { - # enum recode_size in_size : 3; /* rough byte size of input characters */ - # enum recode_size out_size : 3; /* rough byte size of output characters */ - # bool reversible : 1; /* transformation is known to be reversible */ - # bool slower : 1; /* transformation is slower than average */ - # bool faster : 1; /* transformation is faster than average */ - # }; - # - #/* Main variables of the initialised library. */ - # - #struct recode_outer - # { - # /* This flag asks the library to diagnose and abort itself if errors. */ - # bool auto_abort; - # - # /* If new steps should automatically have reversibility for fallback. */ - # bool auto_reversibility; - # - # /* charset.c */ - # /* --------- */ - # - # /* Known pairs (for restricting listing). */ - # struct recode_known_pair *pair_restriction; - # unsigned pair_restrictions; - # - # /* Opaque Hash_table pointer giving access to the single table holding all - # names and aliases for charsets, surfaces and fallback methods. */ - # void *alias_table; - # - # /* Unique symbols are linked into a list and counted. */ - # RECODE_SYMBOL symbol_list; - # unsigned number_of_symbols; - # - # /* Arrays of strings ready for argmatch. */ - # char **argmatch_charset_array; - # char **argmatch_surface_array; - # const char **realname_charset_array; - # const char **realname_surface_array; - # - # /* recode.c */ - # /* -------- */ - # - # /* Known single steps. */ - # struct recode_single *single_list; - # unsigned number_of_singles; - # - # /* Identity recoding table. */ - # const unsigned char *one_to_same; - # - # /* Preset charsets and surfaces. 
*/ - # RECODE_SYMBOL data_symbol;/* special charset defining surfaces */ - # RECODE_SYMBOL tree_symbol; /* special charset defining structures */ - # RECODE_SYMBOL ucs2_charset; /* UCS-2 */ - # RECODE_SYMBOL libiconv_pivot; /* `libiconv' internal UCS */ - # RECODE_SYMBOL crlf_surface; /* for IBM PC machines */ - # RECODE_SYMBOL cr_surface; /* for Macintosh machines */ - # - # /* Preset qualities, to make step initialisation simpler. */ - # struct recode_quality quality_byte_reversible; - # struct recode_quality quality_byte_to_byte; - # struct recode_quality quality_byte_to_ucs2; - # struct recode_quality quality_byte_to_variable; - # struct recode_quality quality_ucs2_to_byte; - # struct recode_quality quality_ucs2_to_variable; - # struct recode_quality quality_variable_to_byte; - # struct recode_quality quality_variable_to_ucs2; - # struct recode_quality quality_variable_to_variable; - # }; - # - #/*--------------------------. - #| Description of a symbol. | - #`--------------------------*/ - # - #enum recode_symbol_type - # { - # RECODE_NO_SYMBOL_TYPE, /* missing value */ - # RECODE_CHARSET, /* visible in the space of charsets */ - # RECODE_DATA_SURFACE, /* this is a mere data surface */ - # RECODE_TREE_SURFACE /* this is a structural surface */ - # }; - # - #enum recode_data_type - # { - # RECODE_NO_CHARSET_DATA, /* the charset_table field is unused */ - # RECODE_STRIP_DATA, /* pool pointer and array of strips */ - # RECODE_EXPLODE_DATA /* explode variable length data */ - # }; - # - #struct recode_symbol - # { - # /* Chaining of all known symbols (charsets and surfaces). */ - # RECODE_SYMBOL next; - # - # /* Unique ordinal for this symbol, counted from zero. */ - # unsigned ordinal; - # - # /* Main name. */ - # const char *name; - # - # /* Type of table. */ - # enum recode_data_type data_type; - # - # /* Recoding table. */ - # void *data; - # - # /* Step for data..CHARSET transformation, if any, or NULL. 
*/ - # struct recode_single *resurfacer; - # - # /* Step for CHARSET..data transformation, if any, or NULL. */ - # struct recode_single *unsurfacer; - # - # /* Non zero if this is an acceptable charset (not only a surface). */ - # enum recode_symbol_type type : 3; - # - # /* Non zero if this one should be ignored. */ - # bool ignore : 2; - # }; - # - #struct recode_surface_list - # { - # RECODE_CONST_SYMBOL surface; - # struct recode_surface_list *next; - # }; - # - #/*-------------------------------------------. - #| Description of a single step of recoding. | - #`-------------------------------------------*/ - # - #typedef bool (*Recode_init) PARAMS ((RECODE_STEP, RECODE_CONST_REQUEST, - # RECODE_CONST_OPTION_LIST, - # RECODE_CONST_OPTION_LIST)); - #typedef bool (*Recode_term) PARAMS ((RECODE_STEP, RECODE_CONST_REQUEST)); - #typedef bool (*Recode_transform) PARAMS ((RECODE_SUBTASK)); - #typedef bool (*Recode_fallback) PARAMS ((RECODE_SUBTASK, unsigned)); - # - #/* The `single' structure holds data needed to decide of sequences, and is - # invariant over actual requests. The `step' structure holds data needed for - # task execution, it may take care of fallback and option variance. */ - # - #struct recode_single - # { - # /* Chaining of all known single steps. */ - # struct recode_single *next; - # - # /* Charset before conversion. */ - # RECODE_SYMBOL before; - # - # /* Charset after conversion. */ - # RECODE_SYMBOL after; - # - # /* Cost for this single step only. */ - # short conversion_cost; - # - # /* Initial value for step_table. */ - # void *initial_step_table; - # - # /* Recoding quality. */ - # struct recode_quality quality; - # - # /* Initialisation handler, to be called before step optimisation. */ - # Recode_init init_routine; - # - # /* Transformation handler, for doing the actual recoding work. */ - # Recode_transform transform_routine; - # - # /* Default fallback for the step. Merely to implement `-s' option. 
*/ - # Recode_fallback fallback_routine; - # }; - # - #enum recode_step_type - # { - # RECODE_NO_STEP_TABLE, /* the step_table field is unused */ - # RECODE_BYTE_TO_BYTE, /* array of 256 bytes */ - # RECODE_BYTE_TO_STRING, /* array of 256 strings */ - # RECODE_UCS2_TO_BYTE, /* hash from ucs2 to byte */ - # RECODE_UCS2_TO_STRING, /* hash from ucs2 to string */ - # RECODE_STRING_TO_UCS2, /* hash from ucs2 to string, reversed */ - # RECODE_COMBINE_EXPLODE, /* raw data for combining or exploding */ - # RECODE_COMBINE_STEP, /* special hash for combining */ - # RECODE_EXPLODE_STEP /* special hash for exploding */ - # }; - # - #struct recode_step - # { - # /* Charset before conversion. */ - # RECODE_SYMBOL before; - # - # /* Charset after conversion. */ - # RECODE_SYMBOL after; - # - # /* Recoding quality. */ - # struct recode_quality quality; - # - # /* Type of table. */ - # enum recode_step_type step_type; - # - # /* Recoding table. */ - # void *step_table; - # - # /* Step specific variables. */ - # void *local; - # - # /* Transformation handler, for doing the actual recoding work. */ - # Recode_transform transform_routine; - # - # /* Fallback for the step. */ - # Recode_fallback fallback_routine; - # - # /* Cleanup handler, to be called after the recoding is done. */ - # Recode_term term_routine; - # }; - # - #struct recode_option_list - # { - # const char *option; - # RECODE_OPTION_LIST next; - # }; - # - #/*------------------------------------------------------------------------. - #| A recoding request holds, among other things, a selected path among the | - #| available recoding steps, it so represents a kind of recoding plan. | - #`------------------------------------------------------------------------*/ - # - #struct recode_request - # { - # /* A request is always associated with a recoding system. 
*/ - # RECODE_OUTER outer; - # - # /* By setting the following flag, the program will echo to stderr the - # sequence of elementary recoding steps needed to achieve the requested - # recoding. */ - # bool verbose_flag : 1; - # - # /* In `texte' charset, some countries use double quotes to mark diaeresis, - # while some other prefer colons. This field contains the diaeresis - # character for `texte' charset. Nominally set to a double quote, it can - # be forced to a colon. Those are the only two acceptable values. */ - # char diaeresis_char; - # - # /* If producing a recoding table in source form, there will be no actual - # recoding done, and consequently, the optimisation of step sequence can - # be attempted more aggressively. If the step sequence cannot be reduced - # to a single step, table production will fail. */ - # bool make_header_flag : 1; - # - # /* For `latex' charset, it is often convenient to recode the diacritics - # only, while letting other LaTeX code using backslashes unrecoded. In - # the other charset, one can edit text as well as LaTeX directives. */ - # bool diacritics_only : 1; - # - # /* For `ibmpc' charset, characters 176 to 223 are use to draw boxes. If - # this field is set, while getting out of `ibmpc', ASCII characters are - # selected so to approximate these boxes. */ - # bool ascii_graphics : 1; - # - # /* Array stating the sequence of conversions. */ - # RECODE_STEP sequence_array; - # size_t sequence_allocated; - # short sequence_length; - # - # /* Internal variables used while scanning request text. */ - # char *work_string; /* buffer space for generated work strings */ - # size_t work_string_length; /* length of work_string */ - # size_t work_string_allocated; /* allocated length of work_string */ - # const char *scan_cursor; /* next character to be seen */ - # char *scanned_string; /* buffer space to scan strings */ - # }; - # - #/*--------------------------------------------------------------------. 
- #| A recoding text is either an external file or an in memory buffer. | - #`--------------------------------------------------------------------*/ - # - #/* While the recoding is going on, FILE being non-NULL has precedence over - # BUFFER. Moreover, if NAME is not NULL at start of recoding, this is - # interpreted as a request for the library to open the named file, either - # in read or write mode, and also to close it afterwards. Standard input - # or output is denoted by NAME being non-NULL, but otherwise empty. - # - # If FILE is NULL in input mode, the in-memory read-only text extends from - # BUFFER to LIMIT. There is no clue about if the buffer has been allocated - # bigger. When CURSOR reaches LIMIT, there is no more data to get. If - # FILE is NULL in output mode, the in-memory text extends from BUFFER to - # CURSOR, but the buffer has been allocated until LIMIT. When CURSOR - # reaches LIMIT, the buffer should be reallocated bigger, as needed. */ - # - #struct recode_read_only_text - # { - # const char *name; - # FILE *file; - # const char *buffer; - # const char *cursor; - # const char *limit; - # }; - # - #struct recode_read_write_text - # { - # const char *name; - # FILE *file; - # char *buffer; - # char *cursor; - # char *limit; - # }; - # - #/* Tells how various passes are interconnected. */ - # - #enum recode_sequence_strategy - # { - # RECODE_STRATEGY_UNDECIDED, /* sequencing strategy is undecided yet */ - # RECODE_SEQUENCE_IN_MEMORY, /* keep intermediate recodings in memory */ - # RECODE_SEQUENCE_WITH_FILES, /* do not fork, use intermediate files */ - # RECODE_SEQUENCE_WITH_PIPE /* fork processes connected with `pipe(2)' */ - # }; - # - #/* Tells how to swap the incoming pair of bytes, while reading UCS-2. 
*/ - # - #enum recode_swap_input - # { - # RECODE_SWAP_UNDECIDED, /* the text has not been read yet */ - # RECODE_SWAP_NO, /* no need to swap pair of bytes */ - # RECODE_SWAP_YES /* should swap incoming pair of bytes */ - # }; - # - #/*--------------------------------------------------------------------------. - #| A recoding subtask associates a particular recoding step to a given input | - #| text, for producing a corresponding output text. It also holds error | - #| related statistics for the execution of that step. | - #`--------------------------------------------------------------------------*/ - # - #struct recode_subtask - # { - # /* Task for which this subtask is an element. */ - # RECODE_TASK task; - # - # /* Step being executed by this subtask. */ - # RECODE_CONST_STEP step; - # - # /* Current input and output. */ - # struct recode_read_only_text input; - # struct recode_read_write_text output; - # - # /* Line count and character count in last line, both zero-based. */ - # unsigned newline_count; - # unsigned character_count; - # }; - # - ##define GOT_CHARACTER(Subtask) \ - # ((Subtask)->character_count++) - # - ##define GOT_NEWLINE(Subtask) \ - # ((Subtask)->newline_count++, (Subtask)->character_count = 0) - # - #/*--------------------------------------------------------------------------. - #| A recoding task associates a sequence of steps to a given input text, for | - #| producing a corresponding output text. It holds an array of subtasks. | - #`--------------------------------------------------------------------------*/ - # - #struct recode_task - # { - # /* Associated request. */ - # RECODE_CONST_REQUEST request; - # - # /* Initial input and final output. */ - # struct recode_read_only_text input; - # struct recode_read_write_text output; - # - # /* Tells how various recoding steps (passes) will be interconnected. */ - # enum recode_sequence_strategy strategy : 3; - # - # /* Produce a byte order mark on UCS-2 output, insist for it on input. 
*/ - # bool byte_order_mark : 1; - # - # /* The input UCS-2 stream might have bytes swapped (status variable). */ - # enum recode_swap_input swap_input : 3; - # - # /* Error processing. */ - # /* ----------------- */ - # - # /* At this level, there will be failure. */ - # enum recode_error fail_level : 5; - # - # /* At this level, task should be interrupted. */ - # enum recode_error abort_level : 5; - # - # /* Maximum error level met so far (status variable). */ - # enum recode_error error_so_far : 5; - # - # /* Step being executed when error_so_far was last set. */ - # RECODE_CONST_STEP error_at_step; - # }; - # - #/* Specialities for some function arguments. */ - # - #/* For restricting charset lists. */ - # - #struct recode_known_pair - # { - # unsigned char left; /* first character in pair */ - # unsigned char right; /* second character in pair */ - # }; - # - #/*----------------------. - #| Various definitions. | - #`----------------------*/ - # - #typedef unsigned short recode_ucs2; - # - #/* Double tables are generated as arrays of indices into a pool of strips, - # each strip holds STRIP_SIZE UCS-2 characters. Some memory is saved by - # not allowing duplicate strips in the pool. A smaller strip size yields - # more duplicates and so, a smaller pool, but then, tables get longer - # because more strip indices are needed for each table. It is difficult to - # predict the optimal strip size. Tests made on 1997-09-22 showed that a - # strip size of 4 needs 27808 bytes total, 8 needs 22656, 16 needs 23584 - # and 32 needs 25568, so we decided to stick to a strip size of 8. Change - # $STRIP_SIZE in `doc/tables.pl' if you change the value here. */ - # - #/* "Are we speaking slips, strips or bars?" (of gold press'latinum :-) */ - ##define STRIP_SIZE 8 - # - #/* An struct strip_data is a pointer to a pool of strips, and an array - # of 256/STRIP_SIZE offsets for the start of strips into the pool, each strip - # describes STRIP_SIZE UCS-2 characters. 
A missing character in a strip is - # indicated by all 16 bits set. */ - #struct strip_data - # { - # const recode_ucs2 *pool; - # const short offset[256 / STRIP_SIZE]; - # }; - # - #struct ucs2_to_byte - # { - # recode_ucs2 code; /* UCS-2 value */ - # unsigned char byte; /* corresponding byte */ - # }; - # - #struct ucs2_to_string - # { - # recode_ucs2 code; /* UCS-2 value */ - # unsigned short flags; /* various flags */ - # const char *string; /* corresponding string */ - # }; - # - #/* Per module declarations. */ - # - ##ifdef __cplusplus - #extern "C" { - ##endif - # - #/* recode.c. */ - # - ##define ALLOC_SIZE(Variable, Size, Type) \ - # (Variable = (Type *) recode_malloc (outer, (Size)), Variable) - # - ##define ALLOC(Variable, Count, Type) \ - # ALLOC_SIZE (Variable, (Count) * sizeof (Type), Type) - # - ##define REALLOC(Variable, Count, Type) \ - # (Variable = (Type *) recode_realloc (outer, Variable, \ - # (Count) * sizeof(Type)), \ - # Variable) - # - #void recode_error PARAMS ((RECODE_OUTER, const char *, ...)); - #void recode_perror PARAMS ((RECODE_OUTER, const char *, ...)); - #void *recode_malloc PARAMS ((RECODE_OUTER, size_t)); - #void *recode_realloc PARAMS ((RECODE_OUTER, void *, size_t)); - # - #unsigned char *invert_table PARAMS ((RECODE_OUTER, const unsigned char *)); - #bool complete_pairs PARAMS ((RECODE_OUTER, RECODE_STEP, - # const struct recode_known_pair *, unsigned, - # bool, bool)); - #bool transform_byte_to_ucs2 PARAMS ((RECODE_SUBTASK)); - #bool init_ucs2_to_byte PARAMS ((RECODE_STEP, RECODE_CONST_REQUEST, - # RECODE_CONST_OPTION_LIST, - # RECODE_CONST_OPTION_LIST)); - #bool transform_ucs2_to_byte PARAMS ((RECODE_SUBTASK)); - # - #/* charname.c and fr-charname.c. */ - # - #const char *ucs2_to_charname PARAMS ((int)); - #const char *ucs2_to_french_charname PARAMS ((int)); - # - #/* charset.c. 
*/ - # - #enum alias_find_type - #{ - # SYMBOL_CREATE_CHARSET, /* charset as given, create as needed */ - # SYMBOL_CREATE_DATA_SURFACE, /* data surface as given, create as needed */ - # SYMBOL_CREATE_TREE_SURFACE, /* tree surface as given, create as needed */ - # ALIAS_FIND_AS_CHARSET, /* disambiguate only as a charset */ - # ALIAS_FIND_AS_SURFACE, /* disambiguate only as a surface */ - # ALIAS_FIND_AS_EITHER /* disambiguate as a charset or a surface */ - #}; - # - #int code_to_ucs2 (RECODE_CONST_SYMBOL, unsigned); - #bool prepare_for_aliases PARAMS ((RECODE_OUTER)); - #RECODE_ALIAS declare_alias PARAMS ((RECODE_OUTER, - # const char *, const char *)); - #bool declare_implied_surface PARAMS ((RECODE_OUTER, RECODE_ALIAS, - # RECODE_CONST_SYMBOL)); - #bool make_argmatch_arrays PARAMS ((RECODE_OUTER)); - #RECODE_ALIAS find_alias PARAMS ((RECODE_OUTER, const char *, - # enum alias_find_type)); - #bool find_and_report_subsets PARAMS ((RECODE_OUTER)); - #bool decode_known_pairs PARAMS ((RECODE_OUTER, const char *)); - # - #/* combine.c. */ - # - ##define DONE NOT_A_CHARACTER - ##define ELSE BYTE_ORDER_MARK_SWAPPED - # - #bool init_explode PARAMS ((RECODE_STEP, RECODE_CONST_REQUEST, - # RECODE_CONST_OPTION_LIST, - # RECODE_CONST_OPTION_LIST)); - #bool explode_byte_byte PARAMS ((RECODE_SUBTASK)); - #bool explode_ucs2_byte PARAMS ((RECODE_SUBTASK)); - #bool explode_byte_ucs2 PARAMS ((RECODE_SUBTASK)); - #bool explode_ucs2_ucs2 PARAMS ((RECODE_SUBTASK)); - # - #bool init_combine PARAMS ((RECODE_STEP, RECODE_CONST_REQUEST, - # RECODE_CONST_OPTION_LIST, - # RECODE_CONST_OPTION_LIST)); - #bool combine_byte_byte PARAMS ((RECODE_SUBTASK)); - #bool combine_ucs2_byte PARAMS ((RECODE_SUBTASK)); - #bool combine_byte_ucs2 PARAMS ((RECODE_SUBTASK)); - #bool combine_ucs2_ucs2 PARAMS ((RECODE_SUBTASK)); - # - #/* freeze.c. */ - # - #void recode_freeze_tables PARAMS ((RECODE_OUTER)); - # - #/* libiconv.c. */ - # - #bool transform_with_libiconv PARAMS ((RECODE_SUBTASK)); - # - #/* mixed.c. 
*/ - # - #bool transform_c_source PARAMS ((RECODE_TASK)); - #bool transform_po_source PARAMS ((RECODE_TASK)); - # - #/* outer.c. */ - # - #bool reversibility PARAMS ((RECODE_SUBTASK, unsigned)); - #RECODE_SINGLE declare_single - # PARAMS ((RECODE_OUTER, const char *, const char *, - # struct recode_quality, - # bool (*) (RECODE_STEP, RECODE_CONST_REQUEST, - # RECODE_CONST_OPTION_LIST, RECODE_CONST_OPTION_LIST), - # bool (*) (RECODE_SUBTASK))); - #bool declare_libiconv PARAMS ((RECODE_OUTER, const char *)); - #bool declare_explode_data PARAMS ((RECODE_OUTER, const unsigned short *, - # const char *, const char *)); - #bool declare_strip_data PARAMS ((RECODE_OUTER, struct strip_data *, - # const char *)); - # - #/* pool.c. */ - # - #extern const recode_ucs2 ucs2_data_pool[]; - # - #/* request.c. */ - # - #char *edit_sequence PARAMS ((RECODE_REQUEST, bool)); - # - #/* rfc1345.c. */ - # - #const char *ucs2_to_rfc1345 PARAMS ((recode_ucs2)); - # - #/* task.c. */ - # - ##if USE_HELPERS - #int get_byte_helper PARAMS ((RECODE_SUBTASK)); - ##endif - #void put_byte_helper PARAMS ((int, RECODE_SUBTASK)); - #bool recode_if_nogo PARAMS ((enum recode_error, RECODE_SUBTASK)); - #bool transform_byte_to_byte PARAMS ((RECODE_SUBTASK)); - #bool transform_byte_to_variable PARAMS ((RECODE_SUBTASK)); - # - #/* ucs.c. */ - # - #/* Replacement character for when correctly formed character has no - # equivalent. It is not used for ill-formed characters, however. */ - ##define REPLACEMENT_CHARACTER 0xFFFD - # - #/* Device for detecting if bytes are swapped. This value should appear first - # in UCS-2 files. */ - ##define BYTE_ORDER_MARK 0xFEFF - ##define BYTE_ORDER_MARK_SWAPPED 0xFFFE - # - #/* Never an UCS-2 character. 
*/ - ##define NOT_A_CHARACTER 0xFFFF - # - #bool get_ucs2 PARAMS ((unsigned *, RECODE_SUBTASK)); - #bool get_ucs4 PARAMS ((unsigned *, RECODE_SUBTASK)); - #bool put_ucs2 PARAMS ((unsigned, RECODE_SUBTASK)); - #bool put_ucs4 PARAMS ((unsigned, RECODE_SUBTASK)); - # - ##ifdef __cplusplus - #} - ##endif - # - #/* Global macros specifically for `recode'. */ - # - #/* Giving a name to the ASCII character assigned to position 0. */ - ##define NUL '\0' - # - ##if USE_HELPERS - # - ## define get_byte(Subtask) \ - # get_byte_helper ((Subtask)) - # - ## define put_byte(Byte, Subtask) \ - # put_byte_helper ((Byte), (Subtask)) - # - ##else /* not USE_HELPERS */ - # - ## define get_byte(Subtask) \ - # ((Subtask)->input.file \ - # ? getc ((Subtask)->input.file) \ - # : (Subtask)->input.cursor == (Subtask)->input.limit \ - # ? EOF \ - # : (unsigned char) *(Subtask)->input.cursor++) - # - ## define put_byte(Byte, Subtask) \ - # ((Subtask)->output.file \ - # ? (putc ((char) (Byte), (Subtask)->output.file), 0) \ - # : (Subtask)->output.cursor == (Subtask)->output.limit \ - # ? (put_byte_helper ((int) (Byte), (Subtask)), 0) \ - # : (*(Subtask)->output.cursor++ = (Byte), 0)) - # - ##endif /* not USE_HELPERS */ - # - ##ifdef FLEX_SCANNER - # - ## if !INLINE_HARDER - # - ## undef put_byte - ## define put_byte(Byte, Subtask) \ - # put_byte_helper ((Byte), (Subtask)) - # - ## endif - # - ## define PUT_NON_DIACRITIC_BYTE(Byte, Subtask) \ - # if (request->diacritics_only) \ - # ECHO; \ - # else \ - # put_byte ((Byte), (Subtask)) - # - #/* ECHO may not have a (Subtask) argument, because some ECHO without argument - # is generated by Flex -- yet Vern tells me it won't happen if I inhibit - # the rule about default copying. Happily enough, within Flex, Subtask is - # `subtask' quite systematically, so it may be used as a constant, here. 
*/ - ## define ECHO \ - # do { \ - # const char *cursor = yytext; int counter = yyleng; \ - # for (; counter > 0; cursor++, counter--) \ - # put_byte (*cursor, subtask); \ - # } while (false) - # - ##endif /* FLEX_SCANNER */ - class error(Exception): pass # Description of list formats. NO_FORMAT = RECODE_NO_FORMAT -DECIMAL_FORMAT = RECODE_DECIMAL_FORMAT -OCTAL_FORMAT = RECODE_OCTAL_FORMAT -HEXADECIMAL_FORMAT = RECODE_HEXADECIMAL_FORMAT -FULL_FORMAT = RECODE_FULL_FORMAT +#DECIMAL_FORMAT = RECODE_DECIMAL_FORMAT +#OCTAL_FORMAT = RECODE_OCTAL_FORMAT +#HEXADECIMAL_FORMAT = RECODE_HEXADECIMAL_FORMAT +#FULL_FORMAT = RECODE_FULL_FORMAT # Description of programming languages. -NO_LANGUAGE = RECODE_NO_LANGUAGE -LANGUAGE_C = RECODE_LANGUAGE_C -LANGUAGE_PERL = RECODE_LANGUAGE_PERL +#NO_LANGUAGE = RECODE_NO_LANGUAGE +#LANGUAGE_C = RECODE_LANGUAGE_C +#LANGUAGE_PERL = RECODE_LANGUAGE_PERL -# Recode library at OUTER level. +## Recode library at OUTER level. cdef class Outer: cdef RECODE_OUTER outer - def __init__(self): - self.outer = recode_new_outer(true) + def __init__(self, strict=False): + self.outer = recode_new_outer(strict) def __dealloc__(self): recode_delete_outer(self.outer) - def all_symbols(self): - ok = list_all_symbols(self.outer, NULL) - if not ok: - raise error + def all_charsets(self): + list = [] + cdef RECODE_SYMBOL symbol + symbol = self.outer.symbol_list + while symbol is not NULL: + if (symbol.type == RECODE_CHARSET + and symbol is not self.outer.libiconv_pivot + and symbol is not self.outer.data_symbol + and symbol is not self.outer.tree_symbol): + list.append(symbol.name) + symbol = symbol.next + return list + + def all_surfaces(self): + list = [] + cdef RECODE_SYMBOL symbol + symbol = self.outer.symbol_list + while symbol is not NULL: + if symbol.type != RECODE_CHARSET: + list.append(symbol.name) + symbol = symbol.next + return list def concise_charset(self, format=NO_FORMAT): ok = list_concise_charset(self.outer, NULL, format) @@ -868,6 +500,11 @@ cdef 
class Outer: if not ok: raise error + def set_libiconv(self, flag): + previous = self.outer.libiconv_pivot.ignore == 0 + self.outer.libiconv_pivot.ignore = int(not flag) + return previous + # Recode library at REQUEST level. cdef class Request: @@ -879,14 +516,34 @@ cdef class Request: def __dealloc__(self): recode_delete_request(self.request) - def scan_request(self, char *text): + def set_verbose(self, flag): + previous = self.request.verbose_flag != 0 + self.request.verbose_flag = int(flag) + return previous + + def scan(self, char *text): ok = recode_scan_request(self.request, text) if not ok: raise error + def pair_sequence(self): + list = [] + cdef recode_step step + cdef unsigned counter + for counter from 0 <= counter < self.request.sequence_length: + step = self.request.sequence_array[counter] + list.append((step.before.name, step.after.name)) + return list + def format_table(self, int language, char *charset): + cdef RECODE_OUTER outer + cdef bool saved + outer = self.request[0].outer + saved = outer.libiconv_pivot.ignore + outer.libiconv_pivot.ignore = true ok = recode_format_table( self.request, language, charset) + outer.libiconv_pivot.ignore = saved if not ok: raise error @@ -912,8 +569,9 @@ cdef class Request: # Lazy, all in one call. global_outer = Outer() +#global_outer.set_libiconv(False) def recode(char *text, char *string): request = Request(global_outer) - request.scan_request(text) + request.scan(text) return request.string(string) diff --git a/tests/ChangeLog b/tests/ChangeLog index b367fe1..f695108 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,7 @@ +2008-02-23 François Pinard + + * t90_bigauto.py: New, replacing bigauto.py. + 2008-02-22 François Pinard Swithching from Autotest to Pytest. @@ -13,7 +17,7 @@ * t40_testdump.py: Rewrite of testdump.m4. * t40_utf7.py: Rewrite of utf7.m4 and utf7-data. * t40_utf8.py: Rewrite of utf8.m4. - * t60_methods.py: Rewrite of methods.m4. + * t50_methods.py: Rewrite of methods.m4. 
* Makefile: Adjusted. 2005-03-06 François Pinard diff --git a/tests/Makefile.am b/tests/Makefile.am index b3adfbf..5555828 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -19,27 +19,14 @@ AUTOMAKE_OPTIONS = gnits -SUITE = african.m4 base64.m4 combine.m4 dumps.m4 lists.m4 methods.m4 \ -names.m4 quoted.m4 testdump.m4 utf7.m4 utf8.m4 +SUITE = t21_names.py t22_lists.py t25_subsets.py t30_base64.py \ +t30_dumps.py t30_quoted.py t40_african.py t40_combine.py t40_testdump.py \ +t40_utf7.py t40_utf8.py t50_methods.py t90_bigauto.py -EXTRA_DIST = common.py $(SUITE) bigauto.py +EXTRA_DIST = NOTES pytest common.py $(SUITE) PYTHON = python check-local: PATH=../src:$$PATH PYTHONPATH=$(srcdir) \ $(PYTHON) $(srcdir)/pytest $(srcdir)/t*.py - -bigtest: warning - PATH=../src:$$PATH $(PYTHON) $(srcdir)/bigauto.py - -bigtest-strict: warning - PATH=../src:$$PATH $(PYTHON) $(srcdir)/bigauto.py -s - -warning: - @echo 1>&2 '===============================================' - @echo 1>&2 "The \`bigauto' test may take hours. Be patient." - @echo 1>&2 '===============================================' - -clean-local: - rm -f input output big-s-off.res big-s-on.res diff --git a/tests/Makefile.in b/tests/Makefile.in index 4eff14e..cf83f37 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -197,10 +197,11 @@ target_alias = @target_alias@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = gnits -SUITE = african.m4 base64.m4 combine.m4 dumps.m4 lists.m4 methods.m4 \ -names.m4 quoted.m4 testdump.m4 utf7.m4 utf8.m4 +SUITE = t21_names.py t22_lists.py t25_subsets.py t30_base64.py \ +t30_dumps.py t30_quoted.py t40_african.py t40_combine.py t40_testdump.py \ +t40_utf7.py t40_utf8.py t50_methods.py t90_bigauto.py -EXTRA_DIST = common.py $(SUITE) bigauto.py +EXTRA_DIST = NOTES pytest common.py $(SUITE) PYTHON = python all: all-am @@ -304,7 +305,7 @@ maintainer-clean-generic: @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am -clean-am: clean-generic clean-libtool clean-local mostlyclean-am +clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile @@ -359,9 +360,9 @@ uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am check-local clean clean-generic \ - clean-libtool clean-local distclean distclean-generic \ - distclean-libtool distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ + clean-libtool distclean distclean-generic distclean-libtool \ + distdir dvi dvi-am html html-am info info-am install \ + install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-pdf install-pdf-am install-ps install-ps-am \ @@ -374,20 +375,6 @@ uninstall-am: check-local: PATH=../src:$$PATH PYTHONPATH=$(srcdir) \ $(PYTHON) $(srcdir)/pytest $(srcdir)/t*.py - -bigtest: warning - PATH=../src:$$PATH $(PYTHON) $(srcdir)/bigauto.py - -bigtest-strict: warning - PATH=../src:$$PATH $(PYTHON) $(srcdir)/bigauto.py -s - -warning: - @echo 1>&2 '===============================================' - @echo 1>&2 "The \`bigauto' test may take hours. Be patient." - @echo 1>&2 '===============================================' - -clean-local: - rm -f input output big-s-off.res big-s-on.res # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/tests/NOTES b/tests/NOTES index 93cdead..479a0cc 100644 --- a/tests/NOTES +++ b/tests/NOTES @@ -11,11 +11,8 @@ t40 - Individual charsets. Missing tests for atarist, next, ebcdic, tables, applemac, ibmpc, iconqnx, cdcnos, bangbang, ascii, flat, html, latex, texinfo, texte and utf16. -t60 - Inter-step mechanics. +t50 - Inter-step mechanics. -t80 - Big auto tests. 
-echo -echo "WARNING: The \`bigauto' test will be skipped, as it takes a long time to" -echo " complete. To launch it, get into the build \`tests/' directory" -echo " and do either \`make bigtest' or \`make bigtest-strict'. The" -echo " later forces \`-s' on all \`recode' calls." +t70 - Regression tests. + +t90 - Special tests. diff --git a/tests/pytest b/tests/pytest index 3bff838..49075fd 100755 --- a/tests/pytest +++ b/tests/pytest @@ -306,7 +306,7 @@ class Main: return instance = classe() if hasattr(instance, u'setup_class'): - self.delayed_setup_module = module.setup_class, classe + self.delayed_setup_module = instance.setup_class, classe for _, name, method, generator in sorted(collection): self.handle_function(prefix + u'/' + name, getattr(instance, name), generator, instance) diff --git a/tests/t60_methods.py b/tests/t50_methods.py similarity index 100% rename from tests/t60_methods.py rename to tests/t50_methods.py diff --git a/tests/bigauto.py b/tests/t90_bigauto.py old mode 100755 new mode 100644 similarity index 79% rename from tests/bigauto.py rename to tests/t90_bigauto.py index 8785d23..466a0bf --- a/tests/bigauto.py +++ b/tests/t90_bigauto.py @@ -4,6 +4,11 @@ # François Pinard , 1997. """\ +NOTE: This script has not been revised yet as a main program. Currently, +it is only meant as a part of the Recode test suite. + +-------------------------------------------------------------------------- + Produce statistics from the results of the bigauto check. Usage: bigauto [RECODE_OPTION]... [CHARSET_OPTION]... @@ -23,6 +28,39 @@ argument, all possible possible recodings are considered. 
""" import os, sys +import common + +class Test: + avoid_as_before = 'count-characters', 'dump-with-names', 'flat' + + def test_1(self): + if common.Recode is None: + raise common.SkipTest + self.outer = common.Recode.Outer(strict=False) + self.charsets = sorted(self.outer.all_charsets()) + for before in self.charsets: + if before not in self.avoid_as_before: + yield self.validate, before + + def test_2(self): + if common.Recode is None: + raise common.SkipTest + self.outer = common.Recode.Outer(strict=True) + self.charsets = sorted(self.outer.all_charsets()) + for before in self.charsets: + if before not in self.avoid_as_before: + yield self.validate, before + + def validate(self, before): + # As a compromise between being too terse or too verbose, we + # consider as a single test, one "before" against all "after"s. + # However, without a Recode module, we do not know how many + # "before"s exist, and the skip count is then rather small. + print before + for after in self.charsets: + if after is not before: + request = common.Recode.Request(self.outer) + request.scan('%s..%s' % (before, after)) def main(*arguments): recode_options = [] @@ -32,40 +70,9 @@ def main(*arguments): recode_options.append(argument) else: charset_options.append(argument) - work_name = '/tmp/bigauto-data' - if os.path.exists(work_name): - os.remove(work_name) - create_data(work_name, recode_options, charset_options) report = Report() report.digest_data(file(work_name).readline) report.produce_report(sys.stdout.write) - os.remove(work_name) - -def create_data(name, recode_options, charset_options): - # Get the list of charsets. - if charset_options: - charsets = charset_options - else: - charsets = [] - for line in os.popen('recode -l'): - charset = line.split()[0] - if charset[0] in ':/': - continue - charsets.append(charset) - # Do the work, calling a subshell once per `before' value.
- recode_call = "recode >%s 2>&1' % name, 'w').write - write('export LANGUAGE; LANGUAGE=C\n' - 'export LANG; LANG=C\n' - 'export LC_ALL; LC_ALL=C\n') - for after in charsets: - if after != before: - write("%s '%s..%s'\n" % (recode_call, before, after)) class Report: