--- /dev/null
+0.7: 7 May 2006
+- Made error handling much better by getting rid of at_eof and adding
+ last_read_result to take its place.
+
+0.6:
+- create project
--- /dev/null
+This code, like the re2c library, is released into the public domain.
+
+Nevertheless, please consider contributing any changes you make to
+this code or documentation back to the re2c project.
+
--- /dev/null
+# Doxyfile 1.3.7
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = "re2c Library"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = docs
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 2 levels of 10 sub-directories under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of source
+# files, where putting all generated files in the same directory would otherwise
+# cause performance problems for the file system.
+
+CREATE_SUBDIRS = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch,
+# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en
+# (Japanese with English messages), Korean, Korean-en, Norwegian, Polish, Portuguese,
+# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian.
+
+OUTPUT_LANGUAGE = English
+
+# This tag can be used to specify the encoding used in the generated output.
+# The encoding is not always determined by the language that is chosen,
+# but also whether or not the output is meant for Windows or non-Windows users.
+# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES
+# forces the Windows encoding (this is the default for the Windows binary),
+# whereas setting the tag to NO uses a Unix-style encoding (the default for
+# all platforms other than Windows).
+
+USE_WINDOWS_ENCODING = NO
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is used
+# as the annotated text. Otherwise, the brief description is used as-is. If left
+# blank, the following values are used ("$name" is automatically replaced with the
+# name of the entity): "The $name class" "The $name widget" "The $name file"
+# "is" "provides" "specifies" "contains" "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited
+# members of a class in the documentation of that class as if those members were
+# ordinary class members. Constructors, destructors and assignment operators of
+# the base classes will not be shown.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before each file name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful if your file system
+# doesn't support long names, like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description).
+
+JAVADOC_AUTOBRIEF = YES
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the DETAILS_AT_TOP tag is set to YES then Doxygen
+# will output the detailed description near the top, like JavaDoc.
+# If set to NO, the detailed description appears after the member
+# documentation.
+
+DETAILS_AT_TOP = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources
+# only. Doxygen will then generate output that is more tailored for Java.
+# For instance, namespaces will be presented as packages, qualified scopes
+# will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING = YES
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL = YES
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# users are advised to set this option to NO.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES = YES
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT =
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp
+# *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm
+
+FILE_PATTERNS = *.c *.h *.re
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories
+# that are symbolic links (a Unix filesystem feature) are excluded from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+
+EXCLUDE_PATTERNS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+
+INPUT_FILTER =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES (the default)
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES (the default)
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET =
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
+# generated containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_PREDEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse the
+# parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base or
+# super classes. Setting the tag to NO turns the diagrams off. Note that this
+# option is superseded by the HAVE_DOT option below. This is only a fallback. It is
+# recommended to install and use dot, since it yields more powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT = YES
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will
+# generate a call dependency graph for every global function or class method.
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command.
+
+CALL_GRAPH = YES
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will show a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found on the path.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_WIDTH = 1024
+
+# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height
+# (in pixels) of the graphs generated by dot. If a graph becomes larger than
+# this value, doxygen will try to truncate the graph, so that it fits within
+# the specified constraint. Beware that most browsers cannot cope with very
+# large images.
+
+MAX_DOT_GRAPH_HEIGHT = 1024
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes that
+# lay further from the root node will be omitted. Note that setting this option to
+# 1 or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that a graph may be further truncated if the graph's image dimensions are
+# not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH and MAX_DOT_GRAPH_HEIGHT).
+# If 0 is used for the depth value (the default), the graph is not depth-constrained.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
--- /dev/null
+# Makefile
+# Scott Bronson
+# 27 Dec 2004
+
+VERSION=0.7
+
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard re2c/*.c)
+CHDR+=$(wildcard re2c/*.h)
+
+all: examples
+ $(MAKE) -C test
+
+examples:
+ $(MAKE) -C examples
+
+test:
+ $(MAKE) -C test test
+
+clean:
+ rm -f numscan tags
+ $(MAKE) -C examples clean
+ $(MAKE) -C test clean
+
+distclean: clean
+ rm -rf docs
+
+# need to make clean before making docs.
+# otherwise doxygen will parse .c and .re files and get very confused.
+docs:
+ $(MAKE) clean
+ doxygen
+ perl -pi -e "s/VERSION/$(VERSION)/" docs/html/index.html
+
+release: distclean
+ if find . | egrep "~|/\\."; then echo "INVISIBLE OR BAD FILES! Won't finish." && exit 1; else exit 0; fi
+ rm -rf /tmp/libre2c-$(VERSION) /tmp/libre2c-docs-$(VERSION)
+ cp -r . /tmp/libre2c-$(VERSION)
+ perl -pi -e "s/VERSION/$(VERSION)/" /tmp/libre2c-$(VERSION)/re2c/*
+ (cd /tmp && tar zcf libre2c-$(VERSION).tar.gz libre2c-$(VERSION))
+ (cd /tmp/libre2c-$(VERSION) && make docs && mv docs/html ../libre2c-docs-$(VERSION) && rmdir docs)
+
+tags:
+ ctags -R
+
+.PHONY: docs clean distclean examples test release
--- /dev/null
+re2c Library README
+
+
+This is a library intended to make it easier to use re2c scanners.
+
+To use the library, just copy the re2c directory into your project tree.
+See examples/skeleton for a quick way to get started.
+
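+Very roughly, a program built on the library looks like the sketch
+below. This is only an illustration: the include paths follow the
+examples/compare example, and the scanner attach function and the
+return conventions assumed for scan_next_token() are placeholders.
+See examples/skeleton and the other examples for real, working code.
+
+    #include <stdio.h>
+    #include "libre2c/scan.h"     /* scanstate, scanstate_init, ... */
+    #include "libre2c/readfp.h"   /* readfp_attach: read from a FILE* */
+
+    /* placeholder: the attach routine generated from your own .re file */
+    void myscanner_attach(scanstate *ss);
+
+    int main(void)
+    {
+        char buf[BUFSIZ];
+        scanstate ssrec, *ss = &ssrec;
+        int token;
+
+        scanstate_init(ss, buf, sizeof(buf));  /* set up the scan buffer */
+        readfp_attach(ss, stdin);              /* supply data from stdin */
+        myscanner_attach(ss);                  /* install your scanner   */
+
+        /* assumes: positive token codes, 0 at end of input, <0 on error,
+         * and that ss->token..ss->cursor spans the token's text */
+        while((token = scan_next_token(ss)) > 0) {
+            printf("token %d: %.*s\n", token,
+                   (int)(ss->cursor - ss->token), ss->token);
+        }
+        return 0;
+    }
+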
+To view the documentation:
+
+ $ make docs
+ $ www-browser docs/html/index.html
+
+You can also view the documentation online at http://u32.net/re2c
+
+All code in this library, like re2c itself, is released into the public domain.
+
--- /dev/null
+- Write a script that will update the re2c library in your project.
+ ./update-libre2c
+ That will download files directly out of the re2c repository
+ (maybe using the viewcvs interface)?
+
+- Readprocs should probably have a strerror call that translates
+ a negative return value into a human-readable string. I don't
+ see an easy way to add this to the API however.
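+  A rough sketch of the shape it might take (the function name and the
+  error codes are made up for illustration; nothing like this exists yet):
+      const char *readproc_strerror(int err)
+      {
+          switch(err) {
+              case 0:  return "end of input";
+              case -1: return "read error";           /* hypothetical code */
+              case -2: return "out of buffer space";  /* hypothetical code */
+              default: return err < 0 ? "unknown readproc error" : "no error";
+          }
+      }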
+
+- Change scanners so that cursor and limit are automatic variables.
+ This should help the compiler optimize. Don't do this optimization
+ without performance numbers and profiling though!
+
+- Is there a way to decouple scanstates and readprocs? It would
+ certainly help when piggybacking a readproc if it didn't have
+ to know about the entire scanstate.
+ In addition, it would be good for the comparison scanner -- you
+ could set up TWO readprocs and then pass them to a comparison
+ routine that would return the final result.
+
+- Is there a way to standardize destructors? How can we tell
+ each of the procs that the scanstate is going
+ away and they need to release their resources?
+
+ Yes, we can set up a linked list of destructor elements.
+ If a readproc, scanproc, etc wants to have its destructor called, it
+  just adds a malloc'd linked list item.  Then, on destruction, walk
+  the list, something like:
+      typedef struct destelem {
+          destproc *proc; struct destelem *next; void *refcon;
+      } destelem;
+      void destroy(destelem *dd) {
+          if(!dd) return;           /* end of the list */
+          destroy(dd->next);        /* recurse down the list first */
+          (*dd->proc)(dd->refcon);
+          free(dd);
+      }
+ But is this worth it? None of the builtins need this sort of
+ heavyweight functionality.
+
+- Make the re2c library handle push mode.
+
+
+Ideas about re2c itself:
+
+re2c should follow include files! How hard would it be to just
+ send files to the preprocessor before scanning them? Easy?
+ Just fork and process the child's stdout.
+Get rid of the one scanner per file limit. I should be able to
+ start a new scanner and use all declarations from scanners
+ defined previously in the file. That would even allow you
+ to put shared tokens into header files.
+Add an option to output a state transition graph to graphviz
+ (maybe it would be possible to parse re2c's output?)
+Allow C++ comments inside the re2c section
+Capturing parentheses (i.e. submatch extraction) -- this would be really handy.
+
--- /dev/null
+all:
+ $(MAKE) -C skeleton
+ $(MAKE) -C numscan-coupled
+ $(MAKE) -C numscan-modular
+ $(MAKE) -C startstate
+ $(MAKE) -C compare
+
+clean:
+ $(MAKE) -C skeleton clean
+ $(MAKE) -C numscan-coupled clean
+ $(MAKE) -C numscan-modular clean
+ $(MAKE) -C startstate clean
+ $(MAKE) -C compare clean
--- /dev/null
+This directory contains a number of programs that demonstrate how
+to use the re2c library.
+
+
+skeleton: Get started quickly by filling in the blanks.
+
+numscan-coupled: A very simple scanner and parser contained in a
+    single file. Its documentation is intended to be used as a tutorial.
+
+numscan-modular: Separates numscan-coupled into a separate scanner
+    and parser. Also shows how to read data using fopen/fread.
+    Also intended to be used as a tutorial.
+
+startstate: How to use start states. Implements a scanner that recognizes
+    C-style comments by switching start states depending on whether it's
+    currently parsing a comment or not.
+
+compare: The re2c library can be useful even if you don't actually need
+ a scanner. For example, recently I needed to compare two bytestreams
+ without reading either one entirely into memory. Readprocs are a
+ good way of implementing this surprisingly complex task.
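+
+    For example, once it is built (see the Makefile in examples/compare),
+    it can be run roughly like this ("-" means read that side from stdin):
+
+        $ ./compare file1.txt file2.txt
+        $ cat file1.txt | ./compare --brief - file2.txt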
--- /dev/null
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard ../../libre2c/*.c)
+CHDR+=$(wildcard ../../libre2c/*.h)
+
+
+all: compare
+
+# the perl command removes the #line directives because they
+# get in the way of source-level debuggers.
+%.c: %.re
+ re2c $(REOPTS) $< > $@
+ perl -pi -e 's/^\#line.*$$//' $@
+
+compare: cmpmain.c compare.c compare.h $(CHDR)
+ $(CC) -I../.. $(COPTS) -o $@ $(CSRC) cmpmain.c compare.c
+
+clean:
+ rm -f compare
--- /dev/null
+/** @file cmpmain.c
+ *
+ * This file demonstrates how to use the comparison scanner.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "libre2c/readfp.h"
+#include "compare.h"
+
+
+/** Performs the actual comparison.
+ */
+
+compare_result compare(FILE *fp1, FILE *fp2, int brief)
+{
+ // NOTE!!
+ // This code is meant to be simple so I take shortcuts like
+ // exiting without cleaning up. Sorry.
+
+ char buf1[39], buf2[8]; // these sizes are totally arbitrary
+ scanstate ssrec, *ss=&ssrec;
+ int n, cont;
+
+ scanstate_init(ss, buf1, sizeof(buf1));
+ readfp_attach(ss, fp1);
+ compare_attach(ss);
+
+ do {
+ n = fread(buf2, 1, sizeof(buf2), fp2);
+ if(n <= 0) {
+ if(ferror(fp2)) {
+ fprintf(stderr, "Error reading second file.\n");
+ exit(17);
+ }
+ if(feof(fp2)) {
+ break;
+ }
+ }
+
+ cont = compare_continue(ss, buf2, n);
+ if(cont < 0) {
+ fprintf(stderr, "Error reading first file.\n");
+ exit(17);
+ }
+ } while(cont == 0);
+
+ return brief ? compare_check(ss) : compare_check_newlines(ss);
+}
+
+
+/** Exercises the compare scanner.
+ *
+ * This program is sort of like diff --brief. It just compares two files.
+ *
+ * Exit code is:
+ * - 0: the files were the same
+ * - 1: they differed.
+ *
+ * If you don't specify -b then you also get these codes:
+ * - 2: Second file has 1 more newline than the first file.
+ * - 3: First file has 1 more newline than the second file.
+ *
+ * See compare_attach() and test_tiny_buffer().
+ */
+
+int main(int argc, char **argv)
+{
+ compare_result result;
+ FILE *fp1, *fp2;
+ int brief = 0;
+
+ // Check for the brief argument:
+ if(argv[1] && (!strcmp(argv[1],"-b") || !strcmp(argv[1],"--brief"))) {
+ brief = 1;
+ argc--, argv++;
+ }
+ // Ensure we have the right number of arguments.
+ if(argc != 3) {
+ fprintf(stderr, "You must specify two files to compare.\n");
+ exit(13);
+ }
+
+
+ // Open the first file unless it's - in which case we use stdin
+ fp1 = (strcmp(argv[1],"-") ? fopen(argv[1], "r") : stdin);
+ if(!fp1) {
+        fprintf(stderr, "Could not open %s\n", argv[1]);
+ exit(15);
+ }
+
+ // Open the second file unless it's - in which case we use stdin
+ fp2 = (strcmp(argv[2],"-") ? fopen(argv[2], "r") : stdin);
+ if(!fp2) {
+ fprintf(stderr, "Could not open %s\n", argv[2]);
+ exit(16);
+ }
+
+ // do the check
+ result = compare(fp1, fp2, brief);
+
+ if(fp1 != stdin) fclose(fp1);
+ if(fp2 != stdin) fclose(fp2);
+
+ // determine the result
+ switch(result) {
+ case cmp_full_match:
+ printf("Files are identical.\n");
+ return 0;
+ case cmp_no_match:
+ case cmp_ss_has_more_data:
+ printf("Files differ.\n");
+ return 1;
+ case cmp_ptr_has_extra_nl:
+ case cmp_ptr_has_more_nls:
+ printf("Second file has 1 more newline than the first file.\n");
+ return 2;
+ case cmp_ss_has_extra_nl:
+ printf("First file has 1 more newline than the second file.\n");
+ return 3;
+ default:
+ fprintf(stderr, "Unknown comparison code: %d??\n", (int)result);
+ exit(19);
+ }
+
+ fprintf(stderr, "How the heck did this get executed?\n");
+ exit(20);
+}
+
--- /dev/null
+/* compare.c
+ * Scott Bronson
+ * 31 Dec 2004
+ *
+ * This file, like re2c itself, is placed in the public domain.
+ */
+
+
+/** @file compare.c
+ *
+ * See \ref cmpscan for a description.
+ *
+ * \page cmpscan Comparison Scanner
+ *
+ * File comparison that uses libre2c without using re2c itself.
+ *
+ * This uses all re2c's mechanisms for creating buffers and loading
+ * them with data, but does not use the re2c executable to generate
+ * a scanner.
+ *
+ * This file keeps meticulous track of the newline status, causing
+ * it to be significantly more complex than it otherwise has to be.
+ * (this functionality is required by tmtest, the program that
+ * this example was ripped from).
+ *
+ * \section cmpdesc How to Use
+ *
+ * Comparing two totally unrelated streams of data can be surprisingly
+ * difficult. It takes a bit of effort to ensure each stream is
+ * buffered correctly and to compare the corresponding offsets from
+ * each buffer. re2c's readprocs and buffering make this task almost
+ * trivial.
+ *
+ * Here is how to use the comparison scanner:
+ *
+ * - Create the scanstate
+ * - Attach a readproc that provides the first stream of data.
+ * - Call compare_attach() to attach the comparison scanner.
+ * - Call compare_continue() multiple times to supply the second stream
+ * of data. (note: do NOT call scan_next_token() -- this would
+ * be nonsensical).
+ * - Call compare_check() or compare_check_newlines() to discover
+ * the result of the scan.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "compare.h"
+
+
+#define STATE (*(int*)&(ss)->scanref)
+
+/**
+ * Sets up the scanstate for a new comparison.
+ *
+ * Here's how you use the comparison code:
+ * - Create a scanstate attached to one stream and pass it to compare_attach().
+ * - Read some data from the other stream and pass it to compare_continue().
+ * - Keep reading until you're out of data or compare_continue() returns 1.
+ * (there's no harm in passing more data after that; it just wastes time)
+ * - When you're out of data, call compare_check() or compare_check_newlines()
+ * to obtain the result.
+ */
+
+void compare_attach(scanstate *ss)
+{
+ // STATE is cmp_in_progress (-1) while the comparison is still running.
+ // Once a difference or EOF is seen, it holds the final compare_result.
+ STATE = cmp_in_progress;
+}
+
+
+// Returns true if and only if the stream has exactly one character
+// in it, a newline.
+
+static int has_extra_nl(const char *ptr, size_t len)
+{
+ if(len == 1 && ptr[0] == '\n') {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+
+/**
+ * Feeds more bytes to the comparison engine. After you have attached
+ * a reader on one stream to the scanstate, you pass data from the other
+ * stream to this routine.
+ *
+ * @param ss The scanstate from compare_attach.
+ * @param ptr The start of the data to compare.
+ * @param len The number of bytes to compare (may be 0).
+ *
+ * @returns 0 if we still don't have an answer, 1 if the match
+ * has failed, or a negative value if the readproc returned an error.
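+ *
+ * A sketch of how a caller typically handles those return values
+ * ("chunk" and "n" stand for whatever your second data source
+ * just produced):
+ *
+ * <pre>
+ *   int rc = compare_continue(ss, chunk, n);
+ *   if(rc < 0) {
+ *       // the readproc attached to ss failed; rc is its error code
+ *   } else if(rc == 1) {
+ *       // the streams already differ; no need to feed more data
+ *   } else {
+ *       // rc == 0: no difference found yet, keep feeding data
+ *   }
+ * </pre>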
+ */
+
+int compare_continue(scanstate *ss, const char *ptr, size_t len)
+{
+ int prev_had_nl = 0;
+ int n;
+
+ if(STATE != cmp_in_progress) {
+ if(len > 0) {
+ // if the only difference to this point was a \n, state
+ // is has_extra_nl. If there's more data, though, then no match.
+ STATE = cmp_no_match;
+ }
+ return 1;
+ }
+
+ while(len > 0) {
+ n = ss->limit - ss->cursor;
+ if(!n) {
+
+ // need to remember if the previous (and possibly the last)
+ // buffer ended in a newline so we can set the proper flag.
+ if(ss->cursor > ss->bufptr && ss->cursor[-1] == '\n') {
+ prev_had_nl = 1;
+ }
+
+ ss->token = ss->cursor;
+ n = (*ss->read)(ss);
+ ss->line += n;
+ if(n < 0) {
+ // the readproc returned an error
+ return n;
+ }
+ if(n == 0) {
+ // banged into the EOF
+ if(has_extra_nl(ptr,len)) {
+ if(prev_had_nl) {
+ STATE = cmp_ptr_has_more_nls;
+ } else {
+ STATE = cmp_ptr_has_extra_nl;
+ }
+ } else {
+ STATE = cmp_no_match;
+ }
+ return 1;
+ }
+ }
+
+ if(len < n) {
+ n = len;
+ }
+
+ // compare
+ if(memcmp(ptr, ss->cursor, n) != 0) {
+ STATE = cmp_no_match;
+ return 1;
+ }
+
+ ptr += n;
+ ss->cursor += n;
+ len -= n;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Returns an appropriate code for how well matched the two streams
+ * are. Assumes that you're at EOF on the ptr stream.
+ *
+ * @returns
+ * - cmp_no_match: the streams are different
+ * - cmp_full_match: the streams are identical
+ * - cmp_ss_has_more_data: there's still more data from the readproc
+ * to process. If you're out of data to pass to compare_continue,
+ * this means the files differ. Otherwise, keep calling compare_continue().
+ */
+
+compare_result compare_check(scanstate *ss)
+{
+ if(STATE != cmp_in_progress) {
+ return STATE ? cmp_no_match : cmp_full_match;
+ }
+
+ if(scan_is_finished(ss)) {
+ return cmp_full_match;
+ } else {
+ return cmp_ss_has_more_data;
+ }
+}
+
+
+/**
+ * This is a little complex... It checks the newline status of the
+ * streams. If one stream had exactly one more newline at this point
+ * than the other, it returns a custom value. Otherwise, if the streams
+ * are byte-for-byte identical, it returns cmp_full_match,
+ * otherwise cmp_no_match.
+ *
+ * If you call this routine when neither stream is at EOF then it
+ * always returns cmp_no_match because it can't be sure that
+ * either stream will match. The lesson? Only call this function
+ * when one of the streams is out of data.
+ *
+ * @param ss the comparison scanner set up by compare_attach().
+ *
+ * @returns
+ * - cmp_no_match: the streams are different
+ * - cmp_full_match: the streams are identical
+ * - cmp_ss_has_more_data: there's still more data from the readproc
+ * to process. If you're out of data to pass to compare_continue,
+ * this means the files differ. Otherwise, keep calling compare_continue().
+ * - ::cmp_ptr_has_extra_nl, ::cmp_ptr_has_more_nls
+ * - ::cmp_ss_has_extra_nl
+ */
+
+compare_result compare_check_newlines(scanstate *ss)
+{
+ if(STATE != cmp_in_progress) {
+ return STATE;
+ }
+
+ if(scan_is_finished(ss)) {
+ return cmp_full_match;
+ } else if(has_extra_nl(ss->cursor,ss->limit-ss->cursor)) {
+ return cmp_ss_has_extra_nl;
+ } else {
+ return cmp_ss_has_more_data;
+ }
+}
+
+
+#ifdef ZUTEST
+
+#include "zutest.h"
+#include "libre2c/readmem.h"
+#include "libre2c/readrand.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+static void test_empty()
+{
+ scanstate ssrec, *ss=&ssrec;
+
+ readmem_init_str(ss, "");
+ compare_attach(ss);
+ AssertEq(compare_check(ss), cmp_full_match);
+
+ readmem_init_str(ss, "");
+ compare_attach(ss);
+ compare_continue(ss, "", 0);
+ AssertEq(compare_check(ss), cmp_full_match);
+}
+
+
+static void test_standard()
+{
+ scanstate ssrec, *ss=&ssrec;
+
+ readmem_init_str(ss, "123");
+ compare_attach(ss);
+ compare_continue(ss, "12", 2);
+ compare_continue(ss, "3", 1);
+ AssertEq(compare_check(ss), cmp_full_match);
+}
+
+
+static void test_large()
+{
+ char buf[BUFSIZ];
+ scanstate ssrec, *ss=&ssrec;
+ unsigned int seed = 47;
+ int num, i;
+
+ scanstate_init(ss, buf, BUFSIZ);
+ readrand_attach(ss, seed);
+ compare_attach(ss);
+ for(i=0; i<10; i++) {
+ num = rand_r(&seed);
+ compare_continue(ss, (char*)&num, sizeof(num));
+ }
+
+ // compare_check will never return cmp_full_match because
+ // the random reader will never run out of data.
+ AssertEq(compare_check(ss), cmp_ss_has_more_data);
+}
+
+
+static void test_strings(scanstate *ss, const char *s1, const char *s2)
+{
+ readmem_init(ss, s1, strlen(s1));
+ compare_attach(ss);
+ compare_continue(ss, s2, strlen(s2));
+}
+
+
+static compare_result check_newlines(const char *s1, const char *s2)
+{
+ scanstate ssrec, *ss=&ssrec;
+ test_strings(ss, s1, s2);
+ return compare_check_newlines(ss);
+}
+
+
+static void test_newlines()
+{
+ AssertEq(check_newlines("Unix\n", "Unix\n" ), cmp_full_match);
+ AssertEq(check_newlines("Unix", "Unix\n" ), cmp_ptr_has_extra_nl);
+ AssertEq(check_newlines("Unix\n", "Unix" ), cmp_ss_has_extra_nl);
+ AssertEq(check_newlines("Unix", "Unix" ), cmp_full_match);
+
+ AssertEq(check_newlines("Unix\n\n", "Unix\n" ), cmp_ss_has_extra_nl);
+ AssertEq(check_newlines("Unix\n", "Unix\n\n"), cmp_ptr_has_more_nls);
+
+ // empty buffers (except for newlines)
+ AssertEq(check_newlines("\n", "" ), cmp_ss_has_extra_nl);
+ AssertEq(check_newlines("", "\n" ), cmp_ptr_has_extra_nl);
+ AssertEq(check_newlines("\n\n", "" ), cmp_ss_has_more_data);
+ AssertEq(check_newlines("", "\n\n" ), cmp_no_match);
+}
+
+static void test_inc()
+{
+ // Tries to ensure that packetization won't mess us up.
+
+ scanstate ssrec, *ss=&ssrec;
+
+ readmem_init_str(ss, "12");
+ compare_attach(ss);
+ compare_continue(ss, "1", 1);
+ compare_continue(ss, "2", 1);
+ compare_continue(ss, "\n", 1);
+ AssertEq(compare_check_newlines(ss), cmp_ptr_has_extra_nl);
+ AssertEq(compare_check(ss), cmp_no_match);
+
+ readmem_init_str(ss, "123");
+ compare_attach(ss);
+ compare_continue(ss, "1", 1);
+ compare_continue(ss, "2", 1);
+ compare_continue(ss, "\n", 1);
+ AssertEq(compare_check_newlines(ss), cmp_no_match);
+ AssertEq(compare_check(ss), cmp_no_match);
+}
+
+
+static void test_inc_newlines()
+{
+ // Tries to ensure packetization won't mess up the newline checking.
+
+ scanstate ssrec, *ss=&ssrec;
+
+ readmem_init_str(ss, "123");
+ compare_attach(ss);
+ compare_continue(ss, "1", 1);
+ compare_continue(ss, "2", 1);
+ compare_continue(ss, "3", 1);
+ compare_continue(ss, "\n", 1);
+ AssertEq(compare_check_newlines(ss), cmp_ptr_has_extra_nl);
+
+ readmem_init_str(ss, "123\n");
+ compare_attach(ss);
+ compare_continue(ss, "1", 1);
+ compare_continue(ss, "2", 1);
+ compare_continue(ss, "3", 1);
+ AssertEq(compare_check_newlines(ss), cmp_ss_has_extra_nl);
+
+ readmem_init_str(ss, "");
+ compare_attach(ss);
+ compare_continue(ss, "\n", 1);
+ AssertEq(compare_check_newlines(ss), cmp_ptr_has_extra_nl);
+
+ readmem_init_str(ss, "\n");
+ compare_attach(ss);
+ AssertEq(compare_check_newlines(ss), cmp_ss_has_extra_nl);
+}
+
+
+// readmem_buffered should probably be generalized into a generic
+// readproc. It reads data from a memory block into the scanner
+// buffer. The one problem is that it requires a global variable,
+// readmem_buffered_remaining. There would be no need for the global
+// if we used a null to terminate the buffer... that's probably what
+// we should do. TODO: turn this code into a builtin readproc.
+
+
+int readmem_buffered_remaining;
+
+static ssize_t readmem_buffered(scanstate *ss)
+{
+ int avail = read_shiftbuf(ss);
+ Assert(avail);
+
+ if(readmem_buffered_remaining == 0) {
+ // we return EOF once.
+ readmem_buffered_remaining = -1;
+ return 0;
+ }
+
+ AssertPositive(readmem_buffered_remaining);
+ if(readmem_buffered_remaining < avail) {
+ avail = readmem_buffered_remaining;
+ }
+
+ memcpy((void*)ss->limit, ss->readref, avail);
+ ss->limit += avail;
+
+ ss->readref = (void*)((char*)ss->readref + avail);
+ readmem_buffered_remaining -= avail;
+ AssertGe(readmem_buffered_remaining, 0);
+
+ return avail;
+}
+
+scanstate* readmem_buffered_attach(scanstate *ss, const char *data, size_t len)
+{
+ ss->readref = (void*)data;
+ ss->read = readmem_buffered;
+ readmem_buffered_remaining = len;
+ return ss;
+}
+
+
+
+// The point of this test is that it requires a lot of buffer refilling.
+// And that the buffer being passed to compare_continue is LARGER than
+// the scanner buffer.
+
+static void test_tiny_buffer()
+{
+ const char data[] =
+ "12345678123456781234567812345678""12345678123456781234567812345678"
+ "12345678123456781234567812345678""12345678123456781234567812345678"
+ "12345678123456781234567812345678""12345678123456781234567812345678"
+ "12345678123456781234567812345678""12345678123456781234567812345678";
+
+ char buffer[5];
+ scanstate ssrec, *ss=&ssrec;
+ compare_result result;
+ int cont;
+
+ scanstate_init(ss, buffer, sizeof(buffer));
+ readmem_buffered_attach(ss, data, sizeof(data)-1);
+ compare_attach(ss);
+
+ cont = compare_continue(ss, data, sizeof(data)-1);
+ if(cont != 0) {
+ Fail("compare_continue incorrectly reported a difference: %d!", cont);
+ }
+
+ result = compare_check(ss);
+ if(result != cmp_full_match) {
+ Fail("compare_check incorrectly reported %d instead of a full match\n", (int)result);
+ }
+}
+
+
+zutest_proc compare_tests[] = {
+ test_empty,
+ test_standard,
+ test_large,
+ test_newlines,
+ test_inc,
+ test_inc_newlines,
+ test_tiny_buffer,
+ NULL
+};
+
+#endif
+
--- /dev/null
+/* compare.h
+ * Scott Bronson
+ * 31 Dec 2004
+ *
+ * This file, like re2c, is released into the public domain.
+ */
+
+#include "libre2c/scan.h"
+
+
+/**
+ * Unless you get a return value of cmp_full_match, the streams
+ * were not exactly equal.
+ *
+ * As with everything else, equality is not absolute. There are
+ * a number of codes that describe how close the two streams came
+ * to matching (for example, differing only by a trailing newline).
+ */
+
+typedef enum {
+ cmp_in_progress = -1, ///< internal state; will never be returned.
+ cmp_full_match = 0, ///< data matches exactly
+ cmp_no_match, ///< data doesn't match at all
+ cmp_ptr_has_extra_nl, ///< ss doesn't end in a newline, ptr ends in a single newline, otherwise they are identical.
+ cmp_ptr_has_more_nls, ///< ss ends in a newline, ptr ends in one more newline than ss, otherwise they are identical.
+ cmp_ss_has_extra_nl, ///< ss exactly matches ptr, except that ss has an extra newline.
+ cmp_ss_has_more_data ///< ptr and ss matched up to now but we haven't seen the EOF on ss so we can't be sure the two files are exactly equal.
+} compare_result;
+
+
+/** Sets the scanner up to be a comparison scanner.
+ * @param ss The scanner to attach the comparison engine to.
+ */
+
+void compare_attach(scanstate *ss);
+
+/** Feeds data to the comparison.
+ *
+ * The comparison scanner compares the data that you pass in
+ * with the data supplied by its readproc.
+ *
+ * @param ss the comparison scanner set up by compare_attach().
+ * @param ptr A pointer to the next block of data to compare.
+ * @param len The size in bytes of the data pointed to by ptr.
+ * @returns 0 if the match is in progress, 1 if the files didn't
+ * match, or a negative number if the readproc fails.
+ */
+
+int compare_continue(scanstate *ss, const char *ptr, size_t len);
+
+/** Final check to see if the files were equal.
+ *
+ * This tells the scanner that we have no more data to feed it.
+ *
+ * @returns cmp_no_match, cmp_full_match, or cmp_ss_has_more_data.
+ * cmp_ss_has_more_data is returned if you call compare_check()
+ * while there is still more data in the scan buffer (i.e. the stream
+ * attached to the readproc hasn't yet hit EOF). Of course, if
+ * the second stream is at EOF but compare_check() returns
+ * cmp_ss_has_more_data, this means that the two streams do
+ * not match.
+ *
+ * There is no harm in calling compare_check() multiple times.
+ * It will just keep returning cmp_ss_has_more_data until the
+ * first stream hits EOF.
+ */
+
+compare_result compare_check(scanstate *ss);
+
+
+/** Like compare_check() but checks for newline status too.
+ *
+ * compare_check_newlines()
+ * determines if the files would have been equal except
+ * for trailing newlines.
+ *
+ * If one file has a trailing newline and the other doesn't,
+ * compare_check() will report the files as being different.
+ * However, many programs need to know if the files WOULD
+ * HAVE matched if the trailing newlines didn't matter.
+ * Thus the compare_check_newlines() routine.
+ *
+ * @returns one of:
+ * - cmp_full_match -- see compare_check()
+ * - cmp_no_match -- see compare_check()
+ * - cmp_ptr_has_extra_nl -- the first stream (attached to the readproc) doesn't end in a newline, but the second stream ends in a single newline. Except for that, they are identical.
+ * - cmp_ptr_has_more_nls -- the first stream ends in a newline AND the second stream ends in one more newline than ss. Otherwise they are identical.
+ * - cmp_ss_has_extra_nl -- the first stream exactly matches ptr, except that it has one extra newline that the second stream doesn't.
+ * - cmp_ss_has_more_data -- the first stream is not yet at EOF.
+ *
+ */
+
+compare_result compare_check_newlines(scanstate *ss);
+
+
+#ifdef ZUTEST
+#include "zutest.h"
+extern zutest_proc compare_tests[];
+#endif
--- /dev/null
+$compare
+STDERR:
+You must specify two files to compare.
--- /dev/null
+$compare arg1
+STDERR:
+You must specify two files to compare.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+$compare $file - <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+STDOUT:
+Files are identical.
--- /dev/null
+MKFILE file1 <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+MKFILE file2 <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+$compare $file1 $file2
+STDOUT:
+Files are identical.
--- /dev/null
+MKFILE file <<-EOL
+ Blah
+EOL
+
+$compare $file - <<-EOL
+ Bloh
+EOL
+
+STDOUT:
+Files differ.
--- /dev/null
+MKFILE file <<EOL
+Bogus
+EOL
+
+echo -n Bogus | $compare $file -
+
+STDOUT:
+First file has 1 more newline than the second file.
--- /dev/null
+MKFILE file <<EOL
+Bogus
+EOL
+
+echo -n Bogus | $compare - $file
+
+STDOUT:
+Second file has 1 more newline than the first file.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+
+EOL
+
+$compare $file - <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+STDOUT:
+First file has 1 more newline than the second file.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+$compare $file - <<-EOL
+ This is
+ some
+ garbage.
+
+EOL
+
+STDOUT:
+Second file has 1 more newline than the first file.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+$compare --brief $file - <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+STDOUT:
+Files are identical.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+
+EOL
+
+$compare -b $file - <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+STDOUT:
+Files differ.
--- /dev/null
+MKFILE file <<-EOL
+ This is
+ some
+ garbage.
+EOL
+
+$compare --brief $file - <<-EOL
+ This is
+ some
+ garbage.
+
+EOL
+
+STDOUT:
+Files differ.
--- /dev/null
+You need tmtest to run the testfiles in this directory.
+ http://tmtest.berlios.de
+
+tmtest is still very very prerelease. You probably don't want to
+use it until it reaches 0.98 or so.
+
+
+TODO: should write functional tests to ensure that this code handles
+ read errors correctly. That requires a custom readproc that
+ piggybacks onto the original readproc, returns a certain number
+ of bytes, then returns an error. Not hard, but I won't do it
+ tonight...
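+
+For reference, here is a rough, untested sketch of what such an
+error-injecting readproc could look like. The names failing_read,
+fail_countdown and real_read are made up; the readproc signature is
+the one used throughout libre2c.
+
+    static ssize_t (*real_read)(scanstate *ss);  /* the wrapped readproc */
+    static int fail_countdown = 2;               /* succeed twice, then fail */
+
+    static ssize_t failing_read(scanstate *ss)
+    {
+        if(fail_countdown-- <= 0) {
+            return -1;                           /* simulate a read error */
+        }
+        return (*real_read)(ss);                 /* otherwise pass through */
+    }
+
+    /* after attaching the normal readproc: */
+    real_read = ss->read;
+    ss->read = failing_read;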
--- /dev/null
+# This is a tmtest configuration file.
+# For more on tmtest, see http://tmtest.berlios.de
+
+# Tell the testfiles where to find the compare executable.
+compare="$MYDIR/compare"
+
--- /dev/null
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard ../../libre2c/*.c)
+CHDR+=$(wildcard ../../libre2c/*.h)
+
+
+all: numscan
+
+numscan.c: numscan.re
+ re2c $(REOPTS) $< > $@
+ perl -pi -e 's/^\#line.*$$//' $@
+
+numscan: numscan.c $(CHDR)
+ $(CC) -I ../.. $(COPTS) -o $@ $< $(CSRC)
+
+clean:
+ rm -f numscan.c numscan
--- /dev/null
+/* numscan.re
+ * Scott Bronson
+ * 27 Dec 2004
+ */
+
+/** @file numscan.re
+ *
+ * Example that demonstrates how to scan data from a string constant.
+ *
+ * In this example, a simple scanner is embedded directly in the file that
+ * uses it. See modnumscan.re to see how the scanner can be factored
+ * out so that any file may use it.
+ *
+ * This file needs to be processed by re2c, http://re2c.org
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "libre2c/readmem.h"
+
+
+const char *data = "Zero=0000\none=1 two is 22222222\netc.\n";
+
+/** These are the tokens that the numscan scanner recognizes. */
+enum numscan_tokens {
+ EOFTOK, ///< re2c scanners always return 0 when they hit the EOF.
+ NUMBER, ///< a number is a consecutive string of digits.
+ STRING, ///< a string is anything that isn't a number or a newline.
+ NEWLINE, ///< a single newline (so tokens don't span multiple lines).
+};
+const char *tokname[] = { "EOF", "NUM", "STR", "NEWLINE"};
+
+
+/** This is the re2c scanner. */
+
+int numscan(scanstate *ss)
+{
+ scanner_enter(ss);
+
+/*!re2c
+[0-9]+ { return NUMBER; }
+[^0-9\n]+ { return STRING; }
+[\n] { ss->line++; return NEWLINE; }
+*/
+}
+
+
+int main()
+{
+ scanstate ss;
+ int tok;
+
+ // Initialize the scanner to read from this string constant.
+ readmem_init_str(&ss, data);
+ ss.line = 1;
+
+ // Pull all the tokens from the scanner
+ do {
+ tok = numscan(&ss);
+ if(tok < 0) {
+ printf("Scanner returned an error: %d\n", tok);
+ break;
+ }
+
+ printf("%2d: %-12s",ss.line, tokname[tok]);
+ if(tok != NEWLINE) {
+ // don't want to actually print the text of the newline
+ // token -- it makes the output ugly.
+ printf("<<%.*s>>", scan_token_length(&ss),
+ scan_token_start(&ss));
+ }
+ printf("\n");
+ } while(tok);
+
+ // Because we allocated everything on the stack, there's
+ // nothing to free.
+
+ return 0;
+}
+
+
+
+/** \page nscp Numscan Coupled Example
+
+\section numscancoupled Numscan Coupled
+
+In this example, we'll build a scanner and hook it up to scan from
+a C string. The next tutorial, \ref numscanmodular
+will turn this oversimplified example into a full-blown re2c scanner
+and parser.
+
+You can read the complete example in numscan.re (click on the source code link).
+
+\section nsccreate 1. Write the Scanner
+
+Here is the re2c code that we'll use. It groups contiguous
+number characters and contiguous non-number characters.
+
+<pre>
+ [0-9]+ { return NUMBER; }
+ [^0-9\\n]+ { return STRING; }
+ [\\n] { return NEWLINE; }
+</pre>
+
+To turn this re2c code into a scanner, put it in a ::scanproc function and
+insert a call to scanner_enter() at the beginning. It's that easy.
+
+<pre>
+ int numscan(scanstate *ss)
+ {
+ scanner_enter(ss);
+
+ /-!re2c
+ [0-9]+ { return NUMBER; }
+ [^0-9\\n]+ { return STRING; }
+ [\\n] { return NEWLINE; }
+ -/
+ }
+</pre>
+
+\section nscsetup 2. Set Up the Scanner
+
+The ::scanstate data structure stores all state for a single scan run.
+We'll store it as an automatic variable:
+
+<pre>
+ int main()
+ {
+ scanstate ss;
+</pre>
+
+Now we need to decide how to feed the scanner with data.
+The re2c library uses ::readproc routines to do this.
+In this example we'll use readmem.h to scan from a C string constant.
+Normally we would initialize the scanstate and then attach
+the desired readproc:
+
+<pre>
+ scanstate_init(&ss, NULL, 0);
+ readmem_attach(&ss, data, strlen(data));
+</pre>
+
+However, scanning from a string constant is a common enough task that
+the re2c library includes a
+macro that combines this into a single call:
+
+<pre>
+ readmem_init_str(&ss, data);
+</pre>
+
+\section nscuse 3. Use the Scanner
+
+Simply call the scanner until it returns either 0 indicating EOF
+or a negative number indicating an error.
+The meaning of the error depends on the readproc being used.
+
+<pre>
+ do {
+ tok = numscan(&ss);
+ if(tok < 0) {
+ printf("Scanner returned an error: %d\n", tok);
+ break;
+ }
+</pre>
+
+Now we need to do something with the tokens.
+This example just prints them out:
+
+<pre>
+ printf("%2d: %-12s", ss.line, tokname[tok]);
+ if(tok != NEWLINE) {
+ // don't want to actually print the text of the newline
+ // token because it makes the output look ugly.
+ printf("<<%.*s>>", token_length(&ss), token_start(&ss));
+ }
+ printf("\n");
+
+ } while(tok);
+</pre>
+
+Now read \ref numscanmodular to see how to split the monolithic
+scanner/parser we just built into a modular scanner and a parser
+that calls it.
+
+*/
+
+// vi:syn=c
--- /dev/null
+# Ensure that the numscan example program produces the correct result.
+
+$MYDIR/numscan
+
+STDOUT:
+ 1: STR <<Zero=>>
+ 1: NUM <<0000>>
+ 2: NEWLINE
+ 2: STR <<one=>>
+ 2: NUM <<1>>
+ 2: STR << two is >>
+ 2: NUM <<22222222>>
+ 3: NEWLINE
+ 3: STR <<etc.>>
+ 4: NEWLINE
+ 4: EOF <<>>
--- /dev/null
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard ../../libre2c/*.c)
+CHDR+=$(wildcard ../../libre2c/*.h)
+
+
+all: modnumscan
+
+# the perl command removes the #line directives because they
+# get in the way of source-level debuggers.
+%.c: %.re
+ re2c $(REOPTS) $< > $@
+ perl -pi -e 's/^\#line.*$$//' $@
+
+
+modnumscan: modnumscanner.c modnumparser.c $(CHDR)
+ $(CC) -I ../.. $(COPTS) -o $@ $(CSRC) modnumscanner.c modnumparser.c
+
+clean:
+ rm -f modnumscan modnumscanner.c
--- /dev/null
+/** @file modnumparser.c
+ *
+ * This file demonstrates how to parse tokens from a modular scanner.
+ * It's very much like the coupled numscan example (\ref numscancoupled) except that it:
+ * - does not contain its scanner -- it attaches one
+ * - reads data from a FILE* (stdin) rather than a memory block
+ * - retrieves tokens with scan_next_token() rather than calling the scanner directly
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "libre2c/readfp.h"
+#include "modnumscanner.h"
+
+
+const char *tokname[] = { "EOF", "NUM", "STR", "NEWLINE"};
+
+
+int main(int argc, char **argv)
+{
+ int tok;
+ char buffer[4096];
+ scanstate scanner, *ss=&scanner;
+
+ scanstate_init(ss, buffer, sizeof(buffer));
+ readfp_attach(ss, stdin);
+ modnumscan_attach(ss);
+
+ // Pull all the tokens from the scanner
+ do {
+ tok = scan_next_token(ss);
+ if(tok < 0) {
+ printf("Scanner returned an error: %d\n", tok);
+ break;
+ }
+
+ printf("%2d: %-12s", ss->line, tokname[tok]);
+ if(tok != NEWLINE) {
+ // don't want to actually print the text of the newline
+ // token -- it makes the output ugly.
+ printf("<<%.*s>>", scan_token_length(ss),
+ scan_token_start(ss));
+ }
+ printf("\n");
+ } while(tok);
+
+ // you only call readfp_close if you previously called readfp_open.
+ // We didn't, so there's nothing to deallocate.
+
+ return 0;
+}
+
+// vi:syn=c
--- /dev/null
+/** @file modnumscanner.h
+ *
+ * This file defines the interface to the numscanner.
+ */
+
+struct scanstate;
+
+
+/** These are the tokens that the numscan scanner recognizes. */
+enum modnumscan_tokens {
+ EOFTOK, ///< re2c scanners always return 0 when they hit the EOF.
+ NUMBER, ///< a number is a consecutive string of digits.
+ STRING, ///< a string is anything that isn't a number or a newline.
+ NEWLINE, ///< a single newline (so tokens don't span multiple lines).
+};
+
+
+/** This prepares the given scanstate to be a numscanner */
+scanstate* modnumscan_attach(scanstate *ss);
+
--- /dev/null
+/** @file modnumscanner.re
+ *
+ * This shows how to create a modular scanner. Modular scanners
+ * typically consist of a single public routine to prepare a
+ * scanner. After that, tokens are retrieved using scan_next_token().
+ *
+ * This file needs to be processed by re2c, http://re2c.org
+ */
+
+#include "libre2c/scan.h"
+#include "modnumscanner.h"
+
+
+/**
+ * This routine defines the scanner itself.
+ */
+
+static int modnumscan_start(scanstate *ss)
+{
+ scanner_enter(ss);
+
+/*!re2c
+[0-9]+ { return NUMBER; }
+[^0-9\n]+ { return STRING; }
+[\n] { ss->line++; return NEWLINE; }
+*/
+}
+
+
+/**
+ * This routine turns the given scanstate structure into a numscanner.
+ */
+
+scanstate* modnumscan_attach(scanstate *ss)
+{
+ if(ss) {
+ ss->state = modnumscan_start;
+ ss->line = 1;
+ }
+
+ return ss;
+}
+
+
+/** \page nscm Numscan Modular Example
+
+\section numscanmodular Numscan Modular
+
+In this example, we'll take the scanner that we built in
+\ref numscancoupled and make it modular.
+
+This example consists of three files:
+- modnumscanner.h
+- modnumscanner.re
+- modnumparser.c
+
+\section nsmheader 1. Create the Header File
+
+The header file defines all the tokens that the scanner uses.
+
+<pre>
+ enum modnumscan_tokens {
+ EOFTOK, ///< re2c scanners always return 0 when they hit the EOF.
+ NUMBER, ///< a number is a consecutive string of digits.
+ STRING, ///< a string is anything that isn't a number or a newline.
+ NEWLINE, ///< a single newline (so tokens don't span multiple lines).
+ };
+</pre>
+
+It also declares the routine used to attach the scanner to a
+::scanstate structure.
+
+<pre>
+ struct scanstate;
+ scanstate* modnumscan_attach(scanstate *ss);
+</pre>
+
+Notably absent is any sort of routine to actually return tokens.
+Instead, you call scan_next_token(). This
+allows the scanner to define its own \ref startstates.
+
+\section nsmscanner 2. Create the Scanner
+
+The scanner is contained in an .re file. First, we need to
+include scan.h and the header file for our scanner.
+
+<pre>
+ # include "libre2c/scan.h"
+ # include "modnumscanner.h"
+</pre>
+
+The scanner itself is exactly the same as the one we created
+in \ref numscancoupled.
+
+<pre>
+ static int modnumscan_start(scanstate *ss)
+ {
+ scanner_enter(ss);
+
+ /-!re2c
+ [0-9]+ { return NUMBER; }
+ [^0-9\\n]+ { return STRING; }
+ [\\n] { ss->line++; return NEWLINE; }
+ -/
+ }
+</pre>
+
+Finally, we need to define the routine that attaches this scanner
+to the scanstate. The scanstate::state field contains the entrypoint
+for the scanner. The simplest attach routine just sets ss->state
+to its entrypoint. Read \ref linenos for a discussion of line numbers.
+Although this scanner is too simple to need it, it could also use the
+scanstate::scanref field for whatever it wants.
+
+<pre>
+ scanstate* modnumscan_attach(scanstate *ss)
+ {
+ if(ss) {
+ ss->state = modnumscan_start;
+ ss->line = 1;
+ }
+
+ return ss;
+ }
+</pre>
+
+There. Our scanner is complete. To summarize, you just needed to:
+- Create a header file containing the token definitions and attach routine prototype(s).
+- Create a .re file containing the definitions of the re2c scanner and the attach routine(s).
+
+\section nsmparser 3. Create the Parser
+
+Our parser will read data from stdin. It first allocates the scan
+buffer and the scanner on the stack. ss is just a convenience variable
+so that later, if we decide to dynamically allocate the scanstate using
+a function such as readfp_open(), we don't have to change all our
+dereferences from . to ->.
+
+<pre>
+ char buffer[4096];
+ scanstate scanner, *ss=&scanner;
+</pre>
+
+First we initialize the scanstate, telling it what scan buffer to use.
+Then we attach a reader (readfp offers good portability) and the
+scanner that we created in the previous step.
+
+<pre>
+ scanstate_init(ss, buffer, sizeof(buffer));
+ readfp_attach(ss, stdin);
+ modnumscan_attach(ss);
+</pre>
+
+Scanning is exactly the same as the \ref numscancoupled example.
+
+<pre>
+ do {
+ tok = scan_next_token(ss);
+ if(tok < 0) { ... }
+ // process the token
+ } while(tok);
+</pre>
+
+And we're done. Because we allocated everything on the stack and
+didn't open any files, there's nothing to clean up. If we had
+used readfp_open to open the file and allocate a scanner, however,
+we would need to call readfp_close() here.
+
+*/
+
+// vi:syn=c
+
--- /dev/null
+# Ensures that ordinary multi-line input is tokenized correctly.
+
+$test <<EOL
+one: 1
+two: 22
+eight: 88888888 ten: 10 00
+EOL
+
+STDOUT:
+ 1: STR <<one: >>
+ 1: NUM <<1>>
+ 2: NEWLINE
+ 2: STR <<two: >>
+ 2: NUM <<22>>
+ 3: NEWLINE
+ 3: STR <<eight: >>
+ 3: NUM <<88888888>>
+ 3: STR << ten: >>
+ 3: NUM <<10>>
+ 3: STR << >>
+ 3: NUM <<00>>
+ 4: NEWLINE
+ 4: EOF <<>>
--- /dev/null
+# Ensures that an empty input file works.
+
+$test < /dev/null
+
+STDOUT:
+ 1: EOF <<>>
--- /dev/null
+# Ensures that a file consisting of a single newline works.
+
+echo | $test
+
+STDOUT:
+ 2: NEWLINE
+ 2: EOF <<>>
--- /dev/null
+# This file is automatically included by all the testfiles in this
+# directory. It ensures that all the tests run the correct executable.
+
+test=$MYDIR/../modnumscan
--- /dev/null
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard ../../libre2c/*.c)
+CHDR+=$(wildcard ../../libre2c/*.h)
+
+
+all: scanner
+
+# the perl command removes the #line directives because they
+# get in the way of source-level debuggers.
+%.c: %.re
+ re2c $(REOPTS) $< > $@
+ perl -pi -e 's/^\#line.*$$//' $@
+
+
+scanner: scanner.c main.c $(CHDR)
+ $(CC) -I ../.. $(COPTS) -o $@ $(CSRC) scanner.c main.c
+
+clean:
+ rm -f scanner scanner.c
--- /dev/null
+/** @file main.c
+ *
+ * This file is intended to be a skeleton project where you can
+ * just fill in the blanks.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "libre2c/readfp.h"
+#include "scanner.h"
+
+#ifndef BUFSIZ
+#define BUFSIZ 8192
+#endif
+
+
+
+/** This routine is called once for each token in the input file. */
+
+void process_token(const char *filename, int lineno,
+ int tokno, const char *tokstart, int toklen)
+{
+ // tokstart is not null terminated. The length of the string
+ // in tokstart is given by toklen.
+
+
+ // ADD YOUR CODE HERE TO PROCESS THE TOKENS
+ printf("%s:%03d\t%d\t%.*s\n", filename, lineno, tokno,
+ toklen < 40 ? toklen : 40, tokstart);
+}
+
+
+/** Opens and scans the file named on the command line. */
+
+int main(int argc, char **argv)
+{
+ const char *filename;
+ scanstate scanner, *ss = &scanner;
+ char buffer[BUFSIZ];
+
+ int token;
+ FILE *fp;
+
+ filename = argv[1];
+ if(filename == NULL) {
+ fprintf(stderr, "You must supply the file to open!\n");
+ exit(1);
+ }
+
+ fp = fopen(filename, "r");
+ if(!fp) {
+ fprintf(stderr, "Could not open file %s\n", filename);
+ exit(1);
+ }
+
+ // Create the scanner
+ scanstate_init(ss, buffer, sizeof(buffer));
+ readfp_attach(ss, fp); // attach the readproc
+ scanner_attach(ss); // attach the scanner
+
+
+ do {
+ token = scan_next_token(ss);
+ if(token < 0) {
+ printf("Scanner's readproc returned an error: %d\n", token);
+ break;
+ }
+
+ process_token(filename, ss->line, token,
+ scan_token_start(ss), scan_token_length(ss));
+ } while(token);
+
+
+ // No need to deallocate the scanner because everything
+ // was allocated on the stack.
+
+ fclose(fp);
+
+ return 0;
+}
+
+// vi:syn=c
--- /dev/null
+/** @file scanner.h
+ *
+ * This file defines the interface to your scanner.
+ */
+
+struct scanstate;
+
+
+/** These are the tokens that your scanner recognizes. */
+
+// 1. Add the tokens that your scanner recognizes here:
+enum scanner_tokens {
+ EOFTOK, // re2c scanners always return 0 when they hit the EOF.
+ TOKEN1, // ADD ...
+ TOKEN2, // YOUR ...
+ TOKEN3, // TOKENS ...
+ TOKEN4, // HERE.
+};
+
+
+// 2. Rename this function (i.e. zipcode_scanner_attach)
+scanstate* scanner_attach(scanstate *ss);
+
--- /dev/null
+/** @file scanner.re
+ *
+ * This file is a skeleton where you can simply fill in your re2c
+ * code and produce a working modular re2c scanner.
+ */
+
+#include "libre2c/scan.h"
+#include "scanner.h"
+
+
+/** Replace the re2c code in this file with your own scanner */
+
+static int scanner_start(scanstate *ss)
+{
+ scanner_enter(ss);
+
+// 1. FILL IN YOUR SCANNER HERE:
+/*!re2c
+[0-9]+ { return TOKEN1; }
+[^0-9\n]+ { return TOKEN2; }
+[\n] { ss->line++; return TOKEN3; }
+*/
+}
+
+
+// 2. Rename this function as you did in the header file.
+scanstate* scanner_attach(scanstate *ss)
+{
+ if(ss) {
+ ss->state = scanner_start;
+ ss->line = 1;
+ }
+
+ return ss;
+}
+
--- /dev/null
+COPTS=-g -Wall -Werror
+
+CSRC+=$(wildcard ../../libre2c/*.c)
+CHDR+=$(wildcard ../../libre2c/*.h)
+
+
+all: comments
+
+# the perl command removes the #line directives because they
+# get in the way of source-level debuggers.
+%.c: %.re
+ re2c $(REOPTS) $< > $@
+ perl -pi -e 's/^\#line.*$$//' $@
+
+comments: cscan.c cparse.c $(CHDR)
+ $(CC) -I ../.. $(COPTS) -o $@ $(CSRC) cscan.c cparse.c
+
+clean:
+ rm -f comments cscan.c
--- /dev/null
+/* This is a comment */
+This is not.
+/* This is a
+multi-line comment */ and not /* and after */
+/* com */ data */ data.
+/* com /* com /* com */ data.
+// c++ comment
+//
+/**/
+/*****/
+/
+*
+/* **/
+/* * ** */
--- /dev/null
+/** @file cparse.c
+ *
+ * This file simply prints the tokens returned by the comment scanner.
+ * See \ref numscanmodular for a description of how this routine works.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "libre2c/readfp.h"
+#include "cscan.h"
+
+
+const char *tokname[] = { "EOF", "LABEL", "DATA", "CPCOM",
+ "COMBEG", "COMMENT", "COMEND", "NEWLINE"};
+
+
+int main(int argc, char **argv)
+{
+ int tok;
+ scanstate *ss;
+
+ ss = readfp_open("comtest", BUFSIZ);
+ if(!ss) {
+ fprintf(stderr, "Could not create scanner. Missing comtest file?\n");
+ exit(1);
+ }
+ commentscan_attach(ss);
+
+ // Pull all the tokens from the scanner
+ do {
+ tok = scan_next_token(ss);
+ if(tok < 0) {
+ printf("Scanner returned an error: %d\n", tok);
+ break;
+ }
+
+ printf("%2d: %-12s", ss->line, tokname[tok]);
+ if(tok != NEWLINE) {
+ // don't want to actually print the text of the newline
+ // token -- it makes the output ugly.
+ printf("<<%.*s>>", scan_token_length(ss),
+ scan_token_start(ss));
+ }
+ printf("\n");
+ } while(tok);
+
+ readfp_close(ss);
+
+ return 0;
+}
+
+// vi:syn=c
--- /dev/null
+/** @file cscan.h
+ *
+ * This file defines the interface to the comment scanner.
+ */
+
+struct scanstate;
+
+
+/** These are the tokens that the comment scanner recognizes. */
+enum commentscan_tokens {
+ EOFTOK, ///< re2c scanners always return 0 when they hit the EOF.
+ LABEL, ///< reserved; not currently returned by the comment scanner.
+ DATA, ///< ordinary data outside of any comment.
+ CPCOMMENT, ///< A C++ comment: //(.*)$
+
+ COMBEG, ///< the start of a comment, "/*"
+ COMMENT, ///< data inside a comment
+ COMEND, ///< the ending clause of a comment
+
+ NEWLINE, ///< a single newline, returned in either state
+};
+
+
+/** This prepares the given scanstate to be a comment scanner */
+scanstate* commentscan_attach(scanstate *ss);
+
--- /dev/null
+/** @file cscan.re
+ *
+ * Example: how to use start states to switch the state of your scanner.
+ *
+ * The technique demonstrated in this file, implementing start states
+ * by manipulating the scanner's entrypoint, is but one method
+ * of implementing start states. The re2c manpage shows another
+ * technique.
+ *
+ * This file needs to be processed by re2c, http://re2c.org
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libre2c/scan.h"
+#include "cscan.h"
+
+// forward declaration
+static int comment(scanstate *ss);
+
+
+/**
+ * This is the entrypoint for the scanner. It recognizes all
+ * data and C++ comments. When it recognizes a C comment, it
+ * switches to the "comment" state.
+ */
+
+static int initial(scanstate *ss)
+{
+ scanner_enter(ss);
+
+ // unfortunately doxygen eats the re2c declarations
+ // because it uses the same block delimiter.
+/*!re2c
+"//"[^\n]* / [\n] { return CPCOMMENT; }
+[/][*] { ss->state = comment; return COMBEG; }
+[^/\n]+ { return DATA; }
+"/"/[^/*] { return DATA; }
+[\n] { ss->line++; return NEWLINE; }
+*/
+}
+
+
+/**
+ * This is the entrypoint for the "comment" state. It returns
+ * COMMENT tokens until it finds the end of the comment.
+ */
+
+static int comment(scanstate *ss)
+{
+ scanner_enter(ss);
+
+ // unfortunately doxygen eats the re2c declarations
+ // because it uses the same block delimiter.
+/*!re2c
+[*][/] { ss->state = initial; return COMEND; }
+[^*\n]+ { return COMMENT; }
+"*"/[^/] { return COMMENT; }
+[\n] { ss->line++; return NEWLINE; }
+*/
+}
+
+
+/**
+ * This routine turns the given scanstate structure into a comment scanner.
+ */
+
+scanstate* commentscan_attach(scanstate *ss)
+{
+ if(ss) {
+ ss->state = initial;
+ ss->line = 1;
+ }
+
+ return ss;
+}
+
+
+/** \page sspage Start States
+
+\section startstates Comment Matcher
+
+This example demonstrates how to use start states in your
+re2c scanners. It creates a scanner that looks
+for C comments. This scanner has two states:
+- the initial state that parses all non-comment data. When it sees
+ slash-star, it switches to:
+- the comment state that parses the contents of a C-style comment.
+
+The source code to this example can be found in:
+- cscan.re
+- cscan.h
+- cparse.c
+
+A scanner with start states is just like any other modular scanner
+(see \ref numscanmodular), except that it has multiple entrypoints.
+Each entrypoint corresponds to a state.
+
+<pre>
+ # include "re2c/scan.h"
+ # include "cscan.h"
+
+ // forward declaration
+ static int comment(scanstate *ss);
+</pre>
+
+Our comment scanner
+has two states: the initial state, and the comment state.
+The scanner starts in the initial state. When it notices
+slash-star indicating the start of a comment, it returns the COMBEG
+token and switches to the comment state.
+
+<pre>
+ static int initial(scanstate *ss)
+ {
+ scanner_enter(ss);
+
+ /-!re2c
+ "//"[^\\n]* / [\\n] { return CPCOMMENT; }
+ [/][*] { ss->state = comment; return COMBEG; }
+ [^/\\n]+ { return DATA; }
+ "/"/[^\057*] { return DATA; }
+ [\\n] { ss->line++; return NEWLINE; }
+ -/
+ }
+</pre>
+
+You can switch your scanner's state at any time, but you would typically
+do it right before you return the token. The next time the scanner is
+called, it is called through the new scanproc.
+
+The scanner is in the comment state while it is scanning a comment.
+
+<pre>
+ static int comment(scanstate *ss)
+ {
+ scanner_enter(ss);
+
+ /-!re2c
+ [*][/] { ss->state = initial; return COMEND; }
+ [^*\\n]+ { return COMMENT; }
+ "*"/[^/] { return COMMENT; }
+ [\\n] { ss->line++; return NEWLINE; }
+ -/
+ }
+</pre>
+
+Finally, we attach the scanner as we always do.
+
+<pre>
+ scanstate* commentscan_attach(scanstate *ss)
+ {
+ if(ss) {
+ ss->state = initial;
+ ss->line = 1;
+ }
+
+ return ss;
+ }
+</pre>
+
+People familiar with Lex may want to define BEGIN(x) (ss->state=(x))
+to make this feel more Lexish.
+
+The technique described here is but one way of implementing start
+states. The re2c manpage shows another way to emulate start states
+using goto statements.
+
+*/
+
+// vi:syn=c
+
--- /dev/null
+/* read.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ */
+
+
+/**
+ * @file read.c
+ *
+ * Defines functions intended to be used by read routines.
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "scan.h"
+
+
+
+/** Rearrange the scan buffer. Only called by readprocs.
+ *
+ * This moves all in-flight data to the bottom of the buffer
+ * to free up more room.
+ *
+ * Your readproc should read as much as it can between ss->limit
+ * and ss->bufptr + ss->bufsiz. It should adjust ss->limit to point
+ * to the new end of data (the end of the buffer if it was able to
+ * execute a complete read).
+ *
+ * Returns the number of bytes available to read in the buffer.
+ */
+
+/* TODO: could optimize for the fast case (no data that needs
+ * to be shifted). However, there's no burning need -- it wouldn't
+ * offer much of a speedup. Bigger buffers would help a lot more.
+ * NOTE: the middle line of this macro is incomplete! Don't use!
+
+#define read_shiftbuf(ss) (ss->token == ss->limit ? \
+ (ss->token = ss->cursor = ss->bufptr, 0) : \
+ _read_shiftbuf(ss))
+
+ */
+
+
+// cnt tells how many bytes need to be shifted downward.
+// The bytes that need to be shifted are those between the token
+// (or the marker, whichever is earlier) and the limit.
+
+ssize_t read_shiftbuf(scanstate *ss)
+{
+ const char *min;
+ ssize_t cnt;
+
+ min = ss->token;
+ if(ss->marker && ss->marker < min) {
+ min = ss->marker;
+ }
+
+ // this tells how many bytes need to be shifted.
+ cnt = ss->limit - min;
+ if(cnt) {
+ ssize_t delta = min - ss->bufptr;
+ memmove((void*)ss->bufptr, min, cnt);
+ ss->cursor -= delta;
+ ss->token -= delta;
+ if(ss->marker) ss->marker -= delta;
+ ss->limit -= delta;
+
+ assert(ss->limit >= ss->bufptr);
+ assert(ss->cursor >= ss->bufptr);
+ assert(ss->cursor <= ss->limit);
+ } else {
+ // nothing to shift so we reset the buffer to maximum size.
+ ss->cursor = ss->bufptr;
+ ss->token = ss->bufptr;
+ if(ss->marker) ss->marker = ss->bufptr;
+ ss->limit = ss->bufptr;
+ }
+
+ return ss->bufsiz - (ss->limit - ss->bufptr);
+}
+
--- /dev/null
+/* read.h
+ * Scott Bronson
+ * 27 Dec 2004
+ *
+ * Version VERSION
+ */
+
+/** @file read.h
+ *
+ * Defines utilities intended to be used by ::readproc routines.
+ */
+
+
+#ifndef R2READ_H
+#define R2READ_H
+
+#include "scan.h"
+
+
+/** Removes obsolete data from the front of the scan buffer to
+ * make room for new data added to the back.
+ *
+ * @param ss The scanner whose buffer you want to modify.
+ * @returns The number of bytes available at the end of the
+ * buffer that can accept data.
+ */
+
+ssize_t read_shiftbuf(scanstate *ss);
+
+#endif
+
--- /dev/null
+/* readfd.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "readfd.h"
+#include "scan-dyn.h"
+
+/** @file readfd.c
+ *
+ * This file provides a readproc that reads from Unix file descriptors.
+ */
+
+
+/**
+ * The internal function that performs the read. You never call
+ * it directly. Instead, it is called automatically by the scanner
+ * whenever it needs more data.
+ */
+
+static ssize_t readfd_read(scanstate *ss)
+{
+ int n, avail;
+
+ avail = read_shiftbuf(ss);
+
+ // retry if the read() was interrupted by a signal
+ do {
+ n = read((int)ss->readref, (void*)ss->limit, avail);
+ } while(n < 0 && errno == EINTR);
+ if(n > 0) {
+ ss->limit += n;
+ }
+
+ return n;
+}
+
+
+/** Attaches the existing fd to the existing scanstate object.
+ * Note that this routine checks the fd and if it's less than 0
+ * (indicating an error) it returns null.
+ *
+ * If you pass this routine valid arguments, there's no way for it to fail.
+ *
+ * Note that the re2c library does not work very well with file descriptors
+ * that may return short reads such as pipes and sockets. You may want to
+ * try to use the new re2c push model to fix this.
+ */
+
+scanstate* readfd_attach(scanstate *ss, int fd)
+{
+ if(!ss || fd < 0) {
+ return 0;
+ }
+
+ ss->readref = (void*)fd;
+ ss->read = readfd_read;
+ return ss;
+}
+
+
+/** Opens the file and creates a new scanner to scan it.
+ * This is just a convenience routine. You can create a scanner
+ * yourself and attach to it using readfd_attach().
+ *
+ * If you do use this routine, you should call readfd_close() to close
+ * the file and deallocate the scanner.
+ *
+ * Bufsiz tells how big in bytes the scan buffer will be. No single
+ * token may be larger than bufsiz.
+ *
+ * This is the approximate equivalent to:
+ * - dynscan_create(bufsiz);
+ * - readfd_attach(ss, open(path));
+ */
+
+scanstate* readfd_open(const char *path, size_t bufsiz)
+{
+ scanstate *ss;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if(fd < 0) {
+ return NULL;
+ }
+
+ ss = dynscan_create(bufsiz);
+ if(!ss) {
+ close(fd);
+ return NULL;
+ }
+
+ return readfd_attach(ss, fd);
+}
+
+
+/**
+ * Closes the file and deallocates the memory allocated by readfd_open().
+ */
+
+void readfd_close(scanstate *ss)
+{
+ close((int)ss->readref);
+ dynscan_free(ss);
+}
+
--- /dev/null
+/* readfd.h
+ * Scott Bronson
+ * 30 Dec 2004
+ *
+ * Version VERSION
+ */
+
+#include "read.h"
+
+
+/** @file readfd.h
+ *
+ * This file provides a readproc that reads from Unix file descriptors.
+ */
+
+
+/** Sets the scanner to scan the data contained in the given file descriptor.
+ *
+ * @param ss The scanner to manipulate.
+ * @param fd The file descriptor that the scanner should pull data from.
+ */
+
+scanstate* readfd_attach(scanstate *ss, int fd);
+
+
+/** Creates a scanner to scan the given file.
+ *
+ * This routine dynamically allocates the scanstate and its buffer.
+ * It then opens the given Unix file and attaches it to the scanner.
+ *
+ * @param path The Unix path to the file to scan.
+ * @param bufsiz The size in bytes of the scanner buffer to allocate.
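+ *
+ * Typical use (a sketch; "datafile" and the 8192-byte buffer are
+ * arbitrary, and myscanner_attach stands for whatever scanner you
+ * actually attach):
+ *
+ * <pre>
+ *   scanstate *ss = readfd_open("datafile", 8192);
+ *   if(ss) {
+ *       myscanner_attach(ss);
+ *       while(scan_next_token(ss) > 0) {
+ *           // process the token
+ *       }
+ *       readfd_close(ss);
+ *   }
+ * </pre>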
+ */
+
+scanstate* readfd_open(const char *path, size_t bufsiz);
+
+/** Disposes of the scanner created by readfd_open().
+ *
+ * This routine closes the file and deallocates the scanner and buffer.
+ * It frees all resources allocated by readfd_open().
+ *
+ * @param ss The scanner to destroy.
+ */
+
+void readfd_close(scanstate *ss);
+
--- /dev/null
+/* readfp.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ */
+
+#include <stdio.h>
+#include <assert.h>
+
+#include "scan-dyn.h"
+#include "readfp.h"
+
+
+static ssize_t readfp_read(scanstate *ss)
+{
+ ssize_t n, avail;
+
+ avail = read_shiftbuf(ss);
+ n = fread((void*)ss->limit, 1, avail, ss->readref);
+ ss->limit += n;
+
+ if(n <= 0) {
+ if(feof(ss->readref)) {
+ return 0;
+ }
+ if(ferror(ss->readref)) {
+ return -1;
+ }
+
+ // this implies some sort of internal consistency error
+ assert(!"Not eof or error. I have no idea what happened!");
+ return -3;
+ }
+
+ return n;
+}
+
+
+scanstate* readfp_attach(scanstate *ss, FILE *fp)
+{
+ if(!ss || !fp) {
+ return 0;
+ }
+
+ ss->readref = fp;
+ ss->read = readfp_read;
+ return ss;
+}
+
+
+/** Creates a scanstate object that can read from the given file.
+ * Returns NULL if an error occurs (the file could not be opened
+ * or memory could not be allocated).
+ * Ensure that you call readfp_close() when you're finished.
+ * Bufsiz gives the size of the scan buffer in bytes.
+ * Ensure that the buffer size will fit into a signed
+ * int on the current machine architecture.
+ *
+ * This call is the approximate equivalent to:
+ * - dynscan_create(bufsiz);
+ * - readfp_attach(ss, fopen(path));
+ */
+
+scanstate* readfp_open(const char *path, size_t bufsiz)
+{
+ scanstate *ss;
+ FILE *fp;
+
+ // open the file
+ fp = fopen(path, "r");
+ if(!fp) {
+ return NULL;
+ }
+
+ // create the dynamic scanstate
+ ss = dynscan_create(bufsiz);
+ if(!ss) {
+ fclose(fp);
+ return NULL;
+ }
+
+ return readfp_attach(ss, fp);
+}
+
+
+/** Releases the resources allocated by readfp_open()
+ */
+
+void readfp_close(scanstate *ss)
+{
+ fclose(ss->readref);
+ dynscan_free(ss);
+}
+
+
--- /dev/null
+/* readfp.h
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ *
+ * This allows you to feed an re2c scanner directly from a
+ * std C fileptr.
+ */
+
+
+#include <stdio.h>
+#include "read.h"
+
+
+scanstate* readfp_attach(scanstate *ss, FILE *fp);
+scanstate* readfp_open(const char *filename, size_t bufsiz);
+void readfp_close(scanstate *ss);
+
--- /dev/null
+/* readmem.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ *
+ * Allows scanning from an in-memory block. No separate scan buffer
+ * is used; we scan directly out of the caller's memory. The readproc
+ * just returns EOF when the scanner hits the end of the block.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "readmem.h"
+
+
+/** Returns EOF because we're out of data.
+ */
+
+static ssize_t readmem_read(scanstate *ss)
+{
+ return 0;
+}
+
+
+/** Initializes the given scanstate to read from the given string.
+ */
+
+scanstate* readmem_init(scanstate *ss, const char *data, size_t len)
+{
+ scanstate_init(ss, data, len);
+ ss->limit = ss->bufptr + ss->bufsiz;
+ ss->read = readmem_read;
+ return ss;
+}
+
+
+/** Attaches the scanner to the given string. Use this
+ * if you've already malloc'd a scanstate. It calls scanstate_init
+ * so it will blow everything already in the scanstate away.
+ *
+ * TODO: this is unlike all other attach routines, none of which
+ * blow away the scanstate. CHANGE THIS.
+ */
+
+scanstate* readmem_attach(scanstate *ss, const char *data, size_t len)
+{
+ // ensure we don't stomp on a pre-existing buffer
+ if(ss->bufsiz) {
+ return NULL;
+ }
+
+ return readmem_init(ss, data, len);
+}
+
--- /dev/null
+/* readmem.h
+ * Scott Bronson
+ * 30 Dec 2004
+ *
+ * Version VERSION
+ *
+ * Allows you to feed an re2c scanner from a memory block.
+ */
+
+#include "read.h"
+
+
+scanstate* readmem_init(scanstate *ss, const char *data, size_t len);
+scanstate* readmem_attach(scanstate *ss, const char *data, size_t len);
+
+
+/**
+ * Initializes a readmem from a C string. The string must exist for
+ * as long as the scanner -- the scanner doesn't make a copy.
+ */
+
+#define readmem_init_str(ss,str) readmem_init(ss,str,strlen(str))
--- /dev/null
+/* readrand.c
+ * Scott Bronson
+ * 6 Mar 2006
+ *
+ * Version VERSION
+ *
+ * This reader feeds an re2c scanner with a pseudo-endless stream of random binary data.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "readrand.h"
+
+
+/**
+ * The internal function that performs the read. You never call
+ * it directly. Instead, it is called automatically by the scanner
+ * whenever it needs more data.
+ *
+ * This reader will never hit EOF. It just keeps supplying data
+ * from rand_r(3).
+ */
+
+static ssize_t readrand_read(scanstate *ss)
+{
+ int avail = read_shiftbuf(ss);
+ int orig = avail;
+ int num;
+
+ while(avail >= sizeof(int)) {
+ // There appears to be some sort of cast bug in GCC4...?
+ // Anyhow, the memcpy works, casting pointers didn't.
+ // Hopefully the compiler optimizes the memcpy away.
+ num = rand_r((unsigned int*)&ss->readref);
+ memcpy((char*)ss->limit, &num, sizeof(int));
+ ss->limit += sizeof(int);
+ avail -= sizeof(int);
+ }
+
+ return orig-avail;
+}
+
+
+/**
+ * Attaches the random reader to the given scanner.
+ *
+ * It is legal to reattach a scanner.
+ * This just updates the random seed to the new value.
+ *
+ * This routine cannot possibly fail.
+ */
+
+scanstate* readrand_attach(scanstate *ss, int seed)
+{
+ *(unsigned int*)&ss->readref = seed;
+ ss->read = readrand_read;
+ return ss;
+}
+
--- /dev/null
+/* readrand.h
+ * Scott Bronson
+ * 6 Mar 2006
+ *
+ * Version VERSION
+ *
+ * This reader feeds an re2c scanner with a pseudo-endless
+ * stream of random binary data.
+ */
+
+
+#include "read.h"
+
+
+/**
+ * Causes the scanner to read a pseudorandom stream of bytes.
+ *
+ * This readproc will never return EOF. You need to use another
+ * technique to determine when to stop reading.
+ *
+ * @param ss The scanner to feed.
+ * @param seed The random seed to use. The same seed is guaranteed
+ * to always produce the same stream of bytes (see rand_r(3) for
+ * details).
+ */
+
+scanstate* readrand_attach(scanstate *ss, int seed);
+
--- /dev/null
+/* scan-dyn.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ *
+ * Routines to dynamically allocate and free scanners and their buffers.
+ * The scanner will be initialized, but you still need to attach
+ * it to a particular type of reader.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "scan-dyn.h"
+
+/** Dynamically allocates a new scanstate data structure
+ *
+ * Allocates a buffer of the given size for the scanner using malloc.
+ * Ensure the buffer is large enough to hold the longest token you
+ * expect to scan. Pass 0 for bufsiz if you don't want to allocate a
+ * buffer at all (i.e. you're scanning from a contiguous memory block).
+ * Note that the size is an int: the number of bytes in the buffer
+ * must fit into a signed integer on the current architecture.
+ * Returns the new scanstate or NULL if there was a memory allocation problem.
+ *
+ * Remember to call dynscan_free() when you're done scanning.
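+ *
+ * For example (a sketch; readfd_attach() is just one possible reader
+ * and fd stands for an already-open file descriptor):
+ *
+ * <pre>
+ *   scanstate *ss = dynscan_create(8192);
+ *   if(ss) {
+ *       readfd_attach(ss, fd);
+ *       // ...attach a scanner and scan...
+ *       dynscan_free(ss);
+ *   }
+ * </pre>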
+ */
+
+scanstate* dynscan_create(size_t bufsiz)
+{
+ scanstate *ss;
+ char *bufptr;
+
+ ss = malloc(sizeof(scanstate));
+ if(!ss) {
+ return NULL;
+ }
+
+ if(bufsiz) {
+ bufptr = malloc(bufsiz);
+ if(!bufptr) {
+ free(ss);
+ return NULL;
+ }
+ } else {
+ bufptr = 0;
+ }
+
+ scanstate_init(ss, bufptr, bufsiz);
+ return ss;
+}
+
+
+/** Frees a scanstate allocated by dynscan_create().
+ */
+
+void dynscan_free(scanstate *ss)
+{
+ if(ss->bufptr) {
+ free((void*)ss->bufptr);
+ }
+
+ free(ss);
+}
+
--- /dev/null
+/* scan-dyn.h
+ * Scott Bronson
+ * 30 Dec 2004
+ *
+ * Version VERSION
+ *
+ * Creates a scanner entirely located on the heap. You must make
+ * sure to eventually call dynscan_free for every scanner created
+ * with dynscan_create.
+ */
+
+#include "scan.h"
+
+
+scanstate* dynscan_create(size_t bufsiz);
+void dynscan_free(scanstate *ss);
+
--- /dev/null
+/* scan.c
+ * Scott Bronson
+ * 28 Dec 2004
+ *
+ * Version VERSION
+ */
+
+#include <string.h>
+#include <assert.h>
+#include "scan.h"
+
+/** @file scan.c
+ *
+ * This file contains routines to manipulate scanstate structures.
+ */
+
+
+/** Defines the version number of this library.
+ *
+ * This is filled in by the release script.
+ */
+
+const char *re2c_library_version = "VERSION";
+
+
+/** Initializes the scanstate structure.
+ *
+ * @param ss The scanstate structure to initialize.
+ * @param bufptr The buffer that the scanner should use.
+ * @param bufsiz The size in bytes of the buffer passed in bufptr.
+ *
+ * Initializes the given scanstate structure and tells it to use
+ * the provided buffer.
+ *
+ * If you don't need to buffer the data
+ * (for instance, when using readmem.h), pass NULL for bufptr
+ * and 0 for bufsiz.
+ *
+ * You probably want to attach a reader after calling this routine.
+ *
+ * Also see scanstate_reset() and dynscan_create().
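+ *
+ * For example (a sketch mirroring modnumparser.c; readfp_attach() and
+ * modnumscan_attach() are just one possible reader/scanner pair):
+ *
+ * <pre>
+ *   char buf[BUFSIZ];
+ *   scanstate ss;
+ *
+ *   scanstate_init(&ss, buf, sizeof(buf));
+ *   readfp_attach(&ss, stdin);
+ *   modnumscan_attach(&ss);
+ * </pre>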
+ */
+
+void scanstate_init(scanstate *ss, const char *bufptr, size_t bufsiz)
+{
+ ss->cursor = bufptr;
+ ss->limit = bufptr;
+ ss->marker = NULL;
+ ss->token = bufptr;
+ ss->line = 0;
+ ss->last_read = 1;
+ ss->bufptr = bufptr;
+ ss->bufsiz = bufsiz;
+ ss->readref = NULL;
+ ss->read = NULL;
+ ss->scanref = NULL;
+ ss->state = NULL;
+ ss->userref = NULL;
+ ss->userproc = NULL;
+}
+
+
+/** Resets the given scanstate as if it was just created but doesn't
+ * modify the scanner or the readproc.
+ *
+ * Doesn't modify:
+ * - the reader or the readref.
+ * - the scanner or the scanref.
+ * - the userproc or userref.
+ * - the buffer or buffer size
+ *
+ * If you want to reset the reader and scanner, you probably want
+ * to detach and reattach them.
+ */
+
+void scanstate_reset(scanstate *ss)
+{
+ ss->cursor = ss->bufptr;
+ ss->limit = ss->bufptr;
+ ss->marker = NULL;
+ ss->token = ss->bufptr;
+ ss->line = 0;
+ ss->last_read = 1;
+}
+
--- /dev/null
+/* scan.h
+ * Scott Bronson
+ * 27 Dec 2004
+ *
+ * Version VERSION
+ *
+ * This is part of the support code that makes writing re2c scanners much easier.
+ *
+ * TODO: add dispose procs. Normally these will just be null but
+ * if they're set, they will ensure that all resources are collected.
+ *
+ * TODO: probably want to split the re2c-specific code from the general
+ * code. This file is overall very useful, but it's got a few limitations
+ * imposed by re2c that should probably be placed in its own layer.
+ * That way, future versions of re2c won't have to suffer the same
+ * limitations.
+ */
+
+// to pull in the definition for size_t
+#include <sys/types.h>
+
+
+/** @file scan.h
+ *
+ * This file contains the definition of the scanstate, the most
+ * important data structure for a scanner. It also contains some
+ * support macros.
+ *
+ * TERMINOLOGY
+ *
+ * allocate: scanstates can be dynamically (dynscan_create()) or
+ * statically (scanstate_init()) allocated. The buffers they use to
+ * hold data may also be either dynamic or static. Of course, any
+ * time you allocate something dynamically, you must call the
+ * corresponding free routine when you're done with it.
+ *
+ * attach: when the scanner is first initialized (scanstate_init())
+ * or allocated (dynscan_create()), it is not associated with
+ * a reader (to supply data) or a scanner (to scan the data).
+ * You first need to attach both a ::scanproc and a ::readproc.
+ *
+ * initialize: prepare an already-allocated scanner for use.
+ * After initializing it, you must ATTACH the scanner to a
+ * READER.
+ *
+ * reader: See readproc
+ *
+ * ::readproc: reads data into the scanstate for the scanner.
+ * Examples are readmem.c (read from a contiguous block in
+ * memory), readfp.c (read from a FILE*), readfd.c (read
+ * from a Unix file descriptor), etc.
+ *
+ * scanner: the function (or functions, see \ref startstates)
+ * that actually performs the scanning. The scanner need not
+ * be written with the assistance of re2c. It accepts a
+ * scanstate data structure and returns the next token in
+ * the stream.
+ *
+ * scanproc: the function pointer type used for a scanner's
+ * entrypoint. See ::scanproc.
+ *
+ * ::scanstate: the data structure that retains complete state for the
+ * scanner. Scanners are thread safe: they never, ever use global
+ * state.
+ */
+
+
+#ifndef R2SCAN_H
+#define R2SCAN_H
+
+
+// for re2c...
+#define YYCTYPE char
+#define YYCURSOR (ss->cursor)
+#define YYLIMIT (ss->limit)
+#define YYMARKER (ss->marker)
+
+/** Fills the scan buffer with more data.
+ *
+ * This routine needs to force a return if 0 bytes were read because
+ * otherwise the re2c scanner will end up scanning garbage way off
+ * the end of the buffer. There's no (good) way to tell the scanner
+ * "the file is at eof so just finish the token that you're on" (right?).
+ * It will always lose the token at the end of the file unless the file
+ * ends in a token delimiter (usually a newline).
+ *
+ * We ignore n because there can be less than n bytes left in the file,
+ * yet one or more tokens will still match. Therefore, we should always
+ * read as much data as we can, and we should return success even if we
+ * have less than n bytes in the buffer. N is totally useless.
+ *
+ * The last line of the macro is the limitation. If it weren't there, YYFILL would
+ * return with an empty buffer so re2c would know it's at EOF and
+ * shut down gracefully. But re2c can't handle that.
+ *
+ * If you're using the re2c lib but writing your own re2c scanners,
+ * call ss->read directly.
+ */
+
+#define YYFILL(n) do { \
+ if(ss->last_read > 0) ss->last_read = (*ss->read)(ss); \
+ if(ss->last_read < 0) return ss->last_read; \
+ if((ss)->cursor >= (ss)->limit) return 0; \
+ } while(0);
+
+
+// forward declaration
+struct scanstate;
+
+
+/** Readprocs provide the scanners with data.
+ *
+ * Each scanner has a single readproc that feeds it with data.
+ * The re2c library includes some standard readprocs
+ * (see \ref readproc_builtins) and makes it easy to write your own
+ * if the builtins don't do what you want (see \ref writing_readprocs).
+ *
+ * @returns: A positive return value specifies the number of bytes
+ * that were returned. Zero means EOF. Negative means an error
+ * occurred (the exact meaning of the error code depends on the
+ * readproc).
+ *
+ * re2c doesn't work well with partial reads so be sure your readproc
+ * fills the entire buffer if possible (see \ref readproc_shortreads).
+ *
+ * See \ref error_handling for more on how errors are propagated
+ * through the re2c library.
+ */
+
+typedef ssize_t (*readproc)(struct scanstate *ss);
+
+
+/** Scans the data and returns the next token.
+ *
+ * A scanner is simply one or more functions that accept a scanstate
+ * object and return the next token in that stream.
+ * The scanproc will typically be generated with the
+ * assistance of re2c, but the re2c library can be useful
+ * even when re2c isn't used (see \ref cmpscan).
+ *
+ * Once you have created the scanstate data structure,
+ * pass it to the scanner. If the scanner returns 0,
+ * you hit EOF. If the scanner returns a negative number,
+ * then some sort of error was encountered. Or, if you're
+ * doing nonblocking I/O, it might just mean that there's not
+ * enough data available yet to determine the next token.
+ */
+
+typedef int (*scanproc)(struct scanstate *ss);
+
+
+
+/** Contains all state for a single scan session.
+ *
+ * This structure is used by a scanner to preserve its state.
+ *
+ * All charptrs are declared const to help ensure that you don't
+ * accidentally end up modifying the buffer as it's being scanned.
+ * This means that any time you want to read data into the buffer,
+ * you need to cast the pointers to be nonconst.
+ */
+
+struct scanstate {
+ const char *cursor; ///< The current character being looked at by the scanner. This is the same as re2c's YYCURSOR.
+ const char *limit; ///< The last (uppermost) valid character in the current buffer. This is the same as re2c's YYLIMIT.
+ const char *marker; ///< Used internally by re2c engine to handle backtracking. This is the same as re2c's YYMARKER.
+
+ const char *token; ///< The start of the current token.
+ int line; ///< The scanner may or may not maintain the current line number in this field. Typically a scanner's attach routine sets this field to 1 if it properly keeps track of line numbers and leaves it set to 0 if it doesn't. See \ref linenos for more.
+ ssize_t last_read; ///< The return value from the last time the ::readproc was called. If last_read is 0 (eof) or negative (error), then the readproc should not be called.
+
+ const char *bufptr; ///< The buffer currently in use.
+ size_t bufsiz; ///< The maximum number of bytes that the buffer can hold.
+
+ void *readref; ///< Data specific to the ::readproc (i.e. for readfp_attach(), readref contains the FILE*).
+ readproc read; ///< Routine that refills the scan buffer. See ::readproc.
+
+ void *scanref; ///< Data specific to the scanner. Only the scanner may use this field.
+ scanproc state; ///< The entrypoint for the scanning routine. More complex scanners are made up of multiple individual scan routines -- \ref startstates -- and they store their state here.
+
+ void *userref; ///< Never touched by any re2c routines. Well, except scanstate_init(), which clears both this field and userproc to 0. It could be used to associate a parser with this scanstate.
+ void *userproc; ///< Never touched by any re2c routines. See scanstate::userref.
+};
+typedef struct scanstate scanstate;
+
+
+/** Initializes a given scanstate structure.
+ *
+ * Call this to prepare a scanner for use. Some routines, such as
+ * readmem_init() and dynscan_create(), call scanstate_init() for you.
+ *
+ * @param ss The scanstate to initialize. There is no need to clear this
+ * memory first -- scanstate_init initializes every field.
+ * @param bufptr The scan buffer. Pass NULL if you don't care to specify
+ * a scan buffer, such as when attaching a readproc that includes its
+ * own buffer (readmem_attach()).
+ * @param bufsiz Size, in bytes, of bufptr. Pass 0 when bufptr is NULL.
+ */
+
+void scanstate_init(scanstate *ss, const char *bufptr, size_t bufsiz);
+void scanstate_reset(scanstate *ss);
+
+
+/** Returns true when there's no more data to be scanned.
+ *
+ * It is much better to just call scan_next_token() until it returns
+ * 0 (EOF) or a negative number (a readproc error).
+ * While there are a few cases where it's useful to reliably
+ * check for EOF without having to scan a token, this is often a
+ * sign of bad design. Therefore, try to use scan_is_finished() sparingly.
+ *
+ * There's a slim chance that this routine will call the readproc.
+ * If the buffer
+ * contains no more data but the file is not at eof, we must execute
+ * a read to discover if there's any more data available.
+ * If the read returns EOF or an error, scan_is_finished will return 1.
+ *
+ * How this macro works:
+ *
+ * - If there's still more data in the buffer, then we're not finished.
+ * - If there's no data in the buffer and the previous read returned
+ * EOF or an error, then we're finished.
+ * - If there's no data in the buffer but we're not at eof, then we
+ * execute a read to see if there's more data available. If that read
+ * returns EOF or an error, then we're finished. Otherwise, there's
+ * now more data in the buffer so we're not done.
+ */
+
+#define scan_is_finished(ss) \
+ (((ss)->cursor < (ss)->limit) ? 0 : \
+ ((ss)->last_read <= 0 || ((*(ss)->read)(ss) <= 0)) \
+ )
+
+
+/** Fetches the next token in the stream from the scanner.
+ *
+ * This routine causes the scanner to actually scan.
+ * Here is an example of how to call it:
+ *
+ * <pre>
+ * int token;
+ * do {
+ * token = scan_next_token(ss);
+ * if(token < 0) {
+ * // scanner's readproc returned an error
+ * break;
+ * }
+ * handle_token(token);
+ * } while(token);
+ * </pre>
+ *
+ * handle_token() must properly handle eof (token == 0).
+ * You generally want to pass the EOF to the parser consuming
+ * the tokenized data. This allows it to complain if it's
+ * in a bad state, such as when parsing an unterminated string
+ * constant, etc.
+ *
+ * @param ss The scanstate with a readproc and a scanner attached.
+ *
+ * @returns The next token from the input stream (tokens are always
+ * greater than 0). Returns 0 if the input stream is at EOF and
+ * there are no more tokens. Returns a negative value if the
+ * ::readproc returned an error.
+ */
+
+#define scan_next_token(ss) ((*((ss)->state))(ss))
+
+
+/** Returns the text of the most recently scanned token.
+ *
+ * This returns all the text of the most recently matched token.
+ * Note that this data is only valid until the next time
+ * you call scan_next_token.
+ *
+ * EXAMPLE:
+ *
+ * <pre>
+ * printf("Token is: %.*s\n", token_length(ss), token_start(ss));
+ * </pre>
+ *
+ * See also scan_token_end() and scan_token_length().
+ * You can also use scan_token_dup() to access the current token.
+ */
+
+#define scan_token_start(ss) ((ss)->token)
+
+/** Returns a pointer to the end of the most recently scanned token.
+ *
+ * Returns a pointer to the character following the last character of the
+ * most recently scanned token.
+ *
+ * scan_token_end(ss) - scan_token_start(ss) == scan_token_length(ss)
+ *
+ * See scan_token_start().
+ */
+
+#define scan_token_end(ss) ((ss)->cursor)
+
+/** Returns the length in bytes of the most recently scanned token.
+ *
+ * See the example in scan_token_start().
+ */
+
+#define scan_token_length(ss) ((ss)->cursor - (ss)->token)
+
+/** Copies the text of the current token into a malloc'd memory buffer.
+ *
+ * Because it copies the token, the data in the buffer will be valid
+ * until you call free(3) to release it.
+ *
+ * This macro just calls strndup(3) internally. Make sure to
+ * include <string.h> if you use this macro in your own code.
+ *
+ * Because it calls malloc, this routine is quite slow.
+ * See scan_token_start() for a speedy way to access the
+ * text of the current token.
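+ *
+ * A short usage sketch (do_something() is a hypothetical consumer;
+ * free() needs <stdlib.h>):
+ *
+ * <pre>
+ * char *text = scan_token_dup(ss);
+ * if(text) {
+ * do_something(text);
+ * free(text);
+ * }
+ * </pre>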
+ */
+
+#define scan_token_dup(ss) strndup(scan_token_start(ss), scan_token_length(ss))
+
+
+/** Pushes the current token back onto the stream
+ *
+ * Calling scan_pushback returns the scanner to the state it was in
+ * immediately prior to returning the current token. If you decide that
+ * you don't want to handle this particular token right now,
+ * you can push it back
+ * onto the scanner. It will be returned the next time scan_next_token()
+ * is called.
+ *
+ * Note that you can only push back a single token.
+ * Also, some scanners may become confused by pushing a token back.
+ * Generally, if the scanner maintains any sort of state on its own,
+ * you cannot use scan_pushback on it.
+ *
+ * Finally, this doesn't back the line number up. Because most tokens
+ * don't span multiple lines, this is generally not a problem. However,
+ * if you're pushing a token back and want to ensure the correct line
+ * number is maintained, you'll have to do something like this:
+ *
+ * <pre>
+ * // First ensure that the scanner you're using doesn't
+ * // have internal state that will be screwed up if you
+ * // re-scan the current token!
+ *
+ * oldline = ss->line;
+ * tok = scan_next_token(ss);
+ * if(tok == push_me_back) {
+ * scan_pushback(ss);
+ * ss->line = oldline;
+ * }
+ * </pre>
+ *
+ * Yes, it takes some effort to call this function safely.
+ * But it is all worth it when you really need it.
+ */
+
+#define scan_pushback(ss) ((ss)->cursor = (ss)->token)
+
+
+/**
+ * Sets the current line number in the scanner to the given value.
+ *
+ * It's generally better to use this macro to manipulate the
+ * line number because it's more visible and easier to grep for.
+ */
+
+#define scan_set_line(ss,n) ((ss)->line=(n))
+
+
+/** Increments the current line number by 1.
+ *
+ * It's generally better to use this macro to manipulate the
+ * line number because it's more visible and easier to grep for.
+ */
+
+#define scan_inc_line(ss) ((ss)->line++)
+
+
+/** Prepares a scanner to scan the next token.
+ *
+ * This macro must be called by scanners only! See
+ * \ref writing_scanners for more.
+ *
+ * Scanners must call scanner_enter() at the beginning of each ::scanproc
+ * to prepare the scanner to scan a new token.
+ */
+
+#define scanner_enter(ss) ((ss)->token = (ss)->cursor)
+
+
+/** This gives the version number of the re2c library currently being used.
+ */
+
+extern const char *re2c_library_version;
+
+#endif
+
--- /dev/null
+/** @file mainpage.c
+ *
+ * Contains the content for the main page of the documentation.
+ * Also has a number of pages that don't really fit anywhere else.
+ *
+ * There's no point in compiling this file since it only contains
+ * documentation but there's no harm either.
+ */
+
+
+
+
+/** @mainpage
+
+libre2c tries to make it much easier to use re2c scanners
+without sacrificing re2c's amazing flexibility. This
+documentation is for re2c version VERSION.
+
+
+\section examples Examples
+
+The best way to learn about libre2c is to read some examples.
+These examples are arranged in order from simplest to strangest.
+
+- \ref numscancoupled -- A very simple scanner and parser
+ contained in a single file.
+
+- \ref numscanmodular -- Continues the previous example.
+ It splits numscan-coupled into a separate
+ scanner and parser. Also shows how to read data using fopen/fread.
+
+- \ref startstates -- A scanner that demonstrates switching between
+ start states while looking for C and C++ comments.
+
+- \ref cmpscan -- A utility that compares two data sources byte-for-byte.
+ This example shows that libre2c can be useful even if you
+ don't use re2c to generate a scanner.
+
+There is also the skeleton project in the examples directory.
+It's a project where you can just
+fill in the blanks to create an re2c scanner.
+
+\section reference Reference
+
+- \ref writing_parsers
+
+- \ref writing_scanners
+
+- \ref writing_readprocs
+
+\subsection readproc_builtins Builtin Readprocs
+
+A ::readproc feeds data to the scanner. Here are some routines that
+libre2c provides to use the built-in readprocs.
+If none of these fits your needs, see \ref writing_readprocs.
+
+- readfd_attach() -- attaches an already-open file descriptor to an already-created scanner.
+- readfd_open() -- creates a scanner and sets it up to read from a filepath.
+- readfp_attach() -- attaches an already-open FILE* to an already-created scanner.
+- readfp_open() -- creates a scanner and sets it up to read from a filepath.
+- readmem_attach() -- causes a scanner to read directly from a memory block.
+- readmem_init_str() -- initializes a scanner to read from a C string.
+- readrand_attach() -- feeds the scanner a pseudorandom byte stream.
+
+*/
+
+
+
+
+
+/** @page writing_parsers How to Use a Scanner
+
+This page tells you how to set up your code to call an re2c scanner.
+See \ref writing_scanners to learn how to write re2c scanners.
+See \ref numscancoupled and \ref numscanmodular for a tutorial.
+
+Using a re2c scanner is quite easy:
+
+* decide how you'll read the data. You'll use a readproc for this.
+ See either \ref readproc_builtins or \ref writing_readprocs.
+
+* decide how you'll buffer the scanner data. Will you malloc a
+ buffer? Will you create it on the stack? Are you using a readproc
+ that doesn't even require a buffer (such as readmem.c)?
+
+Then:
+
+* Create the buffer if needed.
+* Create the scanstate, point it at the buffer.
+* Attach the readproc.
+* Attach the scanner.
+
+Now you're ready to scan.
+
+* Call scan_next_token() until it returns either 0 (eof) or negative (error).
+
+See the above two tutorials for working code examples.
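+
+The fragment below is only a rough sketch of those steps, using a stack
+buffer and the builtin FILE* readproc. The argument order of
+readfp_attach() and the names numscan_attach() and handle_token() are
+assumptions here; see the tutorials for code that is known to compile.
+
+<pre>
+char buf[BUFSIZ];
+scanstate ss;
+int token;
+
+scanstate_init(&ss, buf, sizeof(buf));
+readfp_attach(&ss, fp); // fp is an already-open FILE*
+numscan_attach(&ss); // attach the scanner you want to use
+
+do {
+ token = scan_next_token(&ss);
+ if(token < 0) break; // readproc error
+ handle_token(token); // your parser; must also handle 0 (EOF)
+} while(token > 0);
+</pre>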
+
+ */
+
+
+
+
+
+/** @page writing_scanners Writing Scanners
+
+This page tells you how to write a libre2c scanner.
+See \ref writing_parsers to learn how to call these scanners.
+See \ref numscancoupled and \ref numscanmodular for a tutorial.
+
+Scanners consist of code generated by re2c, surrounded by
+a tiny bit of boilerplate support code to ensure tokens are
+properly maintained.
+
+Scanners typically consist of two files:
+
+- A header file defining the tokens and the routine(s) to attach
+the scanner to the readproc.
+
+- A source file containing the re2c source and a small amount
+of boilerplate.
+
+You can use scanner.h and scanner.re as a starting point for your scanners.
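+
+As a very rough sketch (the scanner and token names here are made up;
+the skeleton project contains real, working boilerplate), the body of a
+scanproc usually looks something like this:
+
+<pre>
+int mylang_scan(scanstate *ss)
+{
+ scanner_enter(ss); // start the next token at the current cursor
+
+ // The re2c-generated matching code goes here. Note that the
+ // parameter must be named "ss" so that the YYCURSOR/YYLIMIT/
+ // YYMARKER/YYFILL macros in scan.h resolve. Each rule's action
+ // simply returns a token, e.g.:
+ //
+ // [0-9]+ { return TOKEN_NUMBER; }
+ // [^0-9]+ { return TOKEN_OTHER; }
+}
+</pre>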
+
+ */
+
+
+
+/** @page writing_readprocs Writing Readprocs
+
+The readproc is the routine responsible for filling the re2c
+scanner's buffer with data.
+
+libre2c includes some readprocs that read from most common
+data sources (libc FILE*, Unix file descriptors, a memory block, etc).
+See \ref readproc_builtins for a list of readprocs included with
+libre2c.
+
+If a builtin readproc does everything you need, then there is no need
+to read this page further. If you need to write a custom readproc,
+however, this page should tell you everything you need.
+See \ref writing_scanners to learn how to write an re2c scanner.
+
+\section readproc_arguments Arguments
+
+\section readproc_retval Return Value
+
+See \ref error_handling for more information on how libre2c handles errors.
+
+\section readproc_shortreads Short Reads
+
+re2c uses a pull model, where the scanner calls the readproc whenever
+it needs more data. re2c has recently been modified to support a
+push model as well but libre2c does not support this yet.
+This means that it is not easy to parse data from any source that
+may return short reads (such as a pipe, socket, fifo, Unix device,
+etc). Your readproc should never return a short read so you'll
+need to use buffering if the underlying data source does.
+
+Until push mode was added, re2c didn't handle data sources that
+return short reads well, and libre2c has not (yet) been updated
+to take advantage of push mode.
+
+How does the scanner tell the difference between EOF and EAGAIN? If re2c
+notices that it might run out of data before scanning a token, it
+calls the readproc once to ensure that there is as much data in the
+buffer as possible. This ensures that you can always scan a token
+exactly as big as the buffer.
+
+You only need to know this if you're writing your own read functions.
+
+Your readproc must first make room in the buffer for more data.
+The read_shiftbuffer() routine is the best way to do this.
+Once you have freed up room at the end of the buffer, fill it
+with data.
+
+@returns
+- 0 when there's no more data (EOF).
+- positive: the number of bytes read (note that re2c does not work
+well with short reads).
+- negative: an error value. The value returned depends on the readproc.
+Many readprocs return Unix error codes such as -EWOULDBLOCK.
+
+
+
+All charptrs in the scanstate structure are declared const to help
+ensure that scanner or parser writers don't
+accidentally end up modifying the buffer as it is being scanned.
+This means that your read routine must cast them to be mutable
+(char*) before reading data into them. It is a libre2c convention
+that only the readproc may modify a scanner's buffer.
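+
+To make this concrete, here is a sketch of a readproc that reads from a
+Unix file descriptor. It is not one of the library's builtins (use
+readfd_attach() for that); it does the shift by hand with memmove because
+read_shiftbuffer()'s exact signature isn't shown on this page, and it
+assumes a hypothetical attach routine stashed the fd in ss->readref.
+It needs unistd.h, errno.h, string.h, and stdint.h.
+
+<pre>
+ssize_t myfd_read(scanstate *ss)
+{
+ char *buf = (char*)ss->bufptr; // only readprocs may touch the buffer
+ size_t shift = ss->token - ss->bufptr;
+ size_t used;
+ ssize_t n;
+
+ // Shift the partial token (token..limit) down to the buffer start
+ // so the scanner can keep matching across the refill.
+ if(shift) {
+ memmove(buf, ss->token, ss->limit - ss->token);
+ ss->token -= shift;
+ ss->cursor -= shift;
+ ss->limit -= shift;
+ if(ss->marker) ss->marker -= shift;
+ }
+
+ // Fill all the free space at the end of the buffer.
+ used = ss->limit - ss->bufptr;
+ n = read((int)(intptr_t)ss->readref, buf + used, ss->bufsiz - used);
+ if(n < 0) {
+ return -errno; // e.g. -EWOULDBLOCK; see \ref readerrvals
+ }
+ ss->limit += n;
+ return n; // number of bytes read; 0 means EOF
+}
+</pre>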
+
+The caller assumes that the read routine will always fill the buffer
+up as much as possible. Therefore, if the buffer isn't entirely full,
+then it knows that the EOF is probably at the end of the data.
+There is currently no way in re2c for a token to span a short read.
+This is fine for files but not so good for pipes, network
+sockets, anything that is packetized or works in realtime.
+
+The shift technique was chosen over a ringbuffer because we should rarely
+have to shift data. If you find that your file has gigantic tokens
+and you're burning a lot of cpu shifting partial tokens from the end
+of the buffer to the start, you might be tempted to use a ring buffer
+instead of a shift buffer. Alas, re2c itself can't handle ringbuffers or
+tokens split across buffer boundaries (and neither can most scanners
+that I'm aware of).
+
+ */
+
+
+
+
+
+/** @page linenos Line Numbers
+
+Because it costs CPU cycles and many scanners don't need it,
+neither re2c scanners nor libre2c keep track of line numbers.
+
+It is trivial, however, to modify your scanner to keep track of line
+numbers itself. And, if it won't cause a significant performance
+drop, it's highly suggested. All you do is ensure that your scanner
+matches \\n as a single token and increment scanstate::line every time
+it is matched.
+
+A scanner's attach routine normally leaves scanstate::line set to 0 if
+it doesn't properly handle line numbers, or sets it to 1 if it does.
+
+Here is a scanner that doesn't properly keep track of line numbers.
+It groups numbers and non-numbers into contiguous tokens. Newlines
+are included along with every other non-digit character in the STRING
+token. This means that STRING tokens can span multiple lines.
+
+<pre>
+ [0-9]+ { return NUMBER; }
+ [^0-9]+ { return STRING; }
+</pre>
+
+To fix this, we first set the line number in the attach proc.
+
+<pre>
+scanstate* numscan_attach(scanstate *ss)
+{
+ ss->state = numscan_start;
+ scan_set_line(ss, 1);
+ return ss;
+}
+</pre>
+
+Then, in the re2c scanner, we specify that "\\n" is a token and increase the
+line number whenever this token is matched:
+
+<pre>
+ [0-9]+ { return NUMBER; }
+ [^0-9\\n]+ { return STRING; }
+ [\\n] { scan_inc_line(ss); return NEWLINE; }
+</pre>
+
+The \ref numscanmodular example shows a full
+scanner that properly keeps track of line numbers.
+
+*/
+
+
+
+
+/** @page error_handling Error Handling
+
+libre2c is meticulous at checking errors so you don't have to be.
+
+If an error occurs while scanning, it is stored within the scanner.
+From now on, any time you try to pull a token, the error code will
+be returned. This means that you can defer all error checking until
+you're finished scanning.
+In addition to making your code smaller and more readable, this
+allows you to handle all your error checking in one place:
+
+<pre>
+ i1 = scan_next_token(ss);
+ // An error may have occurred but we don't need to worry about it here.
+ i2 = scan_next_token(ss);
+ // If i1 returned an error, we're guaranteed that i2 contains the same error.
+ i3 = scan_next_token(ss);
+ if(i3 > 0) {
+ // We know that i1 and i2 were valid too.
+ } else if(i3 == 0) {
+ // We know that no token was returned in i3 because we hit EOF.
+ // i1 and i2 may or may not be 0 as well.
+ } else { // i3 < 0
+ // There was a read error during parsing!
+ }
+</pre>
+
+\section errvals Error Values
+
+There are only two types of errors that may occur during scanning.
+The readproc may encounter OS-level trouble, such as an I/O error, and
+the scanner may encounter a text sequence that sends it into an invalid
+state. Both of these errors are reported using the return value from
+scan_next_token():
+
+- Positive return values are tokens or scanner errors.
+- Negative return values are ::readproc errors (the exact meaning of the negative value depends on the readproc).
+- 0 indicates a normal EOF.
+
+If a token is negative, it represents an error reported by a readproc.
+Scanners always return positive tokens, even if the token represents
+an error state.
+This ensures that the scanner and
+readproc error spaces will never clash, and that the scanner will not
+cause the readproc's error handling to enter into a weird state.
+
+\subsection readerrvals Readproc Errors
+
+When the readproc reports an error, the ::scanstate enters an error
+state that can only be reset by scanstate_reset(). Readproc errors
+cause the scanning process to halt permanently.
+
+Readproc errors are always negative numbers.
+
+\subsection scanerrvals Scanner Errors
+
+The scanner decides how it will handle error reporting. Some scanners
+may find it useful to latch into an error state when an error token
+is returned. Others may return the error token, then try to resume
+scanning as further tokens are fetched.
+
+Scanner errors are always positive numbers defined by the scanner
+as errors. Read the particular scanner's documentation to see how
+it is handled.
+
+*/
--- /dev/null
+CSRC+=$(wildcard ../libre2c/*.c)
+CHDR+=$(wildcard ../libre2c/*.h)
+
+COPTS=-Wall -Werror -g
+CMPLOC=../examples/compare
+
+all: tester
+
+tester: zutest.c zutest.h retest.c retest.h tester.c $(CSRC) $(CHDR) $(CMPLOC)/compare.c $(CMPLOC)/compare.h
+ gcc $(COPTS) -I. -I.. -I$(CMPLOC) zutest.c retest.c tester.c $(CMPLOC)/compare.c $(CSRC) -DZUTEST -o tester
+
+test: tester
+ ./tester
+
+clean:
+ rm -f tester
--- /dev/null
+This directory contains the unit tests for the re2c library.
+To run the tests, simply run the executable with no arguments.
+
+ zutest: the unit tests for the zutest unit test library.
+ (note that some tests should fail)
+
+ retest: the unit tests for the re2c library.
+
+Right now only the compare example contains unit tests.
+TODO: write more. In particular, read_shiftbuf seems a
+good candidate for unit tests. Also maybe the error handling.
+
--- /dev/null
+// This file contains unit tests that test the re2c library itself.
+
+#include "retest.h"
+#include "libre2c/scan.h"
+#include <stddef.h>
+
+
+static ssize_t test_last_readproc(scanstate *ss)
+{
+ if(ss->readref) {
+ Fail("readproc was called after returning an error!");
+ return -2; // should not be executed.
+ }
+
+ ss->readref = test_last_readproc;
+ return -1;
+}
+
+
+static int test_last_scanner(scanstate *ss)
+{
+ scanner_enter(ss);
+ YYFILL(1);
+ Fail("Readproc should have returned an error!");
+ return -3;
+}
+
+
+/**
+ * This test ensures that an error value is latched when encountered
+ * and that the readproc isn't called anymore.
+ */
+
+static void test_last_read()
+{
+ scanstate state, *ss=&state;
+
+ scanstate_init(ss, NULL, 0);
+ ss->readref = NULL;
+ ss->read = test_last_readproc; // attach reader
+ ss->state = test_last_scanner; // attach scanner
+
+ AssertEq(scan_next_token(ss), -1);
+ AssertEq(scan_next_token(ss), -1);
+ AssertEq(scan_next_token(ss), -1);
+ AssertEq(scan_next_token(ss), -1);
+
+ // error value appears latched and readproc was only called
+ // once (otherwise the readproc would have failed).
+}
+
+
+zutest_proc re2c_tests[] = {
+ test_last_read,
+ NULL
+};
+
--- /dev/null
+#include "zutest.h"
+
+extern zutest_proc re2c_tests[];
--- /dev/null
+#include "zutest.h"
+#include "retest.h"
+#include "compare.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+
+zutest_suite all_unit_tests[] = {
+ zutest_tests,
+ re2c_tests,
+ compare_tests,
+ NULL
+};
+
+
+int main()
+{
+ run_unit_tests(all_unit_tests);
+ exit(1);
+}
+
--- /dev/null
+/* zutest.c
+ * Scott Bronson
+ * 6 Mar 2006
+ *
+ * Version 0.6, 26 Apr 2006
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <setjmp.h>
+#include "zutest.h"
+
+
+/** @file zutest.c
+ *
+ * This file contains all of the test mechanisms provided by the
+ * Zutest unit testing framework.
+ *
+ * A single function is called a test. If any of the asserts fail
+ * within a test, the test itself is stopped and printed as a failure
+ * but all other tests in the current test suite, and all other test
+ * suites, will still be run.
+ *
+ * A test suite consists of a number of tests. Typically a C file
+ * will include a test suite that lists all the tests in the file.
+ *
+ * TODO: print test results, test suites, etc as they run.
+ * Add a quiet flag that will suppress printing unless a test fails.
+ * quiet=0, full printing
+ * quiet=1, test results not printed
+ * quiet=2, suite results not printed
+ * quiet=3, summary not printed.
+ */
+
+
+static jmp_buf test_bail; ///< If a test fails, this is where we end up.
+int zutest_assertions; ///< A goofy statistic, updated by the assertion macros
+static int tests_run; ///< The number of tests that we have run. successes+failures==tests_run (if not, then there's a bug somewhere).
+static int successes; ///< The number of successful tests run
+static int failures; ///< The number of failed tests run.
+static jmp_buf *inversion; ///< Where to go if the assertion fails. This is NULL except when running Zutest's internal unit tests. See test_fail().
+
+
+void zutest_fail(const char *file, int line, const char *func,
+ const char *msg, ...)
+{
+ va_list ap;
+
+ // If inversion is set, then an assert correctly failed.
+ if(inversion) {
+ longjmp(*inversion, 1);
+ }
+
+ fprintf(stderr, "FAIL %s at %s line %d:\n\t", func, file, line);
+ va_start(ap, msg);
+ vfprintf(stderr, msg, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+
+ longjmp(test_bail, 1);
+}
+
+
+void run_zutest_suite(const zutest_suite suite)
+{
+ const zutest_proc *test;
+
+ for(test=suite; *test; test++) {
+ tests_run += 1;
+ if(!setjmp(test_bail)) {
+ (*test)();
+ successes += 1;
+ } else {
+ failures += 1;
+ }
+ }
+}
+
+
+void run_zutest_suites(const zutest_suites suites)
+{
+ zutest_suite *suite;
+
+ for(suite=suites; *suite; suite++) {
+ run_zutest_suite(*suite);
+ }
+}
+
+
+void print_zutest_results()
+{
+ if(failures == 0) {
+ printf("All OK. %d test%s run, %d successe%s (%d assertion%s).\n",
+ successes, (successes == 1 ? "" : "s"),
+ successes, (successes == 1 ? "" : "s"),
+ zutest_assertions, (zutest_assertions == 1 ? "" : "s"));
+ return;
+ }
+
+ printf("ERROR: %d failure%s in %d test%s run!\n",
+ failures, (failures == 1 ? "" : "s"),
+ tests_run, (tests_run == 1 ? "" : "s"));
+}
+
+
+/** Runs all the unit tests in all the passed-in test suites.
+ */
+
+void run_unit_tests(const zutest_suites suites)
+{
+ run_zutest_suites(suites);
+ print_zutest_results();
+ exit(failures < 100 ? failures : 100);
+}
+
+
+/**
+ * Examines the command-line arguments. If "--run-unit-tests" is
+ * the first argument, then it runs the unit tests (further arguments
+ * may affect how the tests are processed). This routine exits with
+ * a nonzero result code if any test fails; otherwise it exits with 0.
+ * In that case it never returns.
+ *
+ * If --run-unit-tests is not on the command line, this routine returns
+ * without doing anything.
+ */
+
+void unit_test_check(int argc, char **argv, const zutest_suites suites)
+{
+ if(argc > 1 && strcmp(argv[1],"--run-unit-tests") == 0) {
+ run_unit_tests(suites);
+ }
+}
+
+
+
+
+
+#if defined(ZUTEST) || defined(ZUTEST_MAIN)
+
+/* This code runs the zutest unit tests to ensure that zutest itself
+ * is working properly.
+ */
+
+
+/** This macro is used to reverse the sense of the tests.
+ *
+ * To properly test Zutest, we need to ensure that the Assert macros
+ * handle failures too. Therefore, we occasionally want to reverse
+ * the sense of the macro, where a failure indicates a successful test
+ * and a passing assert means that the test has failed.
+ *
+ * This macro inverts the sense of the contained assertion.
+ * test_failure(AssertEq(a,b)) causes the test to pass
+ * ONLY IF the assertion fails (i.e. when a != b).
+ *
+ *
+ */
+
+#define test_failure(test) \
+ do { \
+ jmp_buf jb; \
+ int val = setjmp(jb); \
+ if(val == 0) { \
+ inversion = &jb; \
+ do { test; } while(0); \
+ inversion = NULL; \
+ Fail("This test should have failed: " #test); \
+ } \
+ inversion = NULL; \
+ } while(0)
+
+
+
+void test_assert_int()
+{
+ int a=4, b=3, c=4, z=0, n=-1;
+
+ AssertEq(a,c);
+ AssertNe(a,b);
+ AssertGt(a,b);
+ AssertGe(a,b);
+ AssertGe(a,c);
+ AssertLt(b,a);
+ AssertLe(b,a);
+ AssertLe(c,a);
+
+ AssertZero(z);
+ test_failure( AssertZero(a) );
+ AssertNonzero(a);
+ test_failure( AssertNonzero(z) );
+ AssertPositive(a);
+ test_failure( AssertPositive(z) );
+ AssertNegative(n);
+ test_failure( AssertNegative(z) );
+
+ test_failure( AssertEq(a,b) );
+ test_failure( AssertNe(a,c) );
+ test_failure( AssertGt(a,c) );
+ test_failure( AssertGt(b,c) );
+ test_failure( AssertGe(b,a) );
+ test_failure( AssertLt(c,a) );
+ test_failure( AssertLt(c,b) );
+ test_failure( AssertLe(a,b) );
+
+ AssertIntEq(a,c);
+ AssertIntNe(a,b);
+ AssertIntGt(a,b);
+ AssertIntGe(a,b);
+ AssertIntGe(a,c);
+ AssertIntLt(b,a);
+ AssertIntLe(b,a);
+ AssertIntLe(c,a);
+
+ test_failure( AssertIntEq(a,b) );
+ test_failure( AssertIntNe(a,c) );
+ test_failure( AssertIntGt(a,c) );
+ test_failure( AssertIntGt(b,c) );
+ test_failure( AssertIntGe(b,a) );
+ test_failure( AssertIntLt(c,a) );
+ test_failure( AssertIntLt(c,b) );
+ test_failure( AssertIntLe(a,b) );
+
+ AssertEqHex(a,c);
+ AssertNeHex(a,b);
+ AssertGtHex(a,b);
+ AssertGeHex(a,b);
+ AssertGeHex(a,c);
+ AssertLtHex(b,a);
+ AssertLeHex(b,a);
+ AssertLeHex(c,a);
+
+ test_failure( AssertEqHex(a,b) );
+ test_failure( AssertNeHex(a,c) );
+ test_failure( AssertGtHex(a,c) );
+ test_failure( AssertGtHex(b,c) );
+ test_failure( AssertGeHex(b,a) );
+ test_failure( AssertLtHex(c,a) );
+ test_failure( AssertLtHex(c,b) );
+ test_failure( AssertLeHex(a,b) );
+}
+
+
+void test_assert_ptr()
+{
+ int a, b;
+ int *ap = &a;
+ int *bp = &b;
+ int *cp = &a;
+ int *n = NULL;
+
+ AssertPtr(ap);
+ AssertNull(n);
+
+ test_failure( AssertPtr(n) );
+ test_failure( AssertNull(ap) );
+
+ AssertPtrEq(ap,cp);
+ AssertPtrNe(ap,bp);
+ AssertPtrGt(ap,bp);
+ AssertPtrGe(ap,bp);
+ AssertPtrGe(ap,cp);
+ AssertPtrLt(bp,ap);
+ AssertPtrLe(bp,ap);
+ AssertPtrLe(cp,ap);
+
+ test_failure( AssertPtrEq(ap,bp) );
+ test_failure( AssertPtrNe(ap,cp) );
+ test_failure( AssertPtrGt(ap,cp) );
+ test_failure( AssertPtrGt(bp,cp) );
+ test_failure( AssertPtrGe(bp,ap) );
+ test_failure( AssertPtrLt(cp,ap) );
+ test_failure( AssertPtrLt(cp,bp) );
+ test_failure( AssertPtrLe(ap,bp) );
+}
+
+
+void test_assert_float()
+{
+ float a=0.0004, b=0.0003, c=0.0004;
+
+ AssertFloatEq(a,c);
+ AssertFloatNe(a,b);
+ AssertFloatGt(a,b);
+ AssertFloatGe(a,b);
+ AssertFloatGe(a,c);
+ AssertFloatLt(b,a);
+ AssertFloatLe(b,a);
+ AssertFloatLe(c,a);
+
+ test_failure( AssertFloatEq(a,b) );
+ test_failure( AssertFloatNe(a,c) );
+ test_failure( AssertFloatGt(a,c) );
+ test_failure( AssertFloatGt(b,c) );
+ test_failure( AssertFloatGe(b,a) );
+ test_failure( AssertFloatLt(c,a) );
+ test_failure( AssertFloatLt(c,b) );
+ test_failure( AssertFloatLe(a,b) );
+
+ AssertDblEq(a,c);
+ AssertDblNe(a,b);
+ AssertDblGt(a,b);
+ AssertDblGe(a,b);
+ AssertDblGe(a,c);
+ AssertDblLt(b,a);
+ AssertDblLe(b,a);
+ AssertDblLe(c,a);
+
+ test_failure( AssertDblEq(a,b) );
+ test_failure( AssertDblNe(a,c) );
+ test_failure( AssertDblGt(a,c) );
+ test_failure( AssertDblGt(b,c) );
+ test_failure( AssertDblGe(b,a) );
+ test_failure( AssertDblLt(c,a) );
+ test_failure( AssertDblLt(c,b) );
+ test_failure( AssertDblLe(a,b) );
+
+ AssertDoubleEq(a,c);
+ AssertDoubleNe(a,b);
+ AssertDoubleGt(a,b);
+ AssertDoubleGe(a,b);
+ AssertDoubleGe(a,c);
+ AssertDoubleLt(b,a);
+ AssertDoubleLe(b,a);
+ AssertDoubleLe(c,a);
+
+ test_failure( AssertDoubleEq(a,b) );
+ test_failure( AssertDoubleNe(a,c) );
+ test_failure( AssertDoubleGt(a,c) );
+ test_failure( AssertDoubleGt(b,c) );
+ test_failure( AssertDoubleGe(b,a) );
+ test_failure( AssertDoubleLt(c,a) );
+ test_failure( AssertDoubleLt(c,b) );
+ test_failure( AssertDoubleLe(a,b) );
+}
+
+
+void test_assert_strings()
+{
+ const char *a = "Bogozity";
+ const char *b = "Arclamp";
+ const char *c = "Bogozity";
+
+ AssertStrEq(a,c);
+ AssertStrNe(a,b);
+ AssertStrGt(a,b);
+ AssertStrGe(a,b);
+ AssertStrGe(a,c);
+ AssertStrLt(b,a);
+ AssertStrLe(b,a);
+ AssertStrLe(c,a);
+
+ test_failure( AssertStrEq(a,b) );
+ test_failure( AssertStrNe(a,c) );
+ test_failure( AssertStrGt(a,c) );
+ test_failure( AssertStrGt(b,c) );
+ test_failure( AssertStrGe(b,a) );
+ test_failure( AssertStrLt(c,a) );
+ test_failure( AssertStrLt(c,b) );
+ test_failure( AssertStrLe(a,b) );
+}
+
+
+zutest_proc zutest_tests[] = {
+ test_assert_int,
+ test_assert_ptr,
+ test_assert_float,
+ test_assert_strings,
+ NULL
+};
+
+
+// Ensure that zutest doesn't crash if handed an empty suite.
+zutest_proc zutest_empty_suite[] = {
+ NULL
+};
+
+
+zutest_suite all_zutests[] = {
+ zutest_empty_suite,
+ zutest_tests,
+ NULL
+};
+
+
+#ifdef ZUTEST_MAIN
+int main(int argc, char **argv)
+{
+ run_unit_tests(all_zutests);
+ return 0;
+}
+#endif
+
+#endif
+
--- /dev/null
+/* zutest.h
+ * Scott Bronson
+ * 6 Mar 2006
+ *
+ * Version 0.61, 30 Apr 2006
+ */
+
+
+/** @file zutest.h
+ *
+ * This file contains the declarations and all the Assert macros
+ * required to use Zutest in your own applications.
+ *
+ * Zutest is a ground-up rewrite of Asim Jalis's "CuTest" library.
+ * It is released under the MIT License.
+ *
+ * To compile Zutest to run its own unit tests, do this:
+ *
+ * <pre>
+ * $ cc -DZUTEST_MAIN zutest.c -o zutest
+ * $ ./zutest
+ * 4 tests run, 4 successes (132 assertions).
+ * </pre>
+ *
+ * If your non-gcc compiler complains about a missing __func__ macro,
+ * add -D__func__='"test"' to the compile command line.
+ *
+ * See ::zutest_tests for instructions on how to add zutest's
+ * built-in unit tests to your application's test suite.
+ */
+
+
+#ifndef ZUTEST_H
+#define ZUTEST_H
+
+// Note that Fail can't increment zutest_assertions (the number of assertions
+// that have been made). You might want to increment zutest_assertions
+// manually if you care about this number. Normally you won't.
+#define Fail(...) zutest_fail(__FILE__, __LINE__, __func__, __VA_ARGS__)
+
+
+// If the expression returns false, it is printed in the failure message.
+#define Assert(x) do { zutest_assertions++; \
+ if(!(x)) { Fail(#x); } } while(0)
+// If the expression returns false, the given format string is printed.
+#define AssertFmt(x,...) do { zutest_assertions++; \
+ if(!(x)) { Fail(__VA_ARGS__); } } while(0)
+
+
+// On failure the expression is printed followed by the format string.
+#define AssertExp(ex,...) AssertFmt(ex,#ex __VA_ARGS__)
+// Like AssertExp but enforces a type while performing the comparison.
+#define AssertExpType(x,y,op,type,fmt) \
+ AssertExp((type)x op (type)y," failed because "#x"=="fmt" and "#y"=="fmt"!",\
+ (type)x,(type)y)
+
+// Same as above but all 8 digits are printed
+// If you give me a 64 bit computer, I will give you 16 digits!
+#define AssertHexOp(x,y,op) AssertExpType(x,y,op,long,"0x%lX")
+
+
+// These work with integers
+#define AssertEq(x,y) AssertOp(x,y,==)
+#define AssertNe(x,y) AssertOp(x,y,!=)
+#define AssertGt(x,y) AssertOp(x,y,>)
+#define AssertGe(x,y) AssertOp(x,y,>=)
+#define AssertLt(x,y) AssertOp(x,y,<)
+#define AssertLe(x,y) AssertOp(x,y,<=)
+
+#define AssertZero(x) AssertEq(x,0)
+#define AssertNonzero(x) Assert(x)
+#define AssertNonZero(x) AssertNonzero(x)
+#define AssertPositive(x) AssertGt(x,0)
+#define AssertNegative(x) AssertLt(x,0)
+
+#define AssertIntEq(x,y) AssertOp(x,y,==)
+#define AssertIntNe(x,y) AssertOp(x,y,!=)
+#define AssertIntGt(x,y) AssertOp(x,y,>)
+#define AssertIntGe(x,y) AssertOp(x,y,>=)
+#define AssertIntLt(x,y) AssertOp(x,y,<)
+#define AssertIntLe(x,y) AssertOp(x,y,<=)
+#define AssertOp(x,y,op) AssertExpType(x,y,op,long,"%ld")
+
+// Same as above but the values in the error report
+// are printed in hex rather than decimal.
+#define AssertEqHex(x,y) AssertHexOp(x,y,==)
+#define AssertNeHex(x,y) AssertHexOp(x,y,!=)
+#define AssertGtHex(x,y) AssertHexOp(x,y,>)
+#define AssertGeHex(x,y) AssertHexOp(x,y,>=)
+#define AssertLtHex(x,y) AssertHexOp(x,y,<)
+#define AssertLeHex(x,y) AssertHexOp(x,y,<=)
+
+
+#define AssertPtr(p) AssertFmt(p != NULL, \
+ #p" != NULL but "#p"==0x%08lX!", (unsigned long)p)
+#define AssertNull(p) AssertFmt(p == NULL, \
+ #p" == NULL but "#p"==0x%08lX!", (unsigned long)p)
+#define AssertNonNull(p) AssertPtr(p)
+
+#define AssertPtrNull(p) AssertNull(p)
+#define AssertPtrNonNull(p) AssertNonNull(p)
+#define AssertPtrEq(x,y) AssertPtrOp(x,y,==)
+#define AssertPtrNe(x,y) AssertPtrOp(x,y,!=)
+#define AssertPtrGt(x,y) AssertPtrOp(x,y,>)
+#define AssertPtrGe(x,y) AssertPtrOp(x,y,>=)
+#define AssertPtrLt(x,y) AssertPtrOp(x,y,<)
+#define AssertPtrLe(x,y) AssertPtrOp(x,y,<=)
+#define AssertPtrOp(x,y,op) AssertExpType(x,y,op,unsigned long,"0x%lX")
+
+// These work with floats and doubles
+#define AssertFloatEq(x,y) AssertFloatOp(x,y,==)
+#define AssertFloatNe(x,y) AssertFloatOp(x,y,!=)
+#define AssertFloatGt(x,y) AssertFloatOp(x,y,>)
+#define AssertFloatGe(x,y) AssertFloatOp(x,y,>=)
+#define AssertFloatLt(x,y) AssertFloatOp(x,y,<)
+#define AssertFloatLe(x,y) AssertFloatOp(x,y,<=)
+
+// Dbl is implemented the same as Float internally.
+// We just provide the Dbl and Double names so that the programmer can
+// use whatever name she prefers and the macro name can exactly match the type.
+#define AssertDblEq(x,y) AssertFloatOp(x,y,==)
+#define AssertDblNe(x,y) AssertFloatOp(x,y,!=)
+#define AssertDblGt(x,y) AssertFloatOp(x,y,>)
+#define AssertDblGe(x,y) AssertFloatOp(x,y,>=)
+#define AssertDblLt(x,y) AssertFloatOp(x,y,<)
+#define AssertDblLe(x,y) AssertFloatOp(x,y,<=)
+
+#define AssertDoubleEq(x,y) AssertFloatOp(x,y,==)
+#define AssertDoubleNe(x,y) AssertFloatOp(x,y,!=)
+#define AssertDoubleGt(x,y) AssertFloatOp(x,y,>)
+#define AssertDoubleGe(x,y) AssertFloatOp(x,y,>=)
+#define AssertDoubleLt(x,y) AssertFloatOp(x,y,<)
+#define AssertDoubleLe(x,y) AssertFloatOp(x,y,<=)
+#define AssertFloatOp(x,y,op) AssertExpType(x,y,op,double,"%lf")
+
+// These work with strings
+#define AssertStrEq(x,y) AssertStrOp(x,y,EQ,==)
+#define AssertStrNe(x,y) AssertStrOp(x,y,NE,!=)
+#define AssertStrGt(x,y) AssertStrOp(x,y,GT,>)
+#define AssertStrGe(x,y) AssertStrOp(x,y,GE,>=)
+#define AssertStrLt(x,y) AssertStrOp(x,y,LT,<)
+#define AssertStrLe(x,y) AssertStrOp(x,y,LE,<=)
+#define AssertStrOp(x,y,opn,op) AssertFmt(strcmp(x,y) op 0, \
+ #x" "#opn" "#y" but "#x" is \"%s\" and "#y" is \"%s\"!",x,y)
+
+
+/** Keeps track of how many assertions have been made.
+ * This needs to be updated manually each time an assertion
+ * is made. The Zutest built-in assertion macros all
+ * update this variable properly.
+ */
+
+extern int zutest_assertions;
+
+
+/** A single test
+ *
+ * This routine is called to run the test. If it returns, the test
+ * succeeds. If zutest_fail() is called (either directly or indirectly
+ * via an Assert macro), then the test fails.
+ */
+typedef void (*zutest_proc)();
+
+
+/** A suite of tests
+ *
+ * A zutest_suite is simply a list of tests. Generally, each .c file
+ * in your project will include a test suite that ensures all the tests
+ * contained in the .c file are run. A suite is just a NULL-terminated
+ * list of tests.
+ */
+typedef zutest_proc *zutest_suite;
+
+
+/** A suite of test suites
+ *
+ * Zutests runs through each test suite in your project, running all the
+ * tests in each suite. A suite of suites is just a NULL-terminated list
+ * of suites. This is the topmost data structure used by zutest.
+ */
+typedef zutest_suite *zutest_suites;
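+
+/* Putting the three together -- a minimal sketch (the test and array
+ * names below are made up, not part of zutest; AssertStrEq needs
+ * string.h):
+ *
+ * <pre>
+ * static void test_math(void)
+ * {
+ * int sum = 2 + 2;
+ * AssertEq(sum, 4);
+ * AssertStrEq("re2c", "re2c");
+ * }
+ *
+ * zutest_proc math_tests[] = { test_math, NULL };
+ * zutest_suite my_suites[] = { math_tests, NULL };
+ *
+ * int main(int argc, char **argv)
+ * {
+ * unit_test_check(argc, argv, my_suites);
+ * // ... normal program execution ...
+ * return 0;
+ * }
+ * </pre>
+ */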
+
+
+/** Fails the current test.
+ *
+ * This function may only be called from a ::zutest_proc.
+ *
+ * If none of the built-in Assert macros fit your fancy, you can do the
+ * check on your own and call zutest_fail in the event that it fails.
+ *
+ * Example:
+ *
+ * <pre>
+ * if(my_error) {
+ * zutest_fail(__FILE__, __LINE__, __func__, "Error Message %d", 1);
+ * }
+ * </pre>
+ *
+ * But, really, it's easier just to call the Fail() macro.
+ */
+
+void zutest_fail(const char *file, int line, const char *func,
+ const char *msg, ...);
+
+
+/** Runs all the tests in a suite. */
+void run_zutest_suite(const zutest_suite suite);
+/** Runs all the tests in all the suites passed in. */
+void run_zutest_suites(const zutest_suites suites);
+
+void print_zutest_results();
+
+
+/**
+ *
+ * Call this on the very first line of your application. If the user
+ * ran your program with the first arg of "--run-unit-tests", this will
+ * run the tests and exit. Otherwise your program will run as normal.
+ * If you would rather create a dedicated executable, just call
+ * run_zutest_suites() directly.
+ */
+
+void unit_test_check(int argc, char **argv, const zutest_suites suites);
+
+/**
+ *
+ * This runs all the unit tests supplied and then exits. Use this
+ * if you want to handle the arguments yourself.
+ */
+
+void run_unit_tests(const zutest_suites suites);
+
+
+/** Zutest's built-in test suite.
+ *
+ * This allows you to add the Zutest unit test suite to your application's
+ * test suites. This way, you can ensure that Zutest's unit tests pass
+ * before running your application's. This is for the especially pedantic. :)
+ *
+ * Unfortunately, there is one test that cannot be run if you do this:
+ * ensuring that zutest properly handles empty test suites.
+ * Other than this one test, adding zutest_tests
+ * to your application's test suite is equivalent to causing zutest to
+ * compile and run its unit tests as described in zutest.h.
+ */
+
+extern zutest_proc zutest_tests[];
+
+#endif