From: John Millaway <john43@users.sourceforge.net>
Date: Fri, 16 Aug 2002 19:54:47 +0000 (+0000)
Subject: Prelimary work on tables API.
X-Git-Tag: flex-2-5-15~20
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2bb924b502c41a23ab2bc8a29355f08746f8eb21;p=flex

Prelimary work on tables API.
---

diff --git a/flex.skl b/flex.skl
index f7a3aab..23393ca 100644
--- a/flex.skl
+++ b/flex.skl
@@ -13,6 +13,7 @@
 %#          (See gen.c, etc. for details.)
 %#   %c  -  Begin linkage code that should NOT appear in a ".h" file.
 %#   %e  -  End linkage code. %c and %e are used for building a header file.
+%#   %t  -  Toggle output of Tables API.
 %#
 %#   All control-lines EXCEPT comment lines ("%#") will be inserted into
 %#   the generated scanner as a C-style comment. This is to aid those who
@@ -742,6 +743,20 @@ static int yy_top_state YY_PARAMS(( YY_PROTO_ONLY_ARG ));
 %*
 #endif
 
+%t Tables API Structures and Prototypes
+
+/* Load tables from a file descriptor. */
+//int yytables_read YY_PARAMS((int fd, const char * tbl_name  YY_PROTO_LAST_ARG));
+
+/* Load tables from a file stream. */
+int yytables_fread YY_PARAMS((FILE* fp, const char * tbl_name  YY_PROTO_LAST_ARG));
+
+/* Load tables from given bytes. */
+//int yytables_bread YY_PARAMS((void* bytes, const char * tbl_name  YY_PROTO_LAST_ARG));
+
+
+%t End of Tables API Structures and Prototypes
+
 /* Default declaration of generated scanner - a define so the user can
  * easily add parameters.
  */
diff --git a/flex.texi b/flex.texi
index 7478c7e..f72f1e8 100644
--- a/flex.texi
+++ b/flex.texi
@@ -46,6 +46,7 @@ This edition of the @cite{flex Manual} documents @code{flex} version
 * Reentrant::                   
 * Lex and Posix::               
 * Memory Management::           
+* Serialized Tables::           
 * Diagnostics::                 
 * Limitations::                 
 * Bibliography::                
@@ -88,6 +89,13 @@ Memory Management
 * Overriding The Default Memory Management::  
 * A Note About yytext And Memory::  
 
+Serialized Tables
+
+* Creating Serialized Tables:: 
+* Loading Serialized Tables:: 
+* Freeing Serialized Tables::
+* Tables File Format:: 
+
 FAQ
 
 * When was flex born?::         
@@ -4570,6 +4578,202 @@ To prevent memory leaks from strdup'd yytext, you will have to track the memory
 somehow. Our experience has shown that a garbage collection mechanism or a
 pooled memory mechanism will save you a lot of grief when writing parsers.
 
+@node Serialized Tables
+@chapter Serialized Tables
+@cindex serialization
+@cindex memory, serialized tables
+
+A @code{flex} scanner has the ability to save the DFA tables to a file, and
+load them at runtime when needed.  The motivation for this feature is to reduce
+the runtime memory footprint.  Traditionally, these tables are compiled into
+the scanner as C arrays, and are sometimes quite large.  Since the tables are
+compiled into the scanner, the memory used by the tables can never be freed.
+This is a waste of memory, especially if an application uses several scanners,
+but none of them at the same time.
+
+The serialization feature allows the tables to be loaded at runtime, before
+scanning begins. The tables may be discarded when scanning is finished.
+
+@menu
+* Creating Serialized Tables::
+* Loading Serialized Tables::
+* Freeing Serialized Tables::
+* Tables File Format::
+@end menu
+
+@node Creating Serialized Tables
+@section Creating Serialized Tables
+@cindex tables, creating serialized
+@cindex creating serialized tables
+
+how to create serialized tables
+
+@node Loading Serialized Tables
+@section Loading Serialized Tables
+@cindex tables, loading
+@cindex loading tables at runtime
+
+how to load serialized tables
+
+@node Freeing Serialized Tables
+@section Freeing Serialized Tables
+@cindex tables, freeing
+@cindex freeing tables
+
+how to free serialized tables
+
+@node Tables File Format
+@section Tables File Format
+@cindex tables, file format
+@cindex file format, serialized tables
+
+This section defines the file format of serialized @code{flex} tables.
+
+The tables format allows for one or more sets of tables to be
+specified, where each set corresponds to a given scanner. Scanners are
+indexed by name, as described below. The file format is as follows:
+
+@example
+@verbatim
+                 TABLE SET
+                +-------------------------------+
+        Header  | uint32          th_magic;     |
+                | uint32          th_hsize;     |
+                | uint32          th_ssize;     |
+                | uint8           th_version[4];|
+                | uint16          th_flags;     |
+                | char            th_name[];    |
+                | char            th_pad64[];   |
+                +-------------------------------+
+        Table 1 | uint16          t_id;         |
+                | uint16          t_flags;      |
+                | uint32          t_len[2];     |
+                | void            t_data[];     |
+                | char            t_pad64[];    |
+                +-------------------------------+
+        Table 2 |                               |
+           .    .                               .
+           .    .                               .
+           .    .                               .
+           .    .                               .
+        Table n |                               |
+                +-------------------------------+
+                 TABLE SET 2
+                      .
+                      .
+                      .
+                 TABLE SET N
+@end verbatim
+@end example
+
+The above diagram shows that a complete set consists of a header followed by
+multiple tables. Furthermore, multiple complete sets may be present in the same
+file, each set with its own header and tables. The sets are contiguous in the
+file. The only way to know if another set follows is to check the next four
+bytes for the magic number. The header and tables sections are padded to 64-bit
+boundaries. Below we describe each field in detail. This format does not
+specify how the scanner will expand the given data, i.e., data may be
+serialized as int16, but expanded to an int32 array.
+Remember, @emph{all integer values are in network byte order}. 
+
+@noindent
+Fields of a table header:
+
+@table @code
+@item th_magic
+Magic number, always 0xF13C57B1.
+@item th_hsize
+Size of this entire header, in bytes.
+@item th_ssize
+Size of this entire set, in bytes.
+@item th_version[4]
+Flex version numbers: major,minor,r1,r2.  Currently flex only specifies three
+revision levels, so 'r2' will simply be zero, e.g. 2.5.13.0.
+@item th_flags
+Bit flags for this table set. Currently unused.
+@item th_name[]
+Contains the name of this table set. The default is @samp{yytables},
+and is prefixed accordingly. Must be NUL-terminated.
+@item th_pad64[]
+Zero or more NUL bytes, padding the entire header to the next 64-bit boundary.
+@end table
+
+@noindent
+Fields of a table:
+
+@table @code
+@item t_id
+Specifies the table identifier. Possible values are:
+@table @code
+@item YYT_ID_ACCEPT (0x01)
+@code{yy_accept}
+@item YYT_ID_BASE   (0x02)
+@code{yy_base}
+@item YYT_ID_CHK    (0x03)
+@code{yy_chk}
+@item YYT_ID_DEF    (0x04)
+@code{yy_def}
+@item YYT_ID_EC     (0x05)
+@code{yy_ec}
+@item YYT_ID_META   (0x06)
+@code{yy_meta}
+@item YYT_ID_NUL_TRANS (0x07)
+@code{yy_NUL_trans}
+@item YYT_ID_NXT (0x08)
+@code{yy_nxt}
+@item YYT_ID_RULE_CAN_MATCH_EOL (0x09)
+@code{yy_rule_can_match_eol}
+@item YYT_ID_START_STATE_LIST (0x0A)
+@code{yy_start_state_list}. This array is handled specially because it is an array of
+pointers to structs. See the @code{t_flags} field below.
+@item YYT_ID_TRANSITION (0x0B)
+@code{yy_transition}. This array is handled specially because it is an array of
+structs. See the @code{t_len} field below.
+@end table
+
+@item t_flags
+Bit flags describing how to interpret the data in @code{t_data}.
+The data arrays are one-dimensional by default, but may be
+two-dimensional as specified in the @code{t_len} field.
+
+@table @code
+@item YYT_DATA8 (0x01)
+The data is serialized as an array of type int8.
+@item YYT_DATA16 (0x02)
+The data is serialized as an array of type int16.
+@item YYT_DATA32 (0x04)
+The data is serialized as an array of type int32.
+@item YYT_PTRANS (0x08)
+The data is a list of indexes of entries in the expanded @code{yy_transition}
+array.  Each index should be expanded to a pointer to the corresponding entry
+in the @code{yy_transition} array. It is not guaranteed that the
+@code{yy_transition} array has already been seen, but we can still calculate
+the offsets.
+@end table
+
+@item t_len[2]
+Specifies the number of elements in the array.
+If @code{t_len[0]} is non-zero, then the data is a two-dimensional array.
+Otherwise, the data is a one-dimensional array. @code{t_len[0]} contains the
+number of elements in the higher dimensional array, and @code{t_len[1]} contains the number
+of elements in the lowest dimension. Conceptually, @code{t_data} is either 
+@code{int t_data[t_len[1]]}, or @code{int t_data[t_len[0]][t_len[1]]}.
+It is possible for both values of @code{t_len} to be zero, in which case
+@code{t_data} is a zero length array, and no data is loaded, i.e., this table
+is simply skipped. Note that if @code{t_id} equals
+@code{YYT_ID_TRANSITION}, then @code{t_len[1]} is the number of structs in 
+the array. Each struct contains two integer fields, with the @code{yy_verify} field first,
+followed by the @code{yy_nxt} field.
+
+@item t_data[]
+The table data. This array may be a one- or two-dimensional array, of type int8,
+int16, or int32, depending upon the values in the @code{t_flags} and
+the @code{t_len} fields.
+@item t_pad64[]
+Zero or more NUL bytes, padding the table to the next 64-bit boundary.
+@end table
+
+
 @node Diagnostics
 @chapter Diagnostics
 
diff --git a/misc.c b/misc.c
index 2d5c9e7..ab09657 100644
--- a/misc.c
+++ b/misc.c
@@ -841,7 +841,11 @@ void skelout()
 				case '#':
 					/* a comment in the skel. ignore. */
 					break;
-					
+
+                case 't':
+                    /* %t - toggle tables api */
+                    break;
+
 				default:
 					flexfatal(
 					_( "bad line in skeleton file" ) );