From: Fletcher T. Penney Date: Thu, 19 Jan 2017 03:43:15 +0000 (-0500) Subject: CHANGED: Initial public commit X-Git-Tag: 0.1.0a^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=92ee649816032c49045164b72728a8d2670d5c06;p=multimarkdown CHANGED: Initial public commit --- diff --git a/.gitignore b/.gitignore index c06b87f..2ed54b1 100644 --- a/.gitignore +++ b/.gitignore @@ -19,5 +19,7 @@ profile DerivedData .idea/ +lemon/build + build-xcode build-xcode-debug diff --git a/CMakeLists.txt b/CMakeLists.txt index ddf769a..19d6bcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,17 +5,17 @@ cmake_minimum_required (VERSION 2.6) # Define Our Project # ================== -set (My_Project_Title "C-Template") -set (My_Project_Description "Boilerplate c project with cmake support, CuTest unit testing, and more.") +set (My_Project_Title "MultiMarkdown 6") +set (My_Project_Description "Lightweight markup processor to produce HTML, LaTeX, and more.") set (My_Project_Author "Fletcher T. 
Penney") -set (My_Project_Revised_Date "2017-01-16") -set (My_Project_Version_Major 1) -set (My_Project_Version_Minor 0) -set (My_Project_Version_Patch 5) +set (My_Project_Revised_Date "2017-01-18") +set (My_Project_Version_Major 0) +set (My_Project_Version_Minor 1) +set (My_Project_Version_Patch 0a) set (My_Project_Version "${My_Project_Version_Major}.${My_Project_Version_Minor}.${My_Project_Version_Patch}") -set (My_Project_Copyright_Date "2015-2017") +set (My_Project_Copyright_Date "2016 - 2017") set (My_Project_Copyright "Copyright © ${My_Project_Copyright_Date} ${My_Project_Author}.") string(TOUPPER ${My_Project_Title} My_Project_Title_Caps ) @@ -24,6 +24,10 @@ string(REGEX REPLACE " " "_" My_Project_Title_Caps ${My_Project_Title_Caps} ) project (${My_Project_Title}) +# Enable this if you want to be warned about struct alignment +# add_definitions ("-Wpadded") + + # ========================= # Build Submodules (if any) # ========================= @@ -125,9 +129,9 @@ ENDMACRO(ADD_LINKED_FRAMEWORK) file(READ ${PROJECT_SOURCE_DIR}/LICENSE.txt My_Project_License) -string(REGEX REPLACE "\n" "\n\t" My_Project_License_Indent ${My_Project_License}) +string(REGEX REPLACE "\n" "\n\t" My_Project_License_Indented ${My_Project_License}) -string(REGEX REPLACE "\"" "\\\\\"" My_Project_License_Escaped ${My_Project_License_Indent}) +string(REGEX REPLACE "\"" "\\\\\"" My_Project_License_Escaped ${My_Project_License_Indented}) string(REGEX REPLACE "\n" "\\\\n\"\\\\\n\"" My_Project_License_Literal ${My_Project_License_Escaped}) @@ -160,11 +164,38 @@ configure_file ( # src_files are the primary files, and will be included in doxygen documentation set(src_files -# src/foo.c + src/argtable3.c + src/char.c + src/d_string.c + src/html.c + src/lexer.c + src/mmd.c + src/object_pool.c + src/parser.c + src/rng.c + src/scanners.c + src/stack.c + src/token.c + src/token_pairs.c + src/writer.c ) # Primary header files, also for doxygen documentation set(header_files + src/argtable3.h + 
src/d_string.h + src/char.h + src/html.h + src/lexer.h + src/libMultiMarkdown.h + src/mmd.h + src/object_pool.h + src/scanners.h + src/stack.h + src/token.h + src/token_pairs.h + src/uthash.h + src/writer.h ) # Public headers, will be installed in 'include' @@ -174,11 +205,9 @@ set(public_header_files # Utility source files will not be included in doxygen set(src_utility_files -# src/d_string.c ) set(header_utility_files -# src/d_string.h ${PROJECT_BINARY_DIR}/version.h ) @@ -230,10 +259,10 @@ else() add_test( test ${PROJECT_BINARY_DIR}/run_tests) # valgrind memory testing - find_program (MEMORYCHECK_COMMAND valgrind) - SET (MEMORYCHECK_COMMAND_OPTIONS --leak-check=full --error-exitcode=1) - - add_test( memory_test ${MEMORYCHECK_COMMAND} ${MEMORYCHECK_COMMAND_OPTIONS} ${PROJECT_BINARY_DIR}/run_tests) + # find_program (MEMORYCHECK_COMMAND valgrind) + # SET (MEMORYCHECK_COMMAND_OPTIONS --leak-check=full --error-exitcode=1) + # + # add_test( memory_test ${MEMORYCHECK_COMMAND} ${MEMORYCHECK_COMMAND_OPTIONS} ${PROJECT_BINARY_DIR}/run_tests) endif() endif() @@ -302,32 +331,33 @@ endif (WIN32) # ============== # Create a library? -# add_library(libFOO STATIC -# ${src_files} -# ${src_utility_files} -# ${header_files} -# ${header_utility_files} -# ) +add_library(libMultiMarkdown STATIC + ${src_files} + ${src_utility_files} + ${header_files} + ${header_utility_files} +) + +ADD_PUBLIC_HEADER(libMultiMarkdown src/libMultiMarkdown.h) +ADD_PUBLIC_HEADER(libMultiMarkdown src/d_string.h) # remove the extra "lib" from "liblibFOO" -# SET_TARGET_PROPERTIES(libFOO PROPERTIES PREFIX "") +SET_TARGET_PROPERTIES(libMultiMarkdown PROPERTIES PREFIX "") # Create a command-line app? # if (NOT DEFINED TEST) -# add_executable(main -# src/main.c -# src/d_string.c -# src/d_string.h -# ${header_files} -# ) + add_executable(multimarkdown + src/d_string.c + src/main.c + ) # # Link the library to the app? 
-# target_link_libraries(main libFOO) + target_link_libraries(multimarkdown libMultiMarkdown) # endif() # Xcode settings for fat binaries -# set_target_properties(libFOO PROPERTIES XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") -# set_target_properties(main PROPERTIES XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") +set_target_properties(libMultiMarkdown PROPERTIES XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") +set_target_properties(multimarkdown PROPERTIES XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") # ========================== @@ -371,3 +401,27 @@ endif (WIN32) set (CPACK_PACKAGE_INSTALL_DIRECTORY ${PROJECT}) include (CPack) + + +# ====================== +# Integration Test Suite +# ====================== + +enable_testing() + +function(ADD_MMD_TEST NAME FLAGS FOLDER EXTENSION) + add_test ( ${NAME} + ${PROJECT_SOURCE_DIR}/tests/MarkdownTest.pl + --Script=${CMAKE_CURRENT_BINARY_DIR}/multimarkdown + --testdir=${PROJECT_SOURCE_DIR}/tests/${FOLDER} + "--Flags=${FLAGS}" + --ext=${EXTENSION} + ) + +endfunction(ADD_MMD_TEST) + + +# MMD 6 +ADD_MMD_TEST(mmd-6 "" MMD6Tests html) + +ADD_MMD_TEST(mmd-6-compat "-c" MMD6Tests htmlc) diff --git a/LICENSE.txt b/LICENSE.txt index 0f416df..3a559ac 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -The `c-template` project is released under the MIT License. +The `MultiMarkdown 6` project is released under the MIT License.. GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: diff --git a/Makefile b/Makefile index 13802fc..5be146f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ BUILD_DIR = build +DOC_DIR = documentation XCODE_BUILD_DIR = build-xcode XCODE_DEBUG_BUILD_DIR = build-xcode-debug @@ -18,7 +19,7 @@ zip: $(BUILD_DIR) .PHONY : debug debug: $(BUILD_DIR) cd $(BUILD_DIR); \ - cmake -DTEST=1 .. + cmake -DTEST=1 DCMAKE_BUILD_TYPE=DEBUG .. 
# analyze target enables use of clang's scan-build (if installed) # will then need to run 'scan-build make' to compile and analyze @@ -27,15 +28,20 @@ debug: $(BUILD_DIR) .PHONY : analyze analyze: $(BUILD_DIR) cd $(BUILD_DIR); \ - scan-build cmake -DTEST=1 .. + scan-build cmake -DTEST=1 DCMAKE_BUILD_TYPE=DEBUG .. + +.PHONY : map +map: + cd $(BUILD_DIR); \ + ../tools/enumsToPerl.pl ../src/libMultiMarkdown.h enumMap.txt; # Create xcode project # You can then build within XCode, or using the commands: # xcodebuild -configuration Debug # xcodebuild -configuration Release .PHONY : xcode -xcode: $(XCOD_BUILD_DIR) - cd $(XCOD_BUILD_DIR); \ +xcode: $(XCODE_BUILD_DIR) + cd $(XCODE_BUILD_DIR); \ cmake -G Xcode .. .PHONY : xcode-debug @@ -69,10 +75,11 @@ windows-zip-32: $(BUILD_DIR) # Build the documentation using doxygen .PHONY : documentation -documentation: $(BUILD_DIR) - cd $(BUILD_DIR); \ +documentation: + -mkdir $(DOC_DIR) 2>/dev/null; \ + cd $(DOC_DIR); \ cmake -DDOCUMENTATION=1 ..; cd ..; \ - doxygen build/doxygen.conf + doxygen $(DOC_DIR)/doxygen.conf .PHONY : gh-pages gh-pages: documentation diff --git a/README.md b/README.md index a3fa6a2..5ef2339 100644 --- a/README.md +++ b/README.md @@ -2,274 +2,516 @@ | | | | ---------- | ------------------------- | -| Title: | C-Template | +| Title: | MultiMarkdown 6 | | Author: | Fletcher T. Penney | | Date: | 2017-01-16 | -| Copyright: | Copyright © 2015-2017 Fletcher T. Penney. | -| Version: | 1.0.5 | +| Copyright: | Copyright © 2016 - 2017 Fletcher T. Penney. | +| Version: | 0.1.0a | -## Introduction ## +## An Announcement! ## -This template was created out of a desire to simplify some of the setup and -configuration that I was doing over and over each time I started a new project. -Additionally, I wanted to try to start encouraging some "better practices" -(though not necessarily "best practices"): +I would like to officially announce that MultiMarkdown version 6 is in public +alpha. 
It's finally at a point where it is usable, but there are quite a few +caveats. -1. [Test-driven development][tdd] -- My development of MultiMarkdown - focused on integration testing, but really had no unit testing to - speak of. Some newer projects I began working on were a bit math- - heavy, and ensuring that each piece works properly became even more - important. It was also nice to be able to actually develop code that - could do *something* (via the test suite), even though the project as - a whole was nowhere near complete.) To accomplish this, I include the - [CuTest] project to support writing tests for your code. +This post is a way for me to organize some of my thoughts, provide some +history for those who are interested, and to provide some tips and tricks from +my experiences for those who are working on their own products. -2. Use of the [cmake] build system. `cmake` is not perfect by any - means, but it does offer some very useful features and a means for - better integrating the compilation and packaging/installation aspects - of development. Rather than reinventing the wheel each time, this - setup incorporates basic `cmake` functionality to make it easy to - control how your project is compiled, and includes automated generation - of the test command. +But first, some background... -3. Templates -- `cmake` has a reasonable templating system, so that you - can define basic variables (e.g. author, project name, etc.) and allow - `cmake` to combine those elements to ensure consistency across source - code and README files. -4. Documentation -- some default setup to allow for [Doxygen]-generated - documentation. The generated `README.md` file is used as the main - page, and the source c/header files are included. Naturally, Doxygen - is a complex system, so you're responsible for figuring out how to - properly document your code. +### Why a New Version? ### -5. Simplify `git` a touch -- In my larger projects, I make heavy use of - git modules. 
One project may make use of 20-30 modules, which are - designed to be re-usable across other projects. I found that I was - spending too much time making sure that I had the latest version - of a module checked out, so I created two scripts to help me keep - my modules in line: `link_git_modules` and `update_git_modules`. - You run the `link` script once to ensure that your modules are properly - set up, and can then run the `update` script at any time to be sure - you've pulled the latest version. One advantage of this is that your - modules are set to a branch, rather than just a detached commit. It - may or may not work for your needs, but it saves me a bunch of time - and headache. +MultiMarkdown version 5 was released in November of 2015, but the codebase was +essentially the same as that of v4 -- and that was released in beta in April +of 2013. A few key things prompted work on a new version: +* Accuracy -- MMD v4 and v5 were the most accurate versions yet, and a lot of +effort went into finding and resolving various edge cases. However, it began +to feel like a game of whack-a-mole where new bugs would creep in every time I +fixed an old one. The PEG began to feel rather convoluted in spots, even +though it did allow for a precise (if not always accurate) specification of +the grammar. -[tdd]: https://en.wikipedia.org/wiki/Test-driven_development -[cmake]: http://www.cmake.org/ -[CuTest]: http://cutest.sourceforge.net -[Doxygen]: http://www.stack.nl/~dimitri/doxygen/ +* Performance -- "Back in the day" [peg-markdown] was one of the fastest +Markdown parsers around. MMD v3 was based on peg-markdown, and would leap- +frog with it in terms of performance. Then [CommonMark] was released, which +was a bit faster. Then a couple of years went by and CommonMark became *much* +faster -- in one of my test suites, MMD v 5.4.0 takes about 25 times longer to +process a long document than CommonMark 0.27.0. 
+[peg-markdown]: https://github.com/jgm/peg-markdown +[CommonMark]: http://commonmark.org/ -## How do I use it? ## +Last spring, I decided I wanted to rewrite MultiMarkdown from scratch, +building the parser myself rather than relying on a pre-rolled solution. (I +had been using [greg](https://github.com/ooc-lang/greg) to compile the PEG +into parser code. It worked well overall, but lacked some features I needed, +requiring a lot of workarounds.) -You can download the source from [github] and get to work. The file "IMPORTANT" -contains instructions on the various build commands you can use. +## First Attempt ## -I recommend using the following script to automatically create a new git repo, -pull in the default project template, and configure git-flow. You simply have -to rename your project directory from `new-project` to whatever you desire: +My first attempt started by hand-crafting a parser that scanned through the +document a line at a time, deciding what to do with each line as it found +them. I used regex parsers made with [re2c](http://re2c.org/index.html) to +help classify each line, and then a separate parser layer to process groups of +lines into blocks. Initially this approach worked well, and was really +efficient. But I quickly began to code my way into a dead-end -- the strategy +was not elegant enough to handle things like nested lists, etc. +One thing that did turn out well from the first attempt, however, was an +approach for handling `<emph>` and `<strong>` parsing. I've learned over the +years that this can be one of the hardest parts of coding accurately for +Markdown. There are many examples that are obvious to a person, but difficult +to properly "explain" how to parse to a computer. - #!/bin/sh +No solution is perfect, but I developed an approach that seems to accurately +handle a wide range of situations without a great deal of complexity: - git init new-project +1. Scan the documents for asterisks (`*`). Each one will be handled one at a +time. 
- cd new-project +2. Unlike brackets (`[` and `]`), an asterisk is "ambidextrous", in that it +may be able to open a matched pair of asterisks, close a pair, or both. For +example, in `foo *bar* foo`: - git remote add "template" https://github.com/fletcher/c-template.git + 1. The first asterisk can open a pair, but not close one. - git pull template master + 2. The second asterisk can close a pair, but not open one. - git flow init -d +3. So, once the asterisks have been identified, each has to be examined to +determine whether it can open/close/both. The algorithm is not that complex, +but I'll describe it in general terms. Check the code for more specifics. +This approach seems to work, but might still need some slight tweaking. In +the future, I'll codify this better in language rather than just in code. - git checkout develop + 1. If there is whitespace to the left of an asterisk, it can't close. + 2. If there is whitespace or punctuation to the right it can't open. -Using this approach, you can define your own `origin` remote if you like, but -the `template` remote can be used to update the core project files should any -improvements come about: + 3. "Runs" of asterisks, e.g. `**bar` are treated as a unit in terms of + looking left/right. - git checkout develop - git merge template master + 4. Asterisks inside a word are a bit trickier -- we look at the number of + asterisks before the word, the number in the current run, and the number + of asterisks after the word to determine which combinations, if any, are + permitted. -**NOTE**: `cmake` is a complex suite of utilities, and if you have trouble you -will need to get support elsewhere. If you find errors in this template, by -all means I want to hear about them and fix them, but this is just a basic -framework to get you started. In all likelihood, all but the most basic -projects will need some customization. +4. 
Once all asterisks have been tagged as able to open/close/both, we proceed +through them in order: + 1. When we encounter a tag that can close, we look to see if there is a + previous opener that has not been paired off. If so, pair the two and + remove the opener from the list of available asterisks. -[github]: https://github.com/fletcher/c-template + 2. When we encounter an opener, add it to the stack of available openers. + + 3. When we encounter an asterisk that can do both, see if it can close an + existing opener. If not, then add it to the stack. + +5. After all tokens in the block have been paired, then we look for nesting +pairs of asterisks in order to create `<emph>` and `<strong>` sets. For +example, assume we have six asterisks wrapped around a word, three in front, +and three after. The asterisks are indicated with numbers: `123foo456`. We +proceed in the following manner: + 1. Based on the pairing algorithm above, these asterisks would be paired as + follows, with matching asterisks sharing numbers -- `123foo321`. + + 2. Moving forwards, we come to asterisk "1". It is followed by an + asterisk, so we check to see if they should be grouped as a `<strong>`. + Since the "1" asterisks are wrapped immediately outside the "2" asterisks, + they are joined together. More than two pairs can't be joined, so we now + get the following -- `112foo211`, where the "11" represents the opening + and closing of a `<strong>`, and the "2" represents a `<emph>`. + +6. When matching a pair, any unclosed openers that are on the stack are +removed, preventing pairs from "crossing" or "intersecting". Pairs can wrap +around each other, e.g. `[(foo)]`, but not intersect like `[(foo])`. In the +second case, the brackets would close, removing the `(` from the stack. + +7. This same approach is used in all tokens that are matched in pairs-- +`[foo]`, `(foo)`, `_foo_`, etc. There's slightly more to it, but once you +figure out how to assign opening/closing ability, the rest is easy. 
By using +a stack to track available openers, it can be performed efficiently. + +In my testing, this approach has worked quite well. It handles all the basic +scenarios I've thrown at it, and all of the "basic" and "devious" edge cases I +have thought of (some of these don't necessarily have a "right" answer -- but +v6 gives consistent answers that seem as reasonable as any others to me). +There are also three more edge cases I've come up with that can still stump it, and +ironically they are handled correctly by most implementations. They just +don't follow the rules above. I'll continue to work on this. + +In the end, I scrapped this effort, but kept the lessons learned in the token +pairing algorithm. -## Configuration ## +## Second Attempt ## + +I tried again this past Fall. This time, I approached the problem with lots +of reading. *Lots and lots* of reading -- tons of websites, computer science +journal articles, PhD theses, etc. Learned a lot about lexers, and a lot +about parsers, including hand-crafting vs using parser generators. In brief: + +1. I learned about the [Aho–Corasick algorithm], which is a great way to +efficiently search a string for multiple target strings at once. I used this +to create a custom lexer to identify tokens in a MultiMarkdown text document +(e.g. `*`, `[ `, `{++`, etc.). I learned a lot, and had a good time working +out the implementation. This code efficiently allowed me to break a string of +text into the tokens that mattered for Markdown parsing. + +2. However, in a few instances I really needed some features of regular +expressions to simplify more complex structures. After a quick bit of testing, +using re2c to create a tokenizer was just as efficient, and allowed me to +incorporate some regex functionality that simplified later parsing. I'll keep +the Aho-Corasick stuff around, and will probably experiment more with it +later. But I didn't need it for MMD now. `lexer.re` contains the source for +the tokenizer. 
-### CMakeLists.txt File ### +[Aho–Corasick algorithm]: https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm -First, you should update the project information under the "Define Our Project" -section, including the title, description, etc. This information will be used -to update the README, as well as to create the `version.h` file so that the -project can have access to its own version number. +I looked long and hard for a way to simplify the parsing algorithm to try and +"touch" each token only once. Ideally, the program could step through each +token, and decide when to create a new block, when to pair things together, +etc. But I'm not convinced it's possible. Since Markdown's grammar varies +based on context, it seems to work best when handled in distinct phases: + +1. Tokenize the string to identify key sections of text. This includes line +breaks, allowing the text to be examined one line at a time. + +2. Join series of lines together into blocks, such as paragraphs, code blocks, +lists, etc. + +3. The tokens inside each block can then be paired together to create more +complex syntax such as links, strong, emphasis, etc. + +To handle the block parsing, I started off using the [Aho-Corasick] code to +handle my first attempt. I had actually implemented some basic regex +functionality, and used that to group lines together to create blocks. But +this quickly fell apart in the face of more complex structures such as +recursive lists. After a lot of searching, and *tons* more reading, I +ultimately decided to use a parser generator to handle the task of grouping lines +into blocks. `parser.y` has the source for this, and it is processed by the +[lemon](http://www.hwaci.com/sw/lemon/) parser generator to create the actual +code. + +I chose to do this because hand-crafting the block parser would be complex. +The end result would likely be difficult to read and understand, which would +make it difficult to update later on. 
Using the parser generator allows me to +write things out in a way that can more easily be understood by a person. In +all likelihood, the performance is probably as good as anything I could do +anyway, if not better. + +Because lemon is a LALR(1) parser, it does require a bit of thinking ahead +about how to create the grammar used. But so far, it has been able to handle +everything I have thrown at it. + + +## Optimization ## + +One of my goals for MMD 6 was performance. So I've paid attention to speed +along the way, and have tried to use a few tricks to keep things fast. Here +are some things I've learned along the way. In no particular order: + + +### Memory Allocation ### + +When parsing a long document, a *lot* of token structures are created. Each +one requires a small bit of memory to be allocated. In aggregate, that time +added up and slowed down performance. + +After reading for a bit, I ended up coming up with an approach that uses +larger chunks of memory. I allocate pools of memory in large slabs for +smaller "objects". For example, I allocate memory for 1024 tokens at a +single time, and then dole that memory out as needed. When the slab is empty, +a new one is allocated. This dramatically improved performance. + +When pairing tokens, I created a new stack for each block. I realized that an +empty stack didn't have any "leftover" cruft to interfere with re-use, so I +just used one for the entire document. Again a sizeable improvement in +performance from only allocating one object instead of many. When recursing +to a deeper level, the stack just gets deeper, but earlier levels aren't +modified. -You will then need to update the various groups in the "Source Files" section -so that Cmake will be able to determine which files are used to build your -project. For reasons that will become clear later, try to follow the -suggestions for the different groups of files. 
+Speaking of tokens, I realized that the average document contains a lot of +single spaces (there's one between every two words I have written, for +example.) The vast majority of the time, these single spaces have no effect +on the output of Markdown documents. I changed my whitespace token search to +only flag runs of 2 or more spaces, dramatically reducing the number of +tokens. This gives the benefit of needing fewer memory allocations, and also +reduces the number of tokens that need to be processed later on. The only +downside is remembering to check for a single space character in a few instances +where it matters. -You then need to define your targets, such as a library, or executable, etc. -Obviously, this will depend on the needs of your project. You can also add -custom steps based on the Target OS (OS X, Windows, *nix, etc.). -You can use CPack to generate installers for your software. This can be -complex, and you will need to modify this section heavily. +### Proper input buffering ### -CuTest is used by default to provide unit testing (see below), but you -can also use CMake/CTest to provide integration testing. Again, this will -be up to you to configure. +When I first began last spring, I was amazed to see how much time was being +spent by MultiMarkdown simply reading the input file. Then I discovered it +was because I was reading it one character at a time. I switched to using a +buffered read approach and the time to read the file went to almost nothing. I +experimented with different buffer sizes, but they did not seem to make a +measurable difference. -### CuTest ### +### Output Buffering ### + +I experimented with different approaches to creating the output after parsing. +I tried printing directly to `stdout`, and even played with different +buffering settings. None of those seemed to work well, and all were slower +than using the `d_string` approach (formerly called `GString` in MMD 5). 
-[CuTest] provides a means to integrate unit testing with your C source code. -Once you get the hang of it, it's easy to use. +### Fast Searches ### -### Doxygen ### +After getting basic Markdown functionality complete, I discovered during +testing that the time required to parse a document grew exponentially as the +document grew longer. Performance was on par with CommonMark for shorter +documents, but fell increasingly behind in larger tests. Time profiling found +that the culprit was searching for link definitions when they didn't exist. +My first approach was to keep a stack of used link definitions, and to iterate +through them when necessary. In long documents, this performs very poorly. +More research and I ended up using +[uthash](http://troydhanson.github.io/uthash/). This allows me to search for +a link (or footnote, etc.) by "name" rather than searching through an array. +This allowed me to get MMD's performance back to O(n), taking roughly twice as +much time to process a document that is twice as long. -[Doxygen] is used to generate documentation from the source code itself. -Properly configuring your source for this is up to you. You can modify the -`doxygen.conf.in` template with your desired settings as desired, but most -of the basics are handled for you based on your CMake configuration. +### Efficient Utility Functions ### -### GitHub Pages Support ### +It is frequently necessary when parsing Markdown to check what sort of +character we are dealing with at a certain position -- a letter, whitespace, +punctuation, etc. I created a lookup table for this via `char_lookup.c` and +hard-coded it in `char.c`. These routines allow me to quickly, and +consistently, classify any byte within a document. This saved a lot of +programming time, and saved time tracking down bugs from handling things +slightly differently under different circumstances. I also suspect it +improved performance, but don't have the data to back it up. 
-The `configure-gh-pages` script sets up a `documentation` directory that is -linked to a `gh-pages` branch of the project. You can then run `make gh-pages` -to update the documentation in this directory. Commit and push to your origin, -and your projects gh-page is updated. +### Testing While Writing ### -### Makefile ### +I developed several chunks of code in parallel while creating MMD 6. The vast +majority of it was developed largely in a [test-driven development] approach. +The other code was largely created with extensive unit testing to accomplish +this. -The overall build process is controlled by the master `Makefile`. It provides -the following commands: +[test-driven development]: https://en.wikipedia.org/wiki/Test-driven_development - make - make release +MMD isn't particularly amenable to this approach at the small level, but +instead I relied more on integration testing with an ever-growing collection +of text files and the corresponding HTML files in the MMD 6 test suite. This +allowed me to ensure new features work properly and that old features aren't +broken. At this time, there are 29 text files in the test suite, and many +more to come. -Generate the CMake build files for use or distribution. Once complete you will -need to change to the `build` directory and run `make`, `make test`, and -`cpack` as desired. - make zip +### Other Lessons ### -Direct CPack to create a zip installer rather than a graphical installer. +Some things that didn't do me any good.... - make debug +I considered differences between using `malloc` and `calloc` when initializing +tokens. The time saved by using `malloc` was basically exactly offset by the +initial time required to initialize the token to default null values as +compared to using `calloc`. When trying `calloc` failed to help me out +(thinking that clearing a single slab in the object pool would be faster), I +stuck with `malloc` as it makes more sense to me in my workflow. 
-Generate build files for [CuTest] unit testing. In the `build` directory, -run `make`, then `make test`. +I read a bit about [struct padding] and reordered some of my structs. It was not +until later that I discovered the `-Wpadded` option, and it's not clear +whether my changes modified anything. Since the structs were being padded +automatically, there was no noticeable performance change, and I didn't have +the tools to measure whether I could have improved memory usage at all. Not +sure this would be worth the effort -- much lower hanging fruit available. - make analyze +[struct padding]: http://www.catb.org/esr/structure-packing/ -If you have `clang` installed, this will generate debug build files with the -`scan-build` command. In the `build` directory, run `scan-build -V make` -to compile the software and view the static analysis results. - make xcode +## Differences in MultiMarkdown Itself ## -Build a project file for Xcode on OS X. +MultiMarkdown v6 is mostly about making a better MMD parser, but it will +likely involve a few changes to the MultiMarkdown language itself. - make windows - make windows-zip - make windows-32 - make windows-zip-32 -Use the MinGW software to cross-compile for Windows on a *nix machine. You can -specify the 32 bit option, and also the zip option as indicated. +1. I am thinking about removing Setext headers from the language. I almost +never use them, much preferring to use ATX style headers (`# foo #`). +Additionally, I have never liked the fact that Setext headers allow the +meaning of a line to be completely changed by the following line. It makes +the parsing slightly more difficult on a technical level (requiring some +backtracking at times). I'm not 100% certain on this, but right now I believe +it's the only Markdown feature that doesn't exist in MMD 6 yet. - make documentation +2. Whitespace is not allowed between the text brackets and label brackets in +reference links, images, footnotes, etc. 
For example `[foo] [bar]` will no +longer be the same as `[foo][bar]`. -Build the [Doxygen]-generated documentation. +3. Link and image titles can be quoted with `'foo'`, `"foo"`, or `(foo)`. - make clean +4. HTML elements are handled slightly differently. There is no longer a +`markdown="1"` feature. Instead, HTML elements that are on a line by +themselves will open an HTML block that will cause the rest of the "paragraph" +to be treated as HTML such that Markdown will not be parsed inside of it. +HTML block-level tags are even "stronger" at starting an HTML block. It is +not quite as complex as the approach used in CommonMark, but is similar under +most circumstances. -Clean out the `build` directory. Be sure to run this before running another -command. + For example, this would not be parsed: +
+ *foo* +
-## Git Submodules ## + But this would be: -Apparently, submodules are a rather controversial feature in git. For me, -however, they have proven invaluable. My most active projects depend on each -other, and the submodule feature allows me to easily keep everything up to -date. That said, however, I quickly realized that submodules don't work very -well using default commands. +
-The problem is that I want to always use the latest version of my submodules. -This is more easily accomplished when the submodule is set to the `master` -branch of the original repository, rather than a detached commit as happens -by default. In order to easily keep all submodules updated, there are two -scripts: + *foo* -1. `link_git_modules` -- this script is generally only run when the master -repository is first cloned, but can also be run after a new submodule is -added. It causes the submodules to automatically track the master branch. -If you need to modify this, there are instructions in the script itself -explaining how to modify it on a per submodule basis. Running this script -more than one time will not hurt anything. +
-2. `update_git_modules` -- this script simply causes each submodule to be -updated to the latest commit in the original repository. Again, running it -multiple times doesn't hurt anything. +5. I haven't worked a lot yet on the MMD-specific features, so there may be +more changes to come. One thing I do anticipate is that if fenced code blocks +stay, they will work slightly differently. Currently, an opening fence +doesn't mean anything unless there is a closing fence that follows it. Again, +this requires backtracking in the parser. I suspect that an opening fence +will definitely open a code block. If there is no closing fence, then the +rest of the document will remain inside the code block. This is the approach +used by CommonMark and it's a reasonable one, IMO. -## Source File Templates ## +## Where Does MultiMarkdown 6 Stand? ## -In the `templates` directory are two files, `template.c.in` and -`template.h.in`. These are used to create default source files that include -the project title, copyright, license, etc. They are also set up to include -some example information for [Doxygen] and [CuTest]. +### Features ### -## License ## +I *think* that all basic Markdown features have been implemented, except for +Setext headers, as mentioned above. Additionally, the following MultiMarkdown +features have been implemented: + +* Automatic cross-reference targets +* Basic Citation support +* CriticMarkup support +* Inline and reference footnotes +* Image and Link attributes (attributes can now be used with inline links as + well as reference links) +* Math support +* Smart quotes (support for languages other than english is not fully + implemented yet) +* Superscripts/subscripts + + +Things that are partially completed: + +* Citations -- still need: + * Syntax for "not cited" entries + * Output format + * HTML --> separate footnotes and citations? + * Locators required? 
+* CriticMarkup -- need to decide: + * How to handle CM stretches that include blank lines +* Fenced code blocks + + +Things yet to be completed: -The `c-template` project is released under the MIT License. +* Multiple blocks inside of reference footnotes +* Manually specified labels for headers +* Definition lists +* Abbreviations +* Metadata +* Glossaries +* Tables +* Table of Contents +* File Transclusion -GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: - https://github.com/fletcher/MultiMarkdown-4/ +### Accuracy ### -MMD 4 is released under both the MIT License and GPL. +MultiMarkdown v6 successfully parses the Markdown [syntax page], except for +the Setext header at the top. It passes the 29 test files currently in place. +There are a few ad +[syntax page]: https://daringfireball.net/projects/markdown/syntax -CuTest is released under the zlib/libpng license. See CuTest.c for the text -of the license. +### Performance ### -## The MIT License ## +Basic tests show that currently MMD 6 takes about 20-25% longer than CommonMark +0.27.0 to process long files (e.g. 0.2 MB). However, it is around 5% *faster* +than CommonMark when parsing a shorter file (27 kB) (measured by parsing the +same file 200 times over). This test suite is performed by using the Markdown +[syntax page], modified to avoid the use of the Setext header at the top. The +longer files tested are created by copying the same syntax page onto itself, +thereby doubling the length of the file with each iteration.
-Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +The largest file I test is approximately 108 MB (4096 copies of the syntax +page). On my machine (2012 Mac mini with 2.3 GHz Intel Core i7, 16 GB RAM), +it takes approximately 4.4 seconds to parse with MMD 6 and 3.7 seconds with +CommonMark. MMD 6 processes approximately 25 MB/s on this test file. +CommonMark 0.27.0 gets about 29 MB/s on the same machine. -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +There are some slight variations with the smaller test files (8-32 copies), +but overall the performance of both programs (MMD 6 and CommonMark) are +roughly linear as the test file gets bigger (double the file size and it takes +twice as long to parse, aka O(n)). + +Out of curiosity, I ran the same tests on the original Markdown.pl by Gruber +(v 1.0.2b8). It took approximately 178 seconds to parse 128 copies of the +file (3.4 MB) and was demonstrating quadratic performance characteristics +(double the file size and it takes 2^2 or 4 times longer to process, aka +O(n^2)). I didn't bother running it on larger versions of the test file. For +comparison, MMD 6 can process 128 copies in approximately 140 msec. + +Of note, the throughput speed drops when testing more complicated files +containing more advanced MultiMarkdown features, though it still seems to +maintain linear performance characteristics. A second test file is created by +concatenating all of the test suite files (including the Markdown syntax +file). In this case, MMD gets about 13 MB/s. 
CommonMark doesn't support +these additional features, so testing it with that file is not relevant. I +will work to see whether there are certain features in particular that are +more challenging and see whether they can be reworked to improve performance. + +As above, I have done some high level optimization of the parse strategy, but +I'm sure there's still a lot of room for further improvement to be made. +Suggestions welcome! + + +## License ## -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. diff --git a/lemon/CMakeLists.txt b/lemon/CMakeLists.txt new file mode 100644 index 0000000..87b42fc --- /dev/null +++ b/lemon/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required (VERSION 2.6) + +# Quick file to build lemon parser generator + +add_executable(lemon + lemon.c +) + +configure_file( + lempar.c ${PROJECT_BINARY_DIR}/lempar.c COPYONLY +) diff --git a/lemon/lemon.c b/lemon/lemon.c new file mode 100644 index 0000000..0fa3d63 --- /dev/null +++ b/lemon/lemon.c @@ -0,0 +1,5437 @@ +/* +** This file contains all sources (including headers) to the LEMON +** LALR(1) parser generator. The sources have been combined into a +** single file to make it easy to include LEMON in the source tree +** and Makefile of another program. +** +** The author of this program disclaims copyright. 
+*/ +#include +#include +#include +#include +#include +#include + +#define ISSPACE(X) isspace((unsigned char)(X)) +#define ISDIGIT(X) isdigit((unsigned char)(X)) +#define ISALNUM(X) isalnum((unsigned char)(X)) +#define ISALPHA(X) isalpha((unsigned char)(X)) +#define ISUPPER(X) isupper((unsigned char)(X)) +#define ISLOWER(X) islower((unsigned char)(X)) + + +#ifndef __WIN32__ +# if defined(_WIN32) || defined(WIN32) +# define __WIN32__ +# endif +#endif + +#ifdef __WIN32__ +#ifdef __cplusplus +extern "C" { +#endif +extern int access(const char *path, int mode); +#ifdef __cplusplus +} +#endif +#else +#include +#endif + +/* #define PRIVATE static */ +#define PRIVATE + +#ifdef TEST +#define MAXRHS 5 /* Set low to exercise exception code */ +#else +#define MAXRHS 1000 +#endif + +static int showPrecedenceConflict = 0; +static char *msort(char*,char**,int(*)(const char*,const char*)); + +/* +** Compilers are getting increasingly pedantic about type conversions +** as C evolves ever closer to Ada.... To work around the latest problems +** we have to define the following variant of strlen(). +*/ +#define lemonStrlen(X) ((int)strlen(X)) + +/* +** Compilers are starting to complain about the use of sprintf() and strcpy(), +** saying they are unsafe. So we define our own versions of those routines too. +** +** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and +** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). +** The third is a helper routine for vsnprintf() that adds texts to the end of a +** buffer, making sure the buffer is always zero-terminated. +** +** The string formatter is a minimal subset of stdlib sprintf() supporting only +** a few simply conversions: +** +** %d +** %s +** %.*s +** +*/ +static void lemon_addtext( + char *zBuf, /* The buffer to which text is added */ + int *pnUsed, /* Slots of the buffer used so far */ + const char *zIn, /* Text to add */ + int nIn, /* Bytes of text to add. 
-1 to use strlen() */ + int iWidth /* Field width. Negative to left justify */ +){ + if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} + while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } + if( nIn==0 ) return; + memcpy(&zBuf[*pnUsed], zIn, nIn); + *pnUsed += nIn; + while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } + zBuf[*pnUsed] = 0; +} +static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ + int i, j, k, c; + int nUsed = 0; + const char *z; + char zTemp[50]; + str[0] = 0; + for(i=j=0; (c = zFormat[i])!=0; i++){ + if( c=='%' ){ + int iWidth = 0; + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + c = zFormat[++i]; + if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ + if( c=='-' ) i++; + while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; + if( c=='-' ) iWidth = -iWidth; + c = zFormat[i]; + } + if( c=='d' ){ + int v = va_arg(ap, int); + if( v<0 ){ + lemon_addtext(str, &nUsed, "-", 1, iWidth); + v = -v; + }else if( v==0 ){ + lemon_addtext(str, &nUsed, "0", 1, iWidth); + } + k = 0; + while( v>0 ){ + k++; + zTemp[sizeof(zTemp)-k] = (v%10) + '0'; + v /= 10; + } + lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); + }else if( c=='s' ){ + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, -1, iWidth); + }else if( c=='.' 
&& memcmp(&zFormat[i], ".*s", 3)==0 ){ + i += 2; + k = va_arg(ap, int); + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, k, iWidth); + }else if( c=='%' ){ + lemon_addtext(str, &nUsed, "%", 1, 0); + }else{ + fprintf(stderr, "illegal format\n"); + exit(1); + } + j = i+1; + } + } + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + return nUsed; +} +static int lemon_sprintf(char *str, const char *format, ...){ + va_list ap; + int rc; + va_start(ap, format); + rc = lemon_vsprintf(str, format, ap); + va_end(ap); + return rc; +} +static void lemon_strcpy(char *dest, const char *src){ + while( (*(dest++) = *(src++))!=0 ){} +} +static void lemon_strcat(char *dest, const char *src){ + while( *dest ) dest++; + lemon_strcpy(dest, src); +} + + +/* a few forward declarations... */ +struct rule; +struct lemon; +struct action; + +static struct action *Action_new(void); +static struct action *Action_sort(struct action *); + +/********** From the file "build.h" ************************************/ +void FindRulePrecedences(); +void FindFirstSets(); +void FindStates(); +void FindLinks(); +void FindFollowSets(); +void FindActions(); + +/********* From the file "configlist.h" *********************************/ +void Configlist_init(void); +struct config *Configlist_add(struct rule *, int); +struct config *Configlist_addbasis(struct rule *, int); +void Configlist_closure(struct lemon *); +void Configlist_sort(void); +void Configlist_sortbasis(void); +struct config *Configlist_return(void); +struct config *Configlist_basis(void); +void Configlist_eat(struct config *); +void Configlist_reset(void); + +/********* From the file "error.h" ***************************************/ +void ErrorMsg(const char *, int,const char *, ...); + +/****** From the file "option.h" ******************************************/ +enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, + OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; +struct s_options { + enum option_type type; + const char *label; 
+ char *arg; + const char *message; +}; +int OptInit(char**,struct s_options*,FILE*); +int OptNArgs(void); +char *OptArg(int); +void OptErr(int); +void OptPrint(void); + +/******** From the file "parse.h" *****************************************/ +void Parse(struct lemon *lemp); + +/********* From the file "plink.h" ***************************************/ +struct plink *Plink_new(void); +void Plink_add(struct plink **, struct config *); +void Plink_copy(struct plink **, struct plink *); +void Plink_delete(struct plink *); + +/********** From the file "report.h" *************************************/ +void Reprint(struct lemon *); +void ReportOutput(struct lemon *); +void ReportTable(struct lemon *, int); +void ReportHeader(struct lemon *); +void CompressTables(struct lemon *); +void ResortStates(struct lemon *); + +/********** From the file "set.h" ****************************************/ +void SetSize(int); /* All sets will be of size N */ +char *SetNew(void); /* A new set for element 0..N */ +void SetFree(char*); /* Deallocate a set */ +int SetAdd(char*,int); /* Add element to a set */ +int SetUnion(char *,char *); /* A <- A U B, thru element N */ +#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ + +/********** From the file "struct.h" *************************************/ +/* +** Principal data structures for the LEMON parser generator. 
+*/ + +typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; + +/* Symbols (terminals and nonterminals) of the grammar are stored +** in the following: */ +enum symbol_type { + TERMINAL, + NONTERMINAL, + MULTITERMINAL +}; +enum e_assoc { + LEFT, + RIGHT, + NONE, + UNK +}; +struct symbol { + const char *name; /* Name of the symbol */ + int index; /* Index number for this symbol */ + enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ + struct rule *rule; /* Linked list of rules of this (if an NT) */ + struct symbol *fallback; /* fallback token in case this token doesn't parse */ + int prec; /* Precedence if defined (-1 otherwise) */ + enum e_assoc assoc; /* Associativity if precedence is defined */ + char *firstset; /* First-set for all rules of this symbol */ + Boolean lambda; /* True if NT and can generate an empty string */ + int useCnt; /* Number of times used */ + char *destructor; /* Code which executes whenever this symbol is + ** popped from the stack during error processing */ + int destLineno; /* Line number for start of destructor. Set to + ** -1 for duplicate destructors. */ + char *datatype; /* The data type of information held by this + ** object. Only used if type==NONTERMINAL */ + int dtnum; /* The data type number. In the parser, the value + ** stack is a union. The .yy%d element of this + ** union is the correct data type for this object */ + /* The following fields are used by MULTITERMINALs only */ + int nsubsym; /* Number of constituent symbols in the MULTI */ + struct symbol **subsym; /* Array of constituent symbols */ +}; + +/* Each production rule in the grammar is stored in the following +** structure. 
*/ +struct rule { + struct symbol *lhs; /* Left-hand side of the rule */ + const char *lhsalias; /* Alias for the LHS (NULL if none) */ + int lhsStart; /* True if left-hand side is the start symbol */ + int ruleline; /* Line number for the rule */ + int nrhs; /* Number of RHS symbols */ + struct symbol **rhs; /* The RHS symbols */ + const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ + int line; /* Line number at which code begins */ + const char *code; /* The code executed when this rule is reduced */ + const char *codePrefix; /* Setup code before code[] above */ + const char *codeSuffix; /* Breakdown code after code[] above */ + int noCode; /* True if this rule has no associated C code */ + int codeEmitted; /* True if the code has been emitted already */ + struct symbol *precsym; /* Precedence symbol for this rule */ + int index; /* An index number for this rule */ + int iRule; /* Rule number as used in the generated tables */ + Boolean canReduce; /* True if this rule is ever reduced */ + Boolean doesReduce; /* Reduce actions occur after optimization */ + struct rule *nextlhs; /* Next rule with the same LHS */ + struct rule *next; /* Next rule in the global list */ +}; + +/* A configuration is a production rule of the grammar together with +** a mark (dot) showing how much of that rule has been processed so far. +** Configurations also contain a follow-set which is a list of terminal +** symbols which are allowed to immediately follow the end of the rule. 
+** Every configuration is recorded as an instance of the following: */ +enum cfgstatus { + COMPLETE, + INCOMPLETE +}; +struct config { + struct rule *rp; /* The rule upon which the configuration is based */ + int dot; /* The parse point */ + char *fws; /* Follow-set for this configuration only */ + struct plink *fplp; /* Follow-set forward propagation links */ + struct plink *bplp; /* Follow-set backwards propagation links */ + struct state *stp; /* Pointer to state which contains this */ + enum cfgstatus status; /* used during followset and shift computations */ + struct config *next; /* Next configuration in the state */ + struct config *bp; /* The next basis configuration */ +}; + +enum e_action { + SHIFT, + ACCEPT, + REDUCE, + ERROR, + SSCONFLICT, /* A shift/shift conflict */ + SRCONFLICT, /* Was a reduce, but part of a conflict */ + RRCONFLICT, /* Was a reduce, but part of a conflict */ + SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ + RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ + NOT_USED, /* Deleted by compression */ + SHIFTREDUCE /* Shift first, then reduce */ +}; + +/* Every shift or reduce operation is stored as one of the following */ +struct action { + struct symbol *sp; /* The look-ahead symbol */ + enum e_action type; + union { + struct state *stp; /* The new state, if a shift */ + struct rule *rp; /* The rule, if a reduce */ + } x; + struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ + struct action *next; /* Next action for this state */ + struct action *collide; /* Next action with the same hash */ +}; + +/* Each state of the generated parser's finite state machine +** is encoded as an instance of the following structure. 
*/ +struct state { + struct config *bp; /* The basis configurations for this state */ + struct config *cfp; /* All configurations in this set */ + int statenum; /* Sequential number for this state */ + struct action *ap; /* List of actions for this state */ + int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ + int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ + int iDfltReduce; /* Default action is to REDUCE by this rule */ + struct rule *pDfltReduce;/* The default REDUCE rule. */ + int autoReduce; /* True if this is an auto-reduce state */ +}; +#define NO_OFFSET (-2147483647) + +/* A followset propagation link indicates that the contents of one +** configuration followset should be propagated to another whenever +** the first changes. */ +struct plink { + struct config *cfp; /* The configuration to which linked */ + struct plink *next; /* The next propagate link */ +}; + +/* The state vector for the entire parser generator is recorded as +** follows. (LEMON uses no global variables and makes little use of +** static variables. Fields in the following structure can be thought +** of as begin global variables in the program.) 
*/ +struct lemon { + struct state **sorted; /* Table of states sorted by state number */ + struct rule *rule; /* List of all rules */ + struct rule *startRule; /* First rule */ + int nstate; /* Number of states */ + int nxstate; /* nstate with tail degenerate states removed */ + int nrule; /* Number of rules */ + int nsymbol; /* Number of terminal and nonterminal symbols */ + int nterminal; /* Number of terminal symbols */ + struct symbol **symbols; /* Sorted array of pointers to symbols */ + int errorcnt; /* Number of errors */ + struct symbol *errsym; /* The error symbol */ + struct symbol *wildcard; /* Token that matches anything */ + char *name; /* Name of the generated parser */ + char *arg; /* Declaration of the 3th argument to parser */ + char *tokentype; /* Type of terminal symbols in the parser stack */ + char *vartype; /* The default type of non-terminal symbols */ + char *start; /* Name of the start symbol for the grammar */ + char *stacksize; /* Size of the parser stack */ + char *include; /* Code to put at the start of the C file */ + char *error; /* Code to execute when an error is seen */ + char *overflow; /* Code to execute on a stack overflow */ + char *failure; /* Code to execute on parser failure */ + char *accept; /* Code to execute when the parser excepts */ + char *extracode; /* Code appended to the generated file */ + char *tokendest; /* Code to execute to destroy token data */ + char *vardest; /* Code for the default non-terminal destructor */ + char *filename; /* Name of the input file */ + char *outname; /* Name of the current output file */ + char *tokenprefix; /* A prefix added to token names in the .h file */ + int nconflict; /* Number of parsing conflicts */ + int nactiontab; /* Number of entries in the yy_action[] table */ + int tablesize; /* Total table size of all tables in bytes */ + int basisflag; /* Print only basis configurations */ + int has_fallback; /* True if any %fallback is seen in the grammar */ + int nolinenosflag; /* 
True if #line statements should not be printed */ + char *argv0; /* Name of the program */ +}; + +#define MemoryCheck(X) if((X)==0){ \ + extern void memory_error(); \ + memory_error(); \ +} + +/**************** From the file "table.h" *********************************/ +/* +** All code in this file has been automatically generated +** from a specification in the file +** "table.q" +** by the associative array code building program "aagen". +** Do not edit this file! Instead, edit the specification +** file, then rerun aagen. +*/ +/* +** Code for processing tables in the LEMON parser generator. +*/ +/* Routines for handling a strings */ + +const char *Strsafe(const char *); + +void Strsafe_init(void); +int Strsafe_insert(const char *); +const char *Strsafe_find(const char *); + +/* Routines for handling symbols of the grammar */ + +struct symbol *Symbol_new(const char *); +int Symbolcmpp(const void *, const void *); +void Symbol_init(void); +int Symbol_insert(struct symbol *, const char *); +struct symbol *Symbol_find(const char *); +struct symbol *Symbol_Nth(int); +int Symbol_count(void); +struct symbol **Symbol_arrayof(void); + +/* Routines to manage the state table */ + +int Configcmp(const char *, const char *); +struct state *State_new(void); +void State_init(void); +int State_insert(struct state *, struct config *); +struct state *State_find(struct config *); +struct state **State_arrayof(/* */); + +/* Routines used for efficiency in Configlist_add */ + +void Configtable_init(void); +int Configtable_insert(struct config *); +struct config *Configtable_find(struct config *); +void Configtable_clear(int(*)(struct config *)); + +/****************** From the file "action.c" *******************************/ +/* +** Routines processing parser actions in the LEMON parser generator. 
+*/ + +/* Allocate a new parser action */ +static struct action *Action_new(void){ + static struct action *freelist = 0; + struct action *newaction; + + if( freelist==0 ){ + int i; + int amt = 100; + freelist = (struct action *)calloc(amt, sizeof(struct action)); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new parser action."); + exit(1); + } + for(i=0; inext; + return newaction; +} + +/* Compare two actions for sorting purposes. Return negative, zero, or +** positive if the first action is less than, equal to, or greater than +** the first +*/ +static int actioncmp( + struct action *ap1, + struct action *ap2 +){ + int rc; + rc = ap1->sp->index - ap2->sp->index; + if( rc==0 ){ + rc = (int)ap1->type - (int)ap2->type; + } + if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ + rc = ap1->x.rp->index - ap2->x.rp->index; + } + if( rc==0 ){ + rc = (int) (ap2 - ap1); + } + return rc; +} + +/* Sort parser actions */ +static struct action *Action_sort( + struct action *ap +){ + ap = (struct action *)msort((char *)ap,(char **)&ap->next, + (int(*)(const char*,const char*))actioncmp); + return ap; +} + +void Action_add( + struct action **app, + enum e_action type, + struct symbol *sp, + char *arg +){ + struct action *newaction; + newaction = Action_new(); + newaction->next = *app; + *app = newaction; + newaction->type = type; + newaction->sp = sp; + newaction->spOpt = 0; + if( type==SHIFT ){ + newaction->x.stp = (struct state *)arg; + }else{ + newaction->x.rp = (struct rule *)arg; + } +} +/********************** New code to implement the "acttab" module ***********/ +/* +** This module implements routines use to construct the yy_action[] table. +*/ + +/* +** The state of the yy_action table under construction is an instance of +** the following structure. +** +** The yy_action table maps the pair (state_number, lookahead) into an +** action_number. The table is an array of integers pairs. 
The state_number +** determines an initial offset into the yy_action array. The lookahead +** value is then added to this initial offset to get an index X into the +** yy_action array. If the aAction[X].lookahead equals the value of the +** of the lookahead input, then the value of the action_number output is +** aAction[X].action. If the lookaheads do not match then the +** default action for the state_number is returned. +** +** All actions associated with a single state_number are first entered +** into aLookahead[] using multiple calls to acttab_action(). Then the +** actions for that single state_number are placed into the aAction[] +** array with a single call to acttab_insert(). The acttab_insert() call +** also resets the aLookahead[] array in preparation for the next +** state number. +*/ +struct lookahead_action { + int lookahead; /* Value of the lookahead token */ + int action; /* Action to take on the given lookahead */ +}; +typedef struct acttab acttab; +struct acttab { + int nAction; /* Number of used slots in aAction[] */ + int nActionAlloc; /* Slots allocated for aAction[] */ + struct lookahead_action + *aAction, /* The yy_action[] table under construction */ + *aLookahead; /* A single new transaction set */ + int mnLookahead; /* Minimum aLookahead[].lookahead */ + int mnAction; /* Action associated with mnLookahead */ + int mxLookahead; /* Maximum aLookahead[].lookahead */ + int nLookahead; /* Used slots in aLookahead[] */ + int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ +}; + +/* Return the number of entries in the yy_action table */ +#define acttab_size(X) ((X)->nAction) + +/* The value for the N-th entry in yy_action */ +#define acttab_yyaction(X,N) ((X)->aAction[N].action) + +/* The value for the N-th entry in yy_lookahead */ +#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) + +/* Free all memory associated with the given acttab */ +void acttab_free(acttab *p){ + free( p->aAction ); + free( p->aLookahead ); + free( p ); 
+} + +/* Allocate a new acttab structure */ +acttab *acttab_alloc(void){ + acttab *p = (acttab *) calloc( 1, sizeof(*p) ); + if( p==0 ){ + fprintf(stderr,"Unable to allocate memory for a new acttab."); + exit(1); + } + memset(p, 0, sizeof(*p)); + return p; +} + +/* Add a new action to the current transaction set. +** +** This routine is called once for each lookahead for a particular +** state. +*/ +void acttab_action(acttab *p, int lookahead, int action){ + if( p->nLookahead>=p->nLookaheadAlloc ){ + p->nLookaheadAlloc += 25; + p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, + sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); + if( p->aLookahead==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + } + if( p->nLookahead==0 ){ + p->mxLookahead = lookahead; + p->mnLookahead = lookahead; + p->mnAction = action; + }else{ + if( p->mxLookaheadmxLookahead = lookahead; + if( p->mnLookahead>lookahead ){ + p->mnLookahead = lookahead; + p->mnAction = action; + } + } + p->aLookahead[p->nLookahead].lookahead = lookahead; + p->aLookahead[p->nLookahead].action = action; + p->nLookahead++; +} + +/* +** Add the transaction set built up with prior calls to acttab_action() +** into the current action table. Then reset the transaction set back +** to an empty set in preparation for a new round of acttab_action() calls. +** +** Return the offset into the action table of the new transaction. +*/ +int acttab_insert(acttab *p){ + int i, j, k, n; + assert( p->nLookahead>0 ); + + /* Make sure we have enough space to hold the expanded action table + ** in the worst case. 
The worst case occurs if the transaction set + ** must be appended to the current action table + */ + n = p->mxLookahead + 1; + if( p->nAction + n >= p->nActionAlloc ){ + int oldAlloc = p->nActionAlloc; + p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; + p->aAction = (struct lookahead_action *) realloc( p->aAction, + sizeof(p->aAction[0])*p->nActionAlloc); + if( p->aAction==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=oldAlloc; inActionAlloc; i++){ + p->aAction[i].lookahead = -1; + p->aAction[i].action = -1; + } + } + + /* Scan the existing action table looking for an offset that is a + ** duplicate of the current transaction set. Fall out of the loop + ** if and when the duplicate is found. + ** + ** i is the index in p->aAction[] where p->mnLookahead is inserted. + */ + for(i=p->nAction-1; i>=0; i--){ + if( p->aAction[i].lookahead==p->mnLookahead ){ + /* All lookaheads and actions in the aLookahead[] transaction + ** must match against the candidate aAction[i] entry. */ + if( p->aAction[i].action!=p->mnAction ) continue; + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 || k>=p->nAction ) break; + if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; + if( p->aLookahead[j].action!=p->aAction[k].action ) break; + } + if( jnLookahead ) continue; + + /* No possible lookahead value that is not in the aLookahead[] + ** transaction is allowed to match aAction[i] */ + n = 0; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead<0 ) continue; + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; + } + if( n==p->nLookahead ){ + break; /* An exact match is found at offset i */ + } + } + } + + /* If no existing offsets exactly match the current transaction, find an + ** an empty offset in the aAction[] table in which we can add the + ** aLookahead[] transaction. + */ + if( i<0 ){ + /* Look for holes in the aAction[] table that fit the current + ** aLookahead[] transaction. 
Leave i set to the offset of the hole. + ** If no holes are found, i is left at p->nAction, which means the + ** transaction will be appended. */ + for(i=0; inActionAlloc - p->mxLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( jnLookahead ) continue; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } + } + } + } + /* Insert transaction set at index i. */ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + p->aAction[k] = p->aLookahead[j]; + if( k>=p->nAction ) p->nAction = k+1; + } + p->nLookahead = 0; + + /* Return the offset that is added to the lookahead in order to get the + ** index into yy_action of the action */ + return i - p->mnLookahead; +} + +/********************** From the file "build.c" *****************************/ +/* +** Routines to construction the finite state machine for the LEMON +** parser generator. +*/ + +/* Find a precedence symbol of every rule in the grammar. +** +** Those rules which have a precedence symbol coded in the input +** grammar using the "[symbol]" construct will already have the +** rp->precsym field filled. Other rules take as their precedence +** symbol the first RHS symbol with a defined precedence. If there +** are not RHS symbols with a defined precedence, the precedence +** symbol field is left blank. 
+*/ +void FindRulePrecedences(struct lemon *xp) +{ + struct rule *rp; + for(rp=xp->rule; rp; rp=rp->next){ + if( rp->precsym==0 ){ + int i, j; + for(i=0; inrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ + rp->precsym = rp->rhs[i]; + } + } + } + } + return; +} + +/* Find all nonterminals which will generate the empty string. +** Then go back and compute the first sets of every nonterminal. +** The first set is the set of all terminal symbols which can begin +** a string generated by that nonterminal. +*/ +void FindFirstSets(struct lemon *lemp) +{ + int i, j; + struct rule *rp; + int progress; + + for(i=0; insymbol; i++){ + lemp->symbols[i]->lambda = LEMON_FALSE; + } + for(i=lemp->nterminal; insymbol; i++){ + lemp->symbols[i]->firstset = SetNew(); + } + + /* First compute all lambdas */ + do{ + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->lhs->lambda ) continue; + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); + if( sp->lambda==LEMON_FALSE ) break; + } + if( i==rp->nrhs ){ + rp->lhs->lambda = LEMON_TRUE; + progress = 1; + } + } + }while( progress ); + + /* Now compute all first sets */ + do{ + struct symbol *s1, *s2; + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + s1 = rp->lhs; + for(i=0; inrhs; i++){ + s2 = rp->rhs[i]; + if( s2->type==TERMINAL ){ + progress += SetAdd(s1->firstset,s2->index); + break; + }else if( s2->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ + progress += SetUnion(s1->firstset,s2->firstset); + if( s2->lambda==LEMON_FALSE ) break; + } + } + } + }while( progress ); + return; +} + +/* Compute all LR(0) states for the grammar. 
Links +** are added to between some states so that the LR(1) follow sets +** can be computed later. +*/ +PRIVATE struct state *getstate(struct lemon *); /* forward reference */ +void FindStates(struct lemon *lemp) +{ + struct symbol *sp; + struct rule *rp; + + Configlist_init(); + + /* Find the start symbol */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ){ + ErrorMsg(lemp->filename,0, +"The specified start symbol \"%s\" is not \ +in a nonterminal of the grammar. \"%s\" will be used as the start \ +symbol instead.",lemp->start,lemp->startRule->lhs->name); + lemp->errorcnt++; + sp = lemp->startRule->lhs; + } + }else{ + sp = lemp->startRule->lhs; + } + + /* Make sure the start symbol doesn't occur on the right-hand side of + ** any rule. Report an error if it does. (YACC would generate a new + ** start symbol in this case.) */ + for(rp=lemp->rule; rp; rp=rp->next){ + int i; + for(i=0; inrhs; i++){ + if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ + ErrorMsg(lemp->filename,0, +"The start symbol \"%s\" occurs on the \ +right-hand side of a rule. This will result in a parser which \ +does not work properly.",sp->name); + lemp->errorcnt++; + } + } + } + + /* The basis configuration set for the first state + ** is all rules which have the start symbol as their + ** left-hand side */ + for(rp=sp->rule; rp; rp=rp->nextlhs){ + struct config *newcfp; + rp->lhsStart = 1; + newcfp = Configlist_addbasis(rp,0); + SetAdd(newcfp->fws,0); + } + + /* Compute the first state. All other states will be + ** computed automatically during the computation of the first one. + ** The returned pointer to the first state is not used. */ + (void)getstate(lemp); + return; +} + +/* Return a pointer to a state which is described by the configuration +** list which has been built from calls to Configlist_add. 
+*/ +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) +{ + struct config *cfp, *bp; + struct state *stp; + + /* Extract the sorted basis of the new state. The basis was constructed + ** by prior calls to "Configlist_addbasis()". */ + Configlist_sortbasis(); + bp = Configlist_basis(); + + /* Get a state with the same basis */ + stp = State_find(bp); + if( stp ){ + /* A state with the same basis already exists! Copy all the follow-set + ** propagation links from the state under construction into the + ** preexisting state, then return a pointer to the preexisting state */ + struct config *x, *y; + for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ + Plink_copy(&y->bplp,x->bplp); + Plink_delete(x->fplp); + x->fplp = x->bplp = 0; + } + cfp = Configlist_return(); + Configlist_eat(cfp); + }else{ + /* This really is a new state. Construct all the details */ + Configlist_closure(lemp); /* Compute the configuration closure */ + Configlist_sort(); /* Sort the configuration closure */ + cfp = Configlist_return(); /* Get a pointer to the config list */ + stp = State_new(); /* A new state structure */ + MemoryCheck(stp); + stp->bp = bp; /* Remember the configuration basis */ + stp->cfp = cfp; /* Remember the configuration closure */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ + stp->ap = 0; /* No actions, yet. */ + State_insert(stp,stp->bp); /* Add to the state table */ + buildshifts(lemp,stp); /* Recursively compute successor states */ + } + return stp; +} + +/* +** Return true if two symbols are the same. +*/ +int same_symbol(struct symbol *a, struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; insubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + +/* Construct all successor states to the given state. 
A "successor" +** state is any state which can be reached by a shift action. +*/ +PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) +{ + struct config *cfp; /* For looping thru the config closure of "stp" */ + struct config *bcfp; /* For the inner loop on config closure of "stp" */ + struct config *newcfg; /* */ + struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ + struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ + struct state *newstp; /* A pointer to a successor state */ + + /* Each configuration becomes complete after it contibutes to a successor + ** state. Initially, all configurations are incomplete */ + for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; + + /* Loop through all configurations of the state "stp" */ + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ + if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ + Configlist_reset(); /* Reset the new config set */ + sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ + + /* For every configuration in the state "stp" which has the symbol "sp" + ** following its dot, add the same configuration to the basis set under + ** construction but with the dot shifted one symbol to the right. 
*/ + for(bcfp=cfp; bcfp; bcfp=bcfp->next){ + if( bcfp->status==COMPLETE ) continue; /* Already used */ + if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ + bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ + if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ + bcfp->status = COMPLETE; /* Mark this config as used */ + newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); + Plink_add(&newcfg->bplp,bcfp); + } + + /* Get a pointer to the state described by the basis configuration set + ** constructed in the preceding loop */ + newstp = getstate(lemp); + + /* The state "newstp" is reached from the state "stp" by a shift action + ** on the symbol "sp" */ + if( sp->type==MULTITERMINAL ){ + int i; + for(i=0; insubsym; i++){ + Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); + } + }else{ + Action_add(&stp->ap,SHIFT,sp,(char *)newstp); + } + } +} + +/* +** Construct the propagation links +*/ +void FindLinks(struct lemon *lemp) +{ + int i; + struct config *cfp, *other; + struct state *stp; + struct plink *plp; + + /* Housekeeping detail: + ** Add to every propagate link a pointer back to the state to + ** which the link is attached. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + cfp->stp = stp; + } + } + + /* Convert all backlinks into forward links. Only the forward + ** links are used in the follow-set computation. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + for(plp=cfp->bplp; plp; plp=plp->next){ + other = plp->cfp; + Plink_add(&other->fplp,cfp); + } + } + } +} + +/* Compute all followsets. +** +** A followset is the set of all symbols which can come immediately +** after a configuration. 
+*/ +void FindFollowSets(struct lemon *lemp) +{ + int i; + struct config *cfp; + struct plink *plp; + int progress; + int change; + + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + cfp->status = INCOMPLETE; + } + } + + do{ + progress = 0; + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; + for(plp=cfp->fplp; plp; plp=plp->next){ + change = SetUnion(plp->cfp->fws,cfp->fws); + if( change ){ + plp->cfp->status = INCOMPLETE; + progress = 1; + } + } + cfp->status = COMPLETE; + } + } + }while( progress ); +} + +static int resolve_conflict(struct action *,struct action *); + +/* Compute the reduce actions, and resolve conflicts. +*/ +void FindActions(struct lemon *lemp) +{ + int i,j; + struct config *cfp; + struct state *stp; + struct symbol *sp; + struct rule *rp; + + /* Add all of the reduce actions + ** A reduce action is added for each element of the followset of + ** a configuration which has its dot at the extreme right. + */ + for(i=0; instate; i++){ /* Loop over all states */ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ + if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ + for(j=0; jnterminal; j++){ + if( SetFind(cfp->fws,j) ){ + /* Add a reduce action to the state "stp" which will reduce by the + ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); + } + } + } + } + } + + /* Add the accepting token */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ) sp = lemp->startRule->lhs; + }else{ + sp = lemp->startRule->lhs; + } + /* Add to the first state (which is always the starting state of the + ** finite state machine) an action to ACCEPT if the lookahead is the + ** start nonterminal. 
*/ + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + + /* Resolve conflicts */ + for(i=0; instate; i++){ + struct action *ap, *nap; + stp = lemp->sorted[i]; + /* assert( stp->ap ); */ + stp->ap = Action_sort(stp->ap); + for(ap=stp->ap; ap && ap->next; ap=ap->next){ + for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ + /* The two actions "ap" and "nap" have the same lookahead. + ** Figure out which one should be used */ + lemp->nconflict += resolve_conflict(ap,nap); + } + } + } + + /* Report an error for each rule that can never be reduced. */ + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; + for(i=0; instate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; + } + } + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->canReduce ) continue; + ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); + lemp->errorcnt++; + } +} + +/* Resolve a conflict between the two given actions. If the +** conflict can't be resolved, return non-zero. +** +** NO LONGER TRUE: +** To resolve a conflict, first look to see if either action +** is on an error rule. In that case, take the action which +** is not associated with the error rule. If neither or both +** actions are associated with an error rule, then try to +** use precedence to resolve the conflict. +** +** If either action is a SHIFT, then it must be apx. This +** function won't work if apx->type==REDUCE and apy->type==SHIFT. +*/ +static int resolve_conflict( + struct action *apx, + struct action *apy +){ + struct symbol *spx, *spy; + int errcnt = 0; + assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } + if( apx->type==SHIFT && apy->type==REDUCE ){ + spx = apx->sp; + spy = apy->x.rp->precsym; + if( spy==0 || spx->prec<0 || spy->prec<0 ){ + /* Not enough precedence information. 
*/ + apy->type = SRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ /* higher precedence wins */ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = SH_RESOLVED; + }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ + apy->type = RD_RESOLVED; /* associativity */ + }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ + apx->type = SH_RESOLVED; + }else{ + assert( spx->prec==spy->prec && spx->assoc==NONE ); + apx->type = ERROR; + } + }else if( apx->type==REDUCE && apy->type==REDUCE ){ + spx = apx->x.rp->precsym; + spy = apy->x.rp->precsym; + if( spx==0 || spy==0 || spx->prec<0 || + spy->prec<0 || spx->prec==spy->prec ){ + apy->type = RRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = RD_RESOLVED; + } + }else{ + assert( + apx->type==SH_RESOLVED || + apx->type==RD_RESOLVED || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || + apy->type==SH_RESOLVED || + apy->type==RD_RESOLVED || + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT + ); + /* The REDUCE/SHIFT case cannot happen because SHIFTs come before + ** REDUCEs on the list. If we reach this point it must be because + ** the parser conflict had already been resolved. */ + } + return errcnt; +} +/********************* From the file "configlist.c" *************************/ +/* +** Routines to processing a configuration list and building a state +** in the LEMON parser generator. 
+*/ + +static struct config *freelist = 0; /* List of free configurations */ +static struct config *current = 0; /* Top of list of configurations */ +static struct config **currentend = 0; /* Last on list of configs */ +static struct config *basis = 0; /* Top of list of basis configs */ +static struct config **basisend = 0; /* End of list of basis configs */ + +/* Return a pointer to a new configuration */ +PRIVATE struct config *newconfig(){ + struct config *newcfg; + if( freelist==0 ){ + int i; + int amt = 3; + freelist = (struct config *)calloc( amt, sizeof(struct config) ); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new configuration."); + exit(1); + } + for(i=0; inext; + return newcfg; +} + +/* The configuration "old" is no longer used */ +PRIVATE void deleteconfig(struct config *old) +{ + old->next = freelist; + freelist = old; +} + +/* Initialized the configuration list builder */ +void Configlist_init(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_init(); + return; +} + +/* Initialized the configuration list builder */ +void Configlist_reset(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_clear(0); + return; +} + +/* Add another configuration to the configuration list */ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot /* Index into the RHS of the rule where the dot goes */ +){ + struct config *cfp, model; + + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + Configtable_insert(cfp); + } + return cfp; +} + +/* Add a basis configuration to the configuration list */ +struct config *Configlist_addbasis(struct rule *rp, int dot) +{ + struct config *cfp, model; + + assert( 
basisend!=0 ); + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + *basisend = cfp; + basisend = &cfp->bp; + Configtable_insert(cfp); + } + return cfp; +} + +/* Compute the closure of the configuration list */ +void Configlist_closure(struct lemon *lemp) +{ + struct config *cfp, *newcfp; + struct rule *rp, *newrp; + struct symbol *sp, *xsp; + int i, dot; + + assert( currentend!=0 ); + for(cfp=current; cfp; cfp=cfp->next){ + rp = cfp->rp; + dot = cfp->dot; + if( dot>=rp->nrhs ) continue; + sp = rp->rhs[dot]; + if( sp->type==NONTERMINAL ){ + if( sp->rule==0 && sp!=lemp->errsym ){ + ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", + sp->name); + lemp->errorcnt++; + } + for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ + newcfp = Configlist_add(newrp,0); + for(i=dot+1; inrhs; i++){ + xsp = rp->rhs[i]; + if( xsp->type==TERMINAL ){ + SetAdd(newcfp->fws,xsp->index); + break; + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; knsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ + SetUnion(newcfp->fws,xsp->firstset); + if( xsp->lambda==LEMON_FALSE ) break; + } + } + if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); + } + } + } + return; +} + +/* Sort the configuration list */ +void Configlist_sort(){ + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); + currentend = 0; + return; +} + +/* Sort the basis configuration list */ +void Configlist_sortbasis(){ + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); + basisend = 0; + return; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_return(){ + struct config *old; + old = 
current; + current = 0; + currentend = 0; + return old; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_basis(){ + struct config *old; + old = basis; + basis = 0; + basisend = 0; + return old; +} + +/* Free all elements of the given configuration list */ +void Configlist_eat(struct config *cfp) +{ + struct config *nextcfp; + for(; cfp; cfp=nextcfp){ + nextcfp = cfp->next; + assert( cfp->fplp==0 ); + assert( cfp->bplp==0 ); + if( cfp->fws ) SetFree(cfp->fws); + deleteconfig(cfp); + } + return; +} +/***************** From the file "error.c" *********************************/ +/* +** Code for printing error message. +*/ + +void ErrorMsg(const char *filename, int lineno, const char *format, ...){ + va_list ap; + fprintf(stderr, "%s:%d: ", filename, lineno); + va_start(ap, format); + vfprintf(stderr,format,ap); + va_end(ap); + fprintf(stderr, "\n"); +} +/**************** From the file "main.c" ************************************/ +/* +** Main program file for the LEMON parser generator. +*/ + +/* Report an out-of-memory condition and abort. This function +** is used mostly by the "MemoryCheck" macro in struct.h +*/ +void memory_error(){ + fprintf(stderr,"Out of memory. Aborting...\n"); + exit(1); +} + +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. 
+*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = (char *) malloc( lemonStrlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + +static char *user_templatename = NULL; +static void handle_T_option(char *z){ + user_templatename = (char *) malloc( lemonStrlen(z)+1 ); + if( user_templatename==0 ){ + memory_error(); + } + lemon_strcpy(user_templatename, z); +} + +/* Merge together to lists of rules ordered by rule.iRule */ +static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ + struct rule *pFirst = 0; + struct rule **ppPrev = &pFirst; + while( pA && pB ){ + if( pA->iRuleiRule ){ + *ppPrev = pA; + ppPrev = &pA->next; + pA = pA->next; + }else{ + *ppPrev = pB; + ppPrev = &pB->next; + pB = pB->next; + } + } + if( pA ){ + *ppPrev = pA; + }else{ + *ppPrev = pB; + } + return pFirst; +} + +/* +** Sort a list of rules in order of increasing iRule value +*/ +static struct rule *Rule_sort(struct rule *rp){ + int i; + struct rule *pNext; + struct rule *x[32]; + memset(x, 0, sizeof(x)); + while( rp ){ + pNext = rp->next; + rp->next = 0; + for(i=0; iuseCnt = 0; + + /* Parse the input file */ + Parse(&lem); + if( lem.errorcnt ) exit(lem.errorcnt); + if( lem.nrule==0 ){ + fprintf(stderr,"Empty grammar.\n"); + exit(1); + } + + /* Count and index the symbols of the grammar */ + Symbol_new("{default}"); + lem.nsymbol = Symbol_count(); + lem.symbols = Symbol_arrayof(); + for(i=0; iindex = i; + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); + for(i=0; iindex = i; + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; + for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); + 
lem.nterminal = i; + + /* Assign sequential rule numbers. Start with 0. Put rules that have no + ** reduce action C-code associated with them last, so that the switch() + ** statement that selects reduction actions will have a smaller jump table. + */ + for(i=0, rp=lem.rule; rp; rp=rp->next){ + rp->iRule = rp->code ? i++ : -1; + } + for(rp=lem.rule; rp; rp=rp->next){ + if( rp->iRule<0 ) rp->iRule = i++; + } + lem.startRule = lem.rule; + lem.rule = Rule_sort(lem.rule); + + /* Generate a reprint of the grammar, if requested on the command line */ + if( rpflag ){ + Reprint(&lem); + }else{ + /* Initialize the size for all follow and first sets */ + SetSize(lem.nterminal+1); + + /* Find the precedence for every production rule (that has one) */ + FindRulePrecedences(&lem); + + /* Compute the lambda-nonterminals and the first-sets for every + ** nonterminal */ + FindFirstSets(&lem); + + /* Compute all LR(0) states. Also record follow-set propagation + ** links so that the follow-set can be computed later */ + lem.nstate = 0; + FindStates(&lem); + lem.sorted = State_arrayof(); + + /* Tie up loose ends on the propagation links */ + FindLinks(&lem); + + /* Compute the follow set of every reducible configuration */ + FindFollowSets(&lem); + + /* Compute the action tables */ + FindActions(&lem); + + /* Compress the action tables */ + if( compress==0 ) CompressTables(&lem); + + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. This is an optimization that helps make the + ** generated parser tables smaller. */ + if( noResort==0 ) ResortStates(&lem); + + /* Generate a report of the parser generated. (the "y.output" file) */ + if( !quiet ) ReportOutput(&lem); + + /* Generate the source code for the parser */ + ReportTable(&lem, mhflag); + + /* Produce a header file for use by the scanner. (This step is + ** omitted if the "-m" option is used because makeheaders will + ** generate the file for us.) 
*/ + if( !mhflag ) ReportHeader(&lem); + } + if( statistics ){ + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("total table size (bytes)", lem.tablesize); + } + if( lem.nconflict > 0 ){ + fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); + } + + /* return 0 on success, 1 on failure. */ + exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; + exit(exitcode); + return (exitcode); +} +/******************** From the file "msort.c" *******************************/ +/* +** A generic merge-sort program. +** +** USAGE: +** Let "ptr" be a pointer to some structure which is at the head of +** a null-terminated list. Then to sort the list call: +** +** ptr = msort(ptr,&(ptr->next),cmpfnc); +** +** In the above, "cmpfnc" is a pointer to a function which compares +** two instances of the structure and returns an integer, as in +** strcmp. The second argument is a pointer to the pointer to the +** second element of the linked list. This address is used to compute +** the offset to the "next" field within the structure. The offset to +** the "next" field must be constant for all structures in the list. +** +** The function returns a new pointer which is the head of the list +** after sorting. +** +** ALGORITHM: +** Merge-sort. +*/ + +/* +** Return a pointer to the next structure in the linked list. +*/ +#define NEXT(A) (*(char**)(((char*)A)+offset)) + +/* +** Inputs: +** a: A sorted, null-terminated linked list. (May be null). +** b: A sorted, null-terminated linked list. (May be null). +** cmp: A pointer to the comparison function. +** offset: Offset in the structure to the "next" field. 
+**
+** Return Value:
+**   A pointer to the head of a sorted list containing the elements
+**   of both a and b.
+**
+** Side effects:
+**   The "next" pointers for elements in the lists a and b are
+**   changed.
+*/
+static char *merge(
+  char *a,
+  char *b,
+  int (*cmp)(const char*,const char*),
+  int offset
+){
+  char *ptr, *head;
+
+  if( a==0 ){
+    head = b;              /* nothing to merge: result is b (may be null) */
+  }else if( b==0 ){
+    head = a;              /* nothing to merge: result is a */
+  }else{
+    if( (*cmp)(a,b)<=0 ){  /* on a tie the element from a goes first */
+      ptr = a;
+      a = NEXT(a);
+    }else{
+      ptr = b;
+      b = NEXT(b);
+    }
+    head = ptr;            /* smaller of the two first elements heads the result */
+    while( a && b ){
+      if( (*cmp)(a,b)<=0 ){
+        NEXT(ptr) = a;     /* NEXT() uses the "offset" parameter */
+        ptr = a;
+        a = NEXT(a);
+      }else{
+        NEXT(ptr) = b;
+        ptr = b;
+        b = NEXT(b);
+      }
+    }
+    if( a ) NEXT(ptr) = a; /* append whichever list still has elements */
+    else    NEXT(ptr) = b;
+  }
+  return head;
+}
+
+/*
+** Inputs:
+**   list:      Pointer to a singly-linked list of structures.
+**   next:      Pointer to pointer to the second element of the list.
+**   cmp:       A comparison function.
+**
+** Return Value:
+**   A pointer to the head of a sorted list containing the elements
+**   originally in list.
+**
+** Side effects:
+**   The "next" pointers for elements in list are changed.
+*/ +#define LISTSIZE 30 +static char *msort( + char *list, + char **next, + int (*cmp)(const char*,const char*) +){ + unsigned long offset; + char *ep; + char *set[LISTSIZE]; + int i; + offset = (unsigned long)((char*)next - (char*)list); + for(i=0; istate = WAITING_FOR_DECL_KEYWORD; + }else if( ISLOWER(x[0]) ){ + psp->lhs = Symbol_new(x); + psp->nrhs = 0; + psp->lhsalias = 0; + psp->state = WAITING_FOR_ARROW; + }else if( x[0]=='{' ){ + if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"There is no prior rule upon which to attach the code \ +fragment which begins on this line."); + psp->errorcnt++; + }else if( psp->prevrule->code!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Code fragment beginning on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->line = psp->tokenlineno; + psp->prevrule->code = &x[1]; + psp->prevrule->noCode = 0; + } + }else if( x[0]=='[' ){ + psp->state = PRECEDENCE_MARK_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Token \"%s\" should be either \"%%\" or a nonterminal name.", + x); + psp->errorcnt++; + } + break; + case PRECEDENCE_MARK_1: + if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "The precedence symbol must be a terminal."); + psp->errorcnt++; + }else if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule to assign precedence \"[%s]\".",x); + psp->errorcnt++; + }else if( psp->prevrule->precsym!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Precedence mark on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->precsym = Symbol_new(x); + } + psp->state = PRECEDENCE_MARK_2; + break; + case PRECEDENCE_MARK_2: + if( x[0]!=']' ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"]\" on precedence mark."); + psp->errorcnt++; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + break; + case WAITING_FOR_ARROW: + if( x[0]==':' && 
x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else if( x[0]=='(' ){ + psp->state = LHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Expected to see a \":\" following the LHS symbol \"%s\".", + psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->lhsalias = x; + psp->state = LHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the LHS \"%s\"\n", + x,psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = LHS_ALIAS_3; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_3: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"->\" following: \"%s(%s)\".", + psp->lhs->name,psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case IN_RHS: + if( x[0]=='.' 
){ + struct rule *rp; + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); + if( rp==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't allocate enough memory for this rule."); + psp->errorcnt++; + psp->prevrule = 0; + }else{ + int i; + rp->ruleline = psp->tokenlineno; + rp->rhs = (struct symbol**)&rp[1]; + rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); + for(i=0; inrhs; i++){ + rp->rhs[i] = psp->rhs[i]; + rp->rhsalias[i] = psp->alias[i]; + } + rp->lhs = psp->lhs; + rp->lhsalias = psp->lhsalias; + rp->nrhs = psp->nrhs; + rp->code = 0; + rp->noCode = 1; + rp->precsym = 0; + rp->index = psp->gp->nrule++; + rp->nextlhs = rp->lhs->rule; + rp->lhs->rule = rp; + rp->next = 0; + if( psp->firstrule==0 ){ + psp->firstrule = psp->lastrule = rp; + }else{ + psp->lastrule->next = rp; + psp->lastrule = rp; + } + psp->prevrule = rp; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISALPHA(x[0]) ){ + if( psp->nrhs>=MAXRHS ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Too many symbols on RHS of rule beginning at \"%s\".", + x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + }else{ + psp->rhs[psp->nrhs] = Symbol_new(x); + psp->alias[psp->nrhs] = 0; + psp->nrhs++; + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = (struct symbol *) calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + 
"Cannot form a compound containing a non-terminal"); + psp->errorcnt++; + } + }else if( x[0]=='(' && psp->nrhs>0 ){ + psp->state = RHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal character on RHS of rule: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->alias[psp->nrhs-1] = x; + psp->state = RHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", + x,psp->rhs[psp->nrhs-1]->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case WAITING_FOR_DECL_KEYWORD: + if( ISALPHA(x[0]) ){ + psp->declkeyword = x; + psp->declargslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + if( strcmp(x,"name")==0 ){ + psp->declargslot = &(psp->gp->name); + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ + psp->declargslot = &(psp->gp->include); + }else if( strcmp(x,"code")==0 ){ + psp->declargslot = &(psp->gp->extracode); + }else if( strcmp(x,"token_destructor")==0 ){ + psp->declargslot = &psp->gp->tokendest; + }else if( strcmp(x,"default_destructor")==0 ){ + psp->declargslot = &psp->gp->vardest; + }else if( strcmp(x,"token_prefix")==0 ){ + psp->declargslot = &psp->gp->tokenprefix; + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ + psp->declargslot = &(psp->gp->error); + }else if( strcmp(x,"parse_accept")==0 ){ + psp->declargslot = &(psp->gp->accept); + }else if( strcmp(x,"parse_failure")==0 ){ + psp->declargslot = &(psp->gp->failure); + }else if( strcmp(x,"stack_overflow")==0 ){ + psp->declargslot = &(psp->gp->overflow); + }else if( 
strcmp(x,"extra_argument")==0 ){ + psp->declargslot = &(psp->gp->arg); + psp->insertLineMacro = 0; + }else if( strcmp(x,"token_type")==0 ){ + psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"default_type")==0 ){ + psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"stack_size")==0 ){ + psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; + }else if( strcmp(x,"start_symbol")==0 ){ + psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; + }else if( strcmp(x,"left")==0 ){ + psp->preccounter++; + psp->declassoc = LEFT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"right")==0 ){ + psp->preccounter++; + psp->declassoc = RIGHT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"nonassoc")==0 ){ + psp->preccounter++; + psp->declassoc = NONE; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"destructor")==0 ){ + psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; + }else if( strcmp(x,"type")==0 ){ + psp->state = WAITING_FOR_DATATYPE_SYMBOL; + }else if( strcmp(x,"fallback")==0 ){ + psp->fallback = 0; + psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Unknown declaration keyword: \"%%%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal declaration keyword: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_DESTRUCTOR_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->destructor; + 
psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case WAITING_FOR_DATATYPE_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%type keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_find(x); + if((sp) && (sp->datatype)){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol %%type \"%s\" already defined", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + if (!sp){ + sp = Symbol_new(x); + } + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } + } + break; + case WAITING_FOR_PRECEDENCE_SYMBOL: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) ){ + struct symbol *sp; + sp = Symbol_new(x); + if( sp->prec>=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol \"%s\" has already be given a precedence.",x); + psp->errorcnt++; + }else{ + sp->prec = psp->preccounter; + sp->assoc = psp->declassoc; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't assign a precedence to \"%s\".",x); + psp->errorcnt++; + } + break; + case WAITING_FOR_DECL_ARG: + if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ + const char *zOld, *zNew; + char *zBuf, *z; + int nOld, n, nLine = 0, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = lemonStrlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = lemonStrlen(zOld); + n = nOld + nNew + 20; + addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && + (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); + nLine = lemonStrlen(zLine); + n += nLine + lemonStrlen(psp->filename) + 
nBack; + } + *psp->declargslot = (char *) realloc(*psp->declargslot, n); + zBuf = *psp->declargslot + nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal argument to %%%s: %s",psp->declkeyword,x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_FALLBACK_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%fallback argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->fallback==0 ){ + psp->fallback = sp; + }else if( sp->fallback ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "More than one fallback assigned to token %s", x); + psp->errorcnt++; + }else{ + sp->fallback = psp->fallback; + psp->gp->has_fallback = 1; + } + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' 
){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + case WAITING_FOR_CLASS_ID: + if( !ISLOWER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: ", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !ISUPPER(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case RESYNC_AFTER_RULE_ERROR: +/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; +** break; */ + case RESYNC_AFTER_DECL_ERROR: + if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; + if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; + break; + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. 
This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. +*/ +static void preprocess_input(char *z){ + int i, j, k, n; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; jfilename; + ps.errorcnt = 0; + ps.state = INITIALIZE; + + /* Begin by reading the input file */ + fp = fopen(ps.filename,"rb"); + if( fp==0 ){ + ErrorMsg(ps.filename,0,"Can't open this file for reading."); + gp->errorcnt++; + return; + } + fseek(fp,0,2); + filesize = ftell(fp); + rewind(fp); + filebuf = (char *)malloc( filesize+1 ); + if( filesize>100000000 || filebuf==0 ){ + ErrorMsg(ps.filename,0,"Input file too large."); + gp->errorcnt++; + fclose(fp); + return; + } + if( fread(filebuf,1,filesize,fp)!=filesize ){ + ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", + filesize); + free(filebuf); + gp->errorcnt++; + fclose(fp); + return; + } + fclose(fp); + filebuf[filesize] = 0; + + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + + /* Now scan the text of the input file */ + lineno = 1; + for(cp=filebuf; (c= *cp)!=0; ){ + if( c=='\n' ) lineno++; /* Keep track of the line number */ + if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ + if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ + cp+=2; + while( (c= *cp)!=0 && c!='\n' ) cp++; + continue; + } + if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ + cp+=2; + while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c ) cp++; + continue; + } + ps.tokenstart = cp; /* Mark the beginning of the token */ + ps.tokenlineno = lineno; /* Linenumber on which token begins */ + if( c=='\"' ){ /* 
String literals */ + cp++; + while( (c= *cp)!=0 && c!='\"' ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c==0 ){ + ErrorMsg(ps.filename,startline, +"String starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( c=='{' ){ /* A block of C code */ + int level; + cp++; + for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ + if( c=='\n' ) lineno++; + else if( c=='{' ) level++; + else if( c=='}' ) level--; + else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ + int prevc; + cp = &cp[2]; + prevc = 0; + while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ + if( c=='\n' ) lineno++; + prevc = c; + cp++; + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + cp = &cp[2]; + while( (c= *cp)!=0 && c!='\n' ) cp++; + if( c ) lineno++; + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + int startchar, prevc; + startchar = c; + prevc = 0; + for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ + if( c=='\n' ) lineno++; + if( prevc=='\\' ) prevc = 0; + else prevc = c; + } + } + } + if( c==0 ){ + ErrorMsg(ps.filename,ps.tokenlineno, +"C code starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( ISALNUM(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ + cp += 3; + nextcp = cp; + }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else{ /* All other (one character) operators */ + cp++; + nextcp = cp; + } + c = *cp; + *cp = 0; /* Null terminate the token */ + parseonetoken(&ps); /* Parse the token */ + *cp = (char)c; /* Restore the buffer */ + cp = nextcp; + } + free(filebuf); /* Release the buffer after parsing */ + gp->rule = ps.firstrule; + gp->errorcnt = 
ps.errorcnt; +} +/*************************** From the file "plink.c" *********************/ +/* +** Routines processing configuration follow-set propagation links +** in the LEMON parser generator. +*/ +static struct plink *plink_freelist = 0; + +/* Allocate a new plink */ +struct plink *Plink_new(){ + struct plink *newlink; + + if( plink_freelist==0 ){ + int i; + int amt = 100; + plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); + if( plink_freelist==0 ){ + fprintf(stderr, + "Unable to allocate memory for a new follow-set propagation link.\n"); + exit(1); + } + for(i=0; inext; + return newlink; +} + +/* Add a plink to a plink list */ +void Plink_add(struct plink **plpp, struct config *cfp) +{ + struct plink *newlink; + newlink = Plink_new(); + newlink->next = *plpp; + *plpp = newlink; + newlink->cfp = cfp; +} + +/* Transfer every plink on the list "from" to the list "to" */ +void Plink_copy(struct plink **to, struct plink *from) +{ + struct plink *nextpl; + while( from ){ + nextpl = from->next; + from->next = *to; + *to = from; + from = nextpl; + } +} + +/* Delete every plink on the list */ +void Plink_delete(struct plink *plp) +{ + struct plink *nextpl; + + while( plp ){ + nextpl = plp->next; + plp->next = plink_freelist; + plink_freelist = plp; + plp = nextpl; + } +} +/*********************** From the file "report.c" **************************/ +/* +** Procedures for generating reports and tables in the LEMON parser generator. +*/ + +/* Generate a filename with the given suffix. Space to hold the +** name comes from malloc() and must be freed by the calling +** function. 
+*/ +PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) +{ + char *name; + char *cp; + + name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); + if( name==0 ){ + fprintf(stderr,"Can't allocate space for a filename.\n"); + exit(1); + } + lemon_strcpy(name,lemp->filename); + cp = strrchr(name,'.'); + if( cp ) *cp = 0; + lemon_strcat(name,suffix); + return name; +} + +/* Open a file with a name based on the name of the input file, +** but with a different (specified) suffix, and return a pointer +** to the stream */ +PRIVATE FILE *file_open( + struct lemon *lemp, + const char *suffix, + const char *mode +){ + FILE *fp; + + if( lemp->outname ) free(lemp->outname); + lemp->outname = file_makename(lemp, suffix); + fp = fopen(lemp->outname,mode); + if( fp==0 && *mode=='w' ){ + fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); + lemp->errorcnt++; + return 0; + } + return fp; +} + +/* Duplicate the input file without comments and without actions +** on rules */ +void Reprint(struct lemon *lemp) +{ + struct rule *rp; + struct symbol *sp; + int i, j, maxlen, len, ncolumns, skip; + printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); + maxlen = 10; + for(i=0; insymbol; i++){ + sp = lemp->symbols[i]; + len = lemonStrlen(sp->name); + if( len>maxlen ) maxlen = len; + } + ncolumns = 76/(maxlen+5); + if( ncolumns<1 ) ncolumns = 1; + skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; + for(i=0; insymbol; j+=skip){ + sp = lemp->symbols[j]; + assert( sp->index==j ); + printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); + } + printf("\n"); + } + for(rp=lemp->rule; rp; rp=rp->next){ + printf("%s",rp->lhs->name); + /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ + printf(" ::="); + for(i=0; inrhs; i++){ + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + printf(" %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + printf("|%s", sp->subsym[j]->name); + } + }else{ + printf(" %s", sp->name); + } + /* if( 
rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ + } + printf("."); + if( rp->precsym ) printf(" [%s]",rp->precsym->name); + /* if( rp->code ) printf("\n %s",rp->code); */ + printf("\n"); + } +} + +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ + struct symbol *sp; + int i, j; + fprintf(fp,"%s ::=",rp->lhs->name); + for(i=0; i<=rp->nrhs; i++){ + if( i==iCursor ) fprintf(fp," *"); + if( i==rp->nrhs ) break; + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + }else{ + fprintf(fp," %s", sp->name); + } + } +} + +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} + +/* #define TEST */ +#if 0 +/* Print a set */ +PRIVATE void SetPrint(out,set,lemp) +FILE *out; +char *set; +struct lemon *lemp; +{ + int i; + char *spacer; + spacer = ""; + fprintf(out,"%12s[",""); + for(i=0; interminal; i++){ + if( SetFind(set,i) ){ + fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); + spacer = " "; + } + } + fprintf(out,"]\n"); +} + +/* Print a plink chain */ +PRIVATE void PlinkPrint(out,plp,tag) +FILE *out; +struct plink *plp; +char *tag; +{ + while( plp ){ + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); + ConfigPrint(out,plp->cfp); + fprintf(out,"\n"); + plp = plp->next; + } +} +#endif + +/* Print an action to the given file descriptor. Return FALSE if +** nothing was actually printed. 
+*/ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ + int result = 1; + switch( ap->type ){ + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); + break; + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case ACCEPT: + fprintf(fp,"%*s accept",indent,ap->sp->name); + break; + case ERROR: + fprintf(fp,"%*s error",indent,ap->sp->name); + break; + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->iRule); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); + break; + case SH_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s shift %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.stp->statenum); + }else{ + result = 0; + } + break; + case RD_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.rp->iRule); + }else{ + result = 0; + } + break; + case NOT_USED: + result = 0; + break; + } + if( result && ap->spOpt ){ + fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); + } + return result; +} + +/* Generate the "*.out" log file */ +void ReportOutput(struct lemon *lemp) +{ + int i; + struct state *stp; + struct config *cfp; + struct action *ap; + FILE *fp; + + fp = file_open(lemp,".out","wb"); + if( fp==0 ) return; + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + fprintf(fp,"State %d:\n",stp->statenum); + if( lemp->basisflag ) cfp=stp->bp; + else cfp=stp->cfp; + while( cfp ){ + char buf[20]; + if( 
cfp->dot==cfp->rp->nrhs ){ + lemon_sprintf(buf,"(%d)",cfp->rp->iRule); + fprintf(fp," %5s ",buf); + }else{ + fprintf(fp," "); + } + ConfigPrint(fp,cfp); + fprintf(fp,"\n"); +#if 0 + SetPrint(fp,cfp->fws,lemp); + PlinkPrint(fp,cfp->fplp,"To "); + PlinkPrint(fp,cfp->bplp,"From"); +#endif + if( lemp->basisflag ) cfp=cfp->bp; + else cfp=cfp->next; + } + fprintf(fp,"\n"); + for(ap=stp->ap; ap; ap=ap->next){ + if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); + } + fprintf(fp,"\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + for(i=0; insymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ + fprintf(fp, " "); + } + for(j=0; jnterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + fprintf(fp, "\n"); + } + fclose(fp); + return; +} + +/* Search for the file "name" which is in the same directory as +** the exacutable */ +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) +{ + const char *pathlist; + char *pathbufptr; + char *pathbuf; + char *path,*cp; + char c; + +#ifdef __WIN32__ + cp = strrchr(argv0,'\\'); +#else + cp = strrchr(argv0,'/'); +#endif + if( cp ){ + c = *cp; + *cp = 0; + path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); + if( path ) lemon_sprintf(path,"%s/%s",argv0,name); + *cp = c; + }else{ + pathlist = getenv("PATH"); + if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; + pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); + path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); + if( (pathbuf != 0) && (path!=0) ){ + pathbufptr = pathbuf; + lemon_strcpy(pathbuf, pathlist); + while( *pathbuf ){ + cp = strchr(pathbuf,':'); + if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; + c = *cp; + *cp = 0; + lemon_sprintf(path,"%s/%s",pathbuf,name); + *cp = c; + if( c==0 ) 
pathbuf[0] = 0; + else pathbuf = &cp[1]; + if( access(path,modemask)==0 ) break; + } + free(pathbufptr); + } + } + return path; +} + +/* Given an action, compute the integer value for that action +** which is to be put in the action table of the generated machine. +** Return negative if no action should be generated. +*/ +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) +{ + int act; + switch( ap->type ){ + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: act = ap->x.rp->iRule + lemp->nstate; break; + case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; + case ERROR: act = lemp->nstate + lemp->nrule*2; break; + case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; + default: act = -1; break; + } + return act; +} + +#define LINESIZE 1000 +/* The next cluster of routines are for reading the template file +** and writing the results to the generated parser */ +/* The first function transfers data from "in" to "out" until +** a line is seen which begins with "%%". The line number is +** tracked. +** +** if name!=0, then any word that begin with "Parse" is changed to +** begin with *name instead. +*/ +PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) +{ + int i, iStart; + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + iStart = 0; + if( name ){ + for(i=0; line[i]; i++){ + if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 + && (i==0 || !ISALPHA(line[i-1])) + ){ + if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); + fprintf(out,"%s",name); + i += 4; + iStart = i+1; + } + } + } + fprintf(out,"%s",&line[iStart]); + } +} + +/* The next function finds the template file and opens it, returning +** a pointer to the opened file. 
*/
/* Lookup order:
**   1. The template named by user_templatename, when one was given.
**      No fallback is attempted if that file cannot be used.
**   2. The grammar file's name with its extension replaced by ".lt".
**   3. "lempar.c" relative to the current directory.
**   4. "lempar.c" as located by pathsearch() from lemp->argv0.
**
** On failure a message is written to stderr, lemp->errorcnt is
** incremented and 0 is returned.
*/
PRIVATE FILE *tplt_open(struct lemon *lemp)
{
  static char templatename[] = "lempar.c";
  char buf[1000];
  FILE *in;
  char *tpltname;
  char *toFree = 0;   /* Heap path from pathsearch(), released before return */
  char *cp;
  int nBase;          /* Length of the grammar filename without extension */

  /* first, see if user specified a template filename on the command line. */
  if (user_templatename != 0) {
    if( access(user_templatename,004)==-1 ){
      fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
        user_templatename);
      lemp->errorcnt++;
      return 0;
    }
    in = fopen(user_templatename,"rb");
    if( in==0 ){
      fprintf(stderr,"Can't open the template file \"%s\".\n",
              user_templatename);
      lemp->errorcnt++;
      return 0;
    }
    return in;
  }

  /* Build "<grammar basename>.lt".  If it would not fit in buf[], leave
  ** buf empty so the access() test below fails and the search moves on,
  ** rather than writing past the end of the array. */
  cp = strrchr(lemp->filename,'.');
  nBase = cp ? (int)(cp - lemp->filename) : lemonStrlen(lemp->filename);
  if( nBase + (int)sizeof(".lt") <= (int)sizeof(buf) ){
    lemon_sprintf(buf,"%.*s.lt",nBase,lemp->filename);
  }else{
    buf[0] = 0;
  }

  if( access(buf,004)==0 ){
    tpltname = buf;
  }else if( access(templatename,004)==0 ){
    tpltname = templatename;
  }else{
    /* pathsearch() hands back malloc()ed memory that we must free. */
    toFree = tpltname = pathsearch(lemp->argv0,templatename,0);
  }
  if( tpltname==0 ){
    fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
    templatename);
    lemp->errorcnt++;
    return 0;
  }
  in = fopen(tpltname,"rb");
  if( in==0 ){
    /* Name the path that was actually tried, not just "lempar.c". */
    fprintf(stderr,"Can't open the template file \"%s\".\n",tpltname);
    lemp->errorcnt++;
  }
  free(toFree);
  return in;
}

/* Print a #line directive line to the output file.
*/ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"#line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + +/* Print a string to the file and keep the linenumber up to date */ +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) +{ + if( str==0 ) return; + while( *str ){ + putc(*str,out); + if( *str=='\n' ) (*lineno)++; + str++; + } + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + return; +} + +/* +** The following routine emits code for the destructor for the +** symbol sp +*/ +void emit_destructor_code( + FILE *out, + struct symbol *sp, + struct lemon *lemp, + int *lineno +){ + char *cp = 0; + + if( sp->type==TERMINAL ){ + cp = lemp->tokendest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else if( sp->destructor ){ + cp = sp->destructor; + fprintf(out,"{\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } + }else if( lemp->vardest ){ + cp = lemp->vardest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ + } + for(; *cp; cp++){ + if( *cp=='$' && cp[1]=='$' ){ + fprintf(out,"(yypminor->yy%d)",sp->dtnum); + cp++; + continue; + } + if( *cp=='\n' ) (*lineno)++; + fputc(*cp,out); + } + fprintf(out,"\n"); (*lineno)++; + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + fprintf(out,"}\n"); (*lineno)++; + return; +} + +/* +** Return TRUE (non-zero) if the given symbol has a destructor. 
+*/ +int has_destructor(struct symbol *sp, struct lemon *lemp) +{ + int ret; + if( sp->type==TERMINAL ){ + ret = lemp->tokendest!=0; + }else{ + ret = lemp->vardest!=0 || sp->destructor!=0; + } + return ret; +} + +/* +** Append text to a dynamically allocated string. If zText is 0 then +** reset the string to be empty again. Always return the complete text +** of the string (which is overwritten with each call). +** +** n bytes of zText are stored. If n==0 then all of zText up to the first +** \000 terminator is stored. zText can contain up to two instances of +** %d. The values of p1 and p2 are written into the first and second +** %d. +** +** If n==-1, then the previous character is overwritten. +*/ +PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ + static char empty[1] = { 0 }; + static char *z = 0; + static int alloced = 0; + static int used = 0; + int c; + char zInt[40]; + if( zText==0 ){ + if( used==0 && z!=0 ) z[0] = 0; + used = 0; + return z; + } + if( n<=0 ){ + if( n<0 ){ + used += n; + assert( used>=0 ); + } + n = lemonStrlen(zText); + } + if( (int) (n+sizeof(zInt)*2+used) >= alloced ){ + alloced = n + sizeof(zInt)*2 + used + 200; + z = (char *) realloc(z, alloced); + } + if( z==0 ) return empty; + while( n-- > 0 ){ + c = *(zText++); + if( c=='%' && n>0 && zText[0]=='d' ){ + lemon_sprintf(zInt, "%d", p1); + p1 = p2; + lemon_strcpy(&z[used], zInt); + used += lemonStrlen(&z[used]); + zText++; + n--; + }else{ + z[used++] = (char)c; + } + } + z[used] = 0; + return z; +} + +/* +** Write and transform the rp->code string so that symbols are expanded. +** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate. +** +** Return 1 if the expanded code requires that "yylhsminor" local variable +** to be defined. 
+*/ +PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + int rc = 0; /* True if yylhsminor is used */ + int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ + const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ + char lhsused = 0; /* True if the LHS element has been used */ + char lhsdirect; /* True if LHS writes directly into stack */ + char used[MAXRHS]; /* True for each RHS element which is used */ + char zLhs[50]; /* Convert the LHS symbol into this string */ + char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ + + for(i=0; inrhs; i++) used[i] = 0; + lhsused = 0; + + if( rp->code==0 ){ + static char newlinestr[2] = { '\n', '\0' }; + rp->code = newlinestr; + rp->line = rp->ruleline; + rp->noCode = 1; + }else{ + rp->noCode = 0; + } + + + if( rp->nrhs==0 ){ + /* If there are no RHS symbols, then writing directly to the LHS is ok */ + lhsdirect = 1; + }else if( rp->rhsalias[0]==0 ){ + /* The left-most RHS symbol has no value. LHS direct is ok. But + ** we have to call the distructor on the RHS symbol first. */ + lhsdirect = 1; + if( has_destructor(rp->rhs[0],lemp) ){ + append_str(0,0,0,0); + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[0]->index,1-rp->nrhs); + rp->codePrefix = Strsafe(append_str(0,0,0,0)); + rp->noCode = 0; + } + }else if( rp->lhsalias==0 ){ + /* There is no LHS value symbol. 
*/ + lhsdirect = 1; + }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ + /* The LHS symbol and the left-most RHS symbol are the same, so + ** direct writing is allowed */ + lhsdirect = 1; + lhsused = 1; + used[0] = 1; + if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) and %s(%s) share the same label but have " + "different datatypes.", + rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); + lemp->errorcnt++; + } + }else{ + lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", + rp->lhsalias, rp->rhsalias[0]); + zSkip = strstr(rp->code, zOvwrt); + if( zSkip!=0 ){ + /* The code contains a special comment that indicates that it is safe + ** for the LHS label to overwrite left-most RHS label. */ + lhsdirect = 1; + }else{ + lhsdirect = 0; + } + } + if( lhsdirect ){ + sprintf(zLhs, "yymsp[%d].minor.yy%d",1-rp->nrhs,rp->lhs->dtnum); + }else{ + rc = 1; + sprintf(zLhs, "yylhsminor.yy%d",rp->lhs->dtnum); + } + + append_str(0,0,0,0); + + /* This const cast is wrong but harmless, if we're careful. 
*/ + for(cp=(char *)rp->code; *cp; cp++){ + if( cp==zSkip ){ + append_str(zOvwrt,0,0,0); + cp += lemonStrlen(zOvwrt)-1; + dontUseRhs0 = 1; + continue; + } + if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str(zLhs,0,0,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( i==0 && dontUseRhs0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used after '%s'.", + rp->rhsalias[0], zOvwrt); + lemp->errorcnt++; + }else if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Main code generation completed */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ) rp->code = Strsafe(cp); + append_str(0,0,0,0); + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS minor values which are not referenced. + ** Generate error messages for unused labels and duplicate labels. 
+ */ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] ){ + if( i>0 ){ + int j; + if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) has the same label as the LHS but is not the left-most " + "symbol on the RHS.", + rp->rhs[i]->name, rp->rhsalias); + lemp->errorcnt++; + } + for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used for multiple symbols on the RHS of a rule.", + rp->rhsalias[i]); + lemp->errorcnt++; + break; + } + } + } + if( !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + } + }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + } + } + + /* If unable to write LHS values directly into the stack, write the + ** saved LHS value now. */ + if( lhsdirect==0 ){ + append_str(" yymsp[%d].minor.yy%d = ", 0, 1-rp->nrhs, rp->lhs->dtnum); + append_str(zLhs, 0, 0, 0); + append_str(";\n", 0, 0, 0); + } + + /* Suffix code generation complete */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ){ + rp->codeSuffix = Strsafe(cp); + rp->noCode = 0; + } + + return rc; +} + +/* +** Generate code which executes when the rule "rp" is reduced. Write +** the code to "out". Make sure lineno stays up-to-date. 
+*/ +PRIVATE void emit_code( + FILE *out, + struct rule *rp, + struct lemon *lemp, + int *lineno +){ + const char *cp; + + /* Setup code prior to the #line directive */ + if( rp->codePrefix && rp->codePrefix[0] ){ + fprintf(out, "{%s", rp->codePrefix); + for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + /* Generate code to do the reduce action */ + if( rp->code ){ + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + fprintf(out,"}\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } + } + + /* Generate breakdown code that occurs after the #line directive */ + if( rp->codeSuffix && rp->codeSuffix[0] ){ + fprintf(out, "%s", rp->codeSuffix); + for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + if( rp->codePrefix ){ + fprintf(out, "}\n"); (*lineno)++; + } + + return; +} + +/* +** Print the definition of the union used for the parser's data stack. +** This union contains fields for every possible data type for tokens +** and nonterminals. In the process of computing and printing this +** union, also set the ".dtnum" field of every terminal and nonterminal +** symbol. +*/ +void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag /* True if generating makeheaders output */ +){ + int lineno = *plineno; /* The line number of the output */ + char **types; /* A hash table of datatypes */ + int arraysize; /* Size of the "types" array */ + int maxdtlength; /* Maximum length of any ".datatype" field. 
*/ + char *stddt; /* Standardized name for a datatype */ + int i,j; /* Loop counters */ + unsigned hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ + + /* Allocate and initialize types[] and allocate stddt[] */ + arraysize = lemp->nsymbol * 2; + types = (char**)calloc( arraysize, sizeof(char*) ); + if( types==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + for(i=0; ivartype ){ + maxdtlength = lemonStrlen(lemp->vartype); + } + for(i=0; insymbol; i++){ + int len; + struct symbol *sp = lemp->symbols[i]; + if( sp->datatype==0 ) continue; + len = lemonStrlen(sp->datatype); + if( len>maxdtlength ) maxdtlength = len; + } + stddt = (char*)malloc( maxdtlength*2 + 1 ); + if( stddt==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + + /* Build a hash table of datatypes. The ".dtnum" field of each symbol + ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is + ** used for terminal symbols. If there is no %default_type defined then + ** 0 is also used as the .dtnum value for nonterminals which do not specify + ** a datatype using the %type directive. 
+ */ + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + char *cp; + if( sp==lemp->errsym ){ + sp->dtnum = arraysize+1; + continue; + } + if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ + sp->dtnum = 0; + continue; + } + cp = sp->datatype; + if( cp==0 ) cp = lemp->vartype; + j = 0; + while( ISSPACE(*cp) ) cp++; + while( *cp ) stddt[j++] = *cp++; + while( j>0 && ISSPACE(stddt[j-1]) ) j--; + stddt[j] = 0; + if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } + hash = 0; + for(j=0; stddt[j]; j++){ + hash = hash*53 + stddt[j]; + } + hash = (hash & 0x7fffffff)%arraysize; + while( types[hash] ){ + if( strcmp(types[hash],stddt)==0 ){ + sp->dtnum = hash + 1; + break; + } + hash++; + if( hash>=(unsigned)arraysize ) hash = 0; + } + if( types[hash]==0 ){ + sp->dtnum = hash + 1; + types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); + if( types[hash]==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + lemon_strcpy(types[hash],stddt); + } + } + + /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ + name = lemp->name ? lemp->name : "Parse"; + lineno = *plineno; + if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } + fprintf(out,"#define %sTOKENTYPE %s\n",name, + lemp->tokentype?lemp->tokentype:"void*"); lineno++; + if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } + fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," int yyinit;\n"); lineno++; + fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; + for(i=0; ierrsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } + free(stddt); + free(types); + fprintf(out,"} YYMINORTYPE;\n"); lineno++; + *plineno = lineno; +} + +/* +** Return the name of a C datatype able to represent values between +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. 
+*/ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; + if( lwr>=0 ){ + if( upr<=255 ){ + zType = "unsigned char"; + nByte = 1; + }else if( upr<65535 ){ + zType = "unsigned short int"; + nByte = 2; + }else{ + zType = "unsigned int"; + nByte = 4; + } + }else if( lwr>=-127 && upr<=127 ){ + zType = "signed char"; + nByte = 1; + }else if( lwr>=-32767 && upr<32767 ){ + zType = "short"; + nByte = 2; + } + if( pnByte ) *pnByte = nByte; + return zType; +} + +/* +** Each state contains a set of token transaction and a set of +** nonterminal transactions. Each of these sets makes an instance +** of the following structure. An array of these structures is used +** to order the creation of entries in the yy_action[] table. +*/ +struct axset { + struct state *stp; /* A pointer to a state */ + int isTkn; /* True to use tokens. False for non-terminals */ + int nAction; /* Number of actions */ + int iOrder; /* Original order of action sets */ +}; + +/* +** Compare to axset structures for sorting purposes +*/ +static int axset_compare(const void *a, const void *b){ + struct axset *p1 = (struct axset*)a; + struct axset *p2 = (struct axset*)b; + int c; + c = p2->nAction - p1->nAction; + if( c==0 ){ + c = p1->iOrder - p2->iOrder; + } + assert( c!=0 || p1==p2 ); + return c; +} + +/* +** Write text on "out" that describes the rule "rp". 
+*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ + int k; + fprintf(out," %s", sp->subsym[0]->name); + for(k=1; knsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + + +/* Generate C source code for the parser */ +void ReportTable( + struct lemon *lemp, + int mhflag /* Output in makeheaders format if true */ +){ + FILE *out, *in; + char line[LINESIZE]; + int lineno; + struct state *stp; + struct action *ap; + struct rule *rp; + struct acttab *pActtab; + int i, j, n, sz; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ + const char *name; + int mnTknOfst, mxTknOfst; + int mnNtOfst, mxNtOfst; + struct axset *ax; + + in = tplt_open(lemp); + if( in==0 ) return; + out = file_open(lemp,".c","wb"); + if( out==0 ){ + fclose(in); + return; + } + lineno = 1; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the include code, if any */ + tplt_print(out,lemp,lemp->include,&lineno); + if( mhflag ){ + char *incName = file_makename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", incName); lineno++; + free(incName); + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate #defines for all tokens */ + if( mhflag ){ + const char *prefix; + fprintf(out,"#if INTERFACE\n"); lineno++; + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + fprintf(out,"#endif\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the defines */ + fprintf(out,"#define YYCODETYPE %s\n", + minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; + fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; + fprintf(out,"#define YYACTIONTYPE %s\n", + 
minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"#define YYWILDCARD %d\n", + lemp->wildcard->index); lineno++; + } + print_stack_union(out,lemp,&lineno,mhflag); + fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; + if( lemp->stacksize ){ + fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; + }else{ + fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; + } + fprintf(out, "#endif\n"); lineno++; + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + } + name = lemp->name ? lemp->name : "Parse"; + if( lemp->arg && lemp->arg[0] ){ + i = lemonStrlen(lemp->arg); + while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", + name,lemp->arg,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", + name,&lemp->arg[i],&lemp->arg[i]); lineno++; + }else{ + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_FETCH\n",name); lineno++; + fprintf(out,"#define %sARG_STORE\n",name); lineno++; + } + if( mhflag ){ + fprintf(out,"#endif\n"); lineno++; + } + if( lemp->errsym->useCnt ){ + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } + if( lemp->has_fallback ){ + fprintf(out,"#define YYFALLBACK 1\n"); lineno++; + } + + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. 
+ */ + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); + if( ax==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + ax[i*2].stp = stp; + ax[i*2].isTkn = 1; + ax[i*2].nAction = stp->nTknAct; + ax[i*2+1].stp = stp; + ax[i*2+1].isTkn = 0; + ax[i*2+1].nAction = stp->nNtAct; + } + mxTknOfst = mnTknOfst = 0; + mxNtOfst = mnNtOfst = 0; + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; inxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(); + for(i=0; inxstate*2 && ax[i].nAction>0; i++){ + stp = ax[i].stp; + if( ax[i].isTkn ){ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->index>=lemp->nterminal ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iTknOfst = acttab_insert(pActtab); + if( stp->iTknOfstiTknOfst; + if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; + }else{ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->indexnterminal ) continue; + if( ap->sp->index==lemp->nsymbol ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iNtOfst = acttab_insert(pActtab); + if( stp->iNtOfstiNtOfst; + if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; + } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jjnAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? 
"Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif + } + free(ax); + + /* Mark rules that are actually used for reduce actions after all + ** optimizations have been applied + */ + for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; + for(i=0; inxstate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ + ap->x.rp->doesReduce = i; + } + } + } + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; + fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++; + i = lemp->nstate + lemp->nrule; + fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++; + i = lemp->nstate + lemp->nrule*2; + fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++; + fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++; + fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. 
+ */ + + /* Output the yy_action table */ + lemp->nactiontab = n = acttab_size(pActtab); + lemp->tablesize += n*szActionType; + fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; + fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; + for(i=j=0; instate + lemp->nrule + 2; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", action); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_lookahead table */ + lemp->tablesize += n*szCodeType; + fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; + for(i=j=0; insymbol; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", la); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_shift_ofst[] table */ + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++; + fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; + fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; + fprintf(out, "static const %s yy_shift_ofst[] = {\n", + minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); + lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iTknOfst; + if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_reduce_ofst[] table */ + fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); 
lineno++; + fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; + fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; + fprintf(out, "static const %s yy_reduce_ofst[] = {\n", + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iNtOfst; + if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the default action table */ + fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; + for(i=j=0; isorted[i]; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of fallback tokens. 
+ */ + if( lemp->has_fallback ){ + int mx = lemp->nterminal - 1; + while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } + lemp->tablesize += (mx+1)*szCodeType; + for(i=0; i<=mx; i++){ + struct symbol *p = lemp->symbols[i]; + if( p->fallback==0 ){ + fprintf(out, " 0, /* %10s => nothing */\n", p->name); + }else{ + fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, + p->name, p->fallback->name); + } + lineno++; + } + } + tplt_xfer(lemp->name, in, out, &lineno); + + /* Generate a table containing the symbolic name of every symbol + */ + for(i=0; insymbol; i++){ + lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); + fprintf(out," %-15s",line); + if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + } + if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate a table containing a text string that describes every + ** rule in the rule set of the grammar. This information is used + ** when tracing REDUCE actions. + */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( rp->iRule==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); + fprintf(out,"\",\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes every time a symbol is popped from + ** the stack while processing errors or while destroying the parser. 
+ ** (In other words, generate the %destructor actions) + */ + if( lemp->tokendest ){ + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type!=TERMINAL ) continue; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + } + for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); + if( insymbol ){ + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + } + if( lemp->vardest ){ + struct symbol *dflt_sp = 0; + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || + sp->index<=0 || sp->destructor!=0 ) continue; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + dflt_sp = sp; + } + if( dflt_sp!=0 ){ + emit_destructor_code(out,dflt_sp,lemp,&lineno); + } + fprintf(out," break;\n"); lineno++; + } + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + if( sp->destLineno<0 ) continue; /* Already emitted */ + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; jnsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* %s */\n", + sp2->index, sp2->name); lineno++; + sp2->destLineno = -1; /* Avoid emitting this destructor again */ + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes whenever the parser stack overflows */ + 
tplt_print(out,lemp,lemp->overflow,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of rule information + ** + ** Note: This code depends on the fact that rules are number + ** sequentually beginning with 0. + */ + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which execution during each REDUCE action */ + i = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + i += translate_code(lemp, rp); + } + if( i ){ + fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; + } + /* First output rules other than the default: rule */ + for(rp=lemp->rule; rp; rp=rp->next){ + struct rule *rp2; /* Other rules with the same action */ + if( rp->codeEmitted ) continue; + if( rp->noCode ){ + /* No C code actions, so this will be part of the "default:" rule */ + continue; + } + fprintf(out," case %d: /* ", rp->iRule); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix + && rp2->codeSuffix==rp->codeSuffix ){ + fprintf(out," case %d: /* ", rp2->iRule); + writeRuleText(out, rp2); + fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; + rp2->codeEmitted = 1; + } + } + emit_code(out,rp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + rp->codeEmitted = 1; + } + /* Finally, output the default: rule. We choose as the default: all + ** empty actions. 
*/ + fprintf(out," default:\n"); lineno++; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->codeEmitted ) continue; + assert( rp->noCode ); + fprintf(out," /* (%d) ", rp->iRule); + writeRuleText(out, rp); + if( rp->doesReduce ){ + fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; + }else{ + fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + } + } + fprintf(out," break;\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes if a parse fails */ + tplt_print(out,lemp,lemp->failure,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when a syntax error occurs */ + tplt_print(out,lemp,lemp->error,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when the parser accepts its input */ + tplt_print(out,lemp,lemp->accept,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Append any addition code the user desires */ + tplt_print(out,lemp,lemp->extracode,&lineno); + + fclose(in); + fclose(out); + return; +} + +/* Generate a header file for the parser */ +void ReportHeader(struct lemon *lemp) +{ + FILE *out, *in; + const char *prefix; + char line[LINESIZE]; + char pattern[LINESIZE]; + int i; + + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + in = file_open(lemp,".h","rb"); + if( in ){ + int nextChar; + for(i=1; interminal && fgets(line,LINESIZE,in); i++){ + lemon_sprintf(pattern,"#define %s%-30s %3d\n", + prefix,lemp->symbols[i]->name,i); + if( strcmp(line,pattern) ) break; + } + nextChar = fgetc(in); + fclose(in); + if( i==lemp->nterminal && nextChar==EOF ){ + /* No change in the file. Don't rewrite it. 
*/ + return; + } + } + out = file_open(lemp,".h","wb"); + if( out ){ + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); + } + fclose(out); + } + return; +} + +/* Reduce the size of the action tables, if possible, by making use +** of defaults. +** +** In this version, we take the most frequent REDUCE action and make +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. +*/ +void CompressTables(struct lemon *lemp) +{ + struct state *stp; + struct action *ap, *ap2, *nextap; + struct rule *rp, *rp2, *rbest; + int nbest, n; + int i; + int usesWildcard; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + nbest = 0; + rbest = 0; + usesWildcard = 0; + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } + if( ap->type!=REDUCE ) continue; + rp = ap->x.rp; + if( rp->lhsStart ) continue; + if( rp==rbest ) continue; + n = 1; + for(ap2=ap->next; ap2; ap2=ap2->next){ + if( ap2->type!=REDUCE ) continue; + rp2 = ap2->x.rp; + if( rp2==rbest ) continue; + if( rp2==rp ) n++; + } + if( n>nbest ){ + nbest = n; + rbest = rp; + } + } + + /* Do not make a default if the number of rules to default + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; + + + /* Combine matching REDUCE actions into a single default */ + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) break; + } + assert( ap ); + ap->sp = Symbol_new("{default}"); + for(ap=ap->next; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; + } + stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. 
Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } + } + + /* If a SHIFTREDUCE action specifies a rule that has a single RHS term + ** (meaning that the SHIFTREDUCE will land back in the state where it + ** started) and if there is no C-code associated with the reduce action, + ** then we can go ahead and convert the action to be the same as the + ** action for the RHS of the rule. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=nextap){ + nextap = ap->next; + if( ap->type!=SHIFTREDUCE ) continue; + rp = ap->x.rp; + if( rp->noCode==0 ) continue; + if( rp->nrhs!=1 ) continue; +#if 1 + /* Only apply this optimization to non-terminals. It would be OK to + ** apply it to terminal symbols too, but that makes the parser tables + ** larger. */ + if( ap->sp->indexnterminal ) continue; +#endif + /* If we reach this point, it means the optimization can be applied */ + nextap = ap; + for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} + assert( ap2!=0 ); + ap->spOpt = ap2->sp; + ap->type = ap2->type; + ap->x = ap2->x; + } + } +} + + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. 
+*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(const struct state**)a; + const struct state *pB = *(const struct state**)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + if( n==0 ){ + n = pB->statenum - pA->statenum; + } + } + assert( n!=0 ); + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ + if( ap->sp->indexnterminal ){ + stp->nTknAct++; + }else if( ap->sp->indexnsymbol ){ + stp->nNtAct++; + }else{ + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; instate; i++){ + lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; + } +} + + +/***************** From the file "set.c" ************************************/ +/* +** Set manipulation routines for the LEMON parser generator. +*/ + +static int size = 0; + +/* Set the set size */ +void SetSize(int n) +{ + size = n+1; +} + +/* Allocate a new set */ +char *SetNew(){ + char *s; + s = (char*)calloc( size, 1); + if( s==0 ){ + extern void memory_error(); + memory_error(); + } + return s; +} + +/* Deallocate a set */ +void SetFree(char *s) +{ + free(s); +} + +/* Add a new element to the set. 
Return TRUE if the element was added +** and FALSE if it was already there. */ +int SetAdd(char *s, int e) +{ + int rv; + assert( e>=0 && esize = 1024; + x1a->count = 0; + x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); + if( x1a->tbl==0 ){ + free(x1a); + x1a = 0; + }else{ + int i; + x1a->ht = (x1node**)&(x1a->tbl[1024]); + for(i=0; i<1024; i++) x1a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Strsafe_insert(const char *data) +{ + x1node *np; + unsigned h; + unsigned ph; + + if( x1a==0 ) return 0; + ph = strhash(data); + h = ph & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x1a->count>=x1a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x1 array; + array.size = arrSize = x1a->size*2; + array.count = x1a->count; + array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x1node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x1node *oldnp, *newnp; + oldnp = &(x1a->tbl[i]); + h = strhash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x1a->tbl); + *x1a = array; + } + /* Insert the new data */ + h = ph & (x1a->size-1); + np = &(x1a->tbl[x1a->count++]); + np->data = data; + if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); + np->next = x1a->ht[h]; + x1a->ht[h] = np; + np->from = &(x1a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. 
*/ +const char *Strsafe_find(const char *key) +{ + unsigned h; + x1node *np; + + if( x1a==0 ) return 0; + h = strhash(key) & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return a pointer to the (terminal or nonterminal) symbol "x". +** Create a new symbol if this is the first time "x" has been seen. +*/ +struct symbol *Symbol_new(const char *x) +{ + struct symbol *sp; + + sp = Symbol_find(x); + if( sp==0 ){ + sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); + MemoryCheck(sp); + sp->name = Strsafe(x); + sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; + sp->rule = 0; + sp->fallback = 0; + sp->prec = -1; + sp->assoc = UNK; + sp->firstset = 0; + sp->lambda = LEMON_FALSE; + sp->destructor = 0; + sp->destLineno = 0; + sp->datatype = 0; + sp->useCnt = 0; + Symbol_insert(sp,sp->name); + } + sp->useCnt++; + return sp; +} + +/* Compare two symbols for sorting purposes. Return negative, +** zero, or positive if a is less then, equal to, or greater +** than b. +** +** Symbols that begin with upper case letters (terminals or tokens) +** must sort before symbols that begin with lower case letters +** (non-terminals). And MULTITERMINAL symbols (created using the +** %token_class directive) must sort at the very end. Other than +** that, the order does not matter. +** +** We find experimentally that leaving the symbols in their original +** order (the order they appeared in the grammar file) gives the +** smallest parser tables in SQLite. +*/ +int Symbolcmpp(const void *_a, const void *_b) +{ + const struct symbol *a = *(const struct symbol **) _a; + const struct symbol *b = *(const struct symbol **) _b; + int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; + int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; + return i1==i2 ? 
a->index - b->index : i1 - i2; +} + +/* There is one instance of the following structure for each +** associative array of type "x2". +*/ +struct s_x2 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x2node *tbl; /* The data stored here */ + struct s_x2node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x2". +*/ +typedef struct s_x2node { + struct symbol *data; /* The data */ + const char *key; /* The key */ + struct s_x2node *next; /* Next entry with the same hash */ + struct s_x2node **from; /* Previous link */ +} x2node; + +/* There is only one instance of the array, which is the following */ +static struct s_x2 *x2a; + +/* Allocate a new associative array */ +void Symbol_init(){ + if( x2a ) return; + x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); + if( x2a ){ + x2a->size = 128; + x2a->count = 0; + x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); + if( x2a->tbl==0 ){ + free(x2a); + x2a = 0; + }else{ + int i; + x2a->ht = (x2node**)&(x2a->tbl[128]); + for(i=0; i<128; i++) x2a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Symbol_insert(struct symbol *data, const char *key) +{ + x2node *np; + unsigned h; + unsigned ph; + + if( x2a==0 ) return 0; + ph = strhash(key); + h = ph & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x2a->count>=x2a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x2 array; + array.size = arrSize = x2a->size*2; + array.count = x2a->count; + array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x2node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x2node *oldnp, *newnp; + oldnp = &(x2a->tbl[i]); + h = strhash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x2a->tbl); + *x2a = array; + } + /* Insert the new data */ + h = ph & (x2a->size-1); + np = &(x2a->tbl[x2a->count++]); + np->key = key; + np->data = data; + if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); + np->next = x2a->ht[h]; + x2a->ht[h] = np; + np->from = &(x2a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct symbol *Symbol_find(const char *key) +{ + unsigned h; + x2node *np; + + if( x2a==0 ) return 0; + h = strhash(key) & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return the n-th data. Return NULL if n is out of range. */ +struct symbol *Symbol_Nth(int n) +{ + struct symbol *data; + if( x2a && n>0 && n<=x2a->count ){ + data = x2a->tbl[n-1].data; + }else{ + data = 0; + } + return data; +} + +/* Return the size of the array */ +int Symbol_count() +{ + return x2a ? x2a->count : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct symbol **Symbol_arrayof() +{ + struct symbol **array; + int i,arrSize; + if( x2a==0 ) return 0; + arrSize = x2a->count; + array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Compare two configurations */ +int Configcmp(const char *_a,const char *_b) +{ + const struct config *a = (struct config *) _a; + const struct config *b = (struct config *) _b; + int x; + x = a->rp->index - b->rp->index; + if( x==0 ) x = a->dot - b->dot; + return x; +} + +/* Compare two states */ +PRIVATE int statecmp(struct config *a, struct config *b) +{ + int rc; + for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ + rc = a->rp->index - b->rp->index; + if( rc==0 ) rc = a->dot - b->dot; + } + if( rc==0 ){ + if( a ) rc = 1; + if( b ) rc = -1; + } + return rc; +} + +/* Hash a state */ +PRIVATE unsigned statehash(struct config *a) +{ + unsigned h=0; + while( a ){ + h = h*571 + a->rp->index*37 + a->dot; + a = a->bp; + } + return h; +} + +/* Allocate a new state structure */ +struct state *State_new() +{ + struct state *newstate; + newstate = (struct state *)calloc(1, sizeof(struct state) ); + MemoryCheck(newstate); + return newstate; +} + +/* There is one instance of the following structure for each +** associative array of type "x3". +*/ +struct s_x3 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x3node *tbl; /* The data stored here */ + struct s_x3node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x3". 
+*/ +typedef struct s_x3node { + struct state *data; /* The data */ + struct config *key; /* The key */ + struct s_x3node *next; /* Next entry with the same hash */ + struct s_x3node **from; /* Previous link */ +} x3node; + +/* There is only one instance of the array, which is the following */ +static struct s_x3 *x3a; + +/* Allocate a new associative array */ +void State_init(){ + if( x3a ) return; + x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); + if( x3a ){ + x3a->size = 128; + x3a->count = 0; + x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); + if( x3a->tbl==0 ){ + free(x3a); + x3a = 0; + }else{ + int i; + x3a->ht = (x3node**)&(x3a->tbl[128]); + for(i=0; i<128; i++) x3a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int State_insert(struct state *data, struct config *key) +{ + x3node *np; + unsigned h; + unsigned ph; + + if( x3a==0 ) return 0; + ph = statehash(key); + h = ph & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x3a->count>=x3a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x3 array; + array.size = arrSize = x3a->size*2; + array.count = x3a->count; + array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x3node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x3node *oldnp, *newnp; + oldnp = &(x3a->tbl[i]); + h = statehash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x3a->tbl); + *x3a = array; + } + /* Insert the new data */ + h = ph & (x3a->size-1); + np = &(x3a->tbl[x3a->count++]); + np->key = key; + np->data = data; + if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); + np->next = x3a->ht[h]; + x3a->ht[h] = np; + np->from = &(x3a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct state *State_find(struct config *key) +{ + unsigned h; + x3node *np; + + if( x3a==0 ) return 0; + h = statehash(key) & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct state **State_arrayof() +{ + struct state **array; + int i,arrSize; + if( x3a==0 ) return 0; + arrSize = x3a->count; + array = (struct state **)calloc(arrSize, sizeof(struct state *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Hash a configuration */ +PRIVATE unsigned confighash(struct config *a) +{ + unsigned h=0; + h = h*571 + a->rp->index*37 + a->dot; + return h; +} + +/* There is one instance of the following structure for each +** associative array of type "x4". +*/ +struct s_x4 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x4node *tbl; /* The data stored here */ + struct s_x4node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x4". +*/ +typedef struct s_x4node { + struct config *data; /* The data */ + struct s_x4node *next; /* Next entry with the same hash */ + struct s_x4node **from; /* Previous link */ +} x4node; + +/* There is only one instance of the array, which is the following */ +static struct s_x4 *x4a; + +/* Allocate a new associative array */ +void Configtable_init(){ + if( x4a ) return; + x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); + if( x4a ){ + x4a->size = 64; + x4a->count = 0; + x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); + if( x4a->tbl==0 ){ + free(x4a); + x4a = 0; + }else{ + int i; + x4a->ht = (x4node**)&(x4a->tbl[64]); + for(i=0; i<64; i++) x4a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. 
+** Prior data with the same key is NOT overwritten */ +int Configtable_insert(struct config *data) +{ + x4node *np; + unsigned h; + unsigned ph; + + if( x4a==0 ) return 0; + ph = confighash(data); + h = ph & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x4a->count>=x4a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x4 array; + array.size = arrSize = x4a->size*2; + array.count = x4a->count; + array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x4node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x4node *oldnp, *newnp; + oldnp = &(x4a->tbl[i]); + h = confighash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x4a->tbl); + *x4a = array; + } + /* Insert the new data */ + h = ph & (x4a->size-1); + np = &(x4a->tbl[x4a->count++]); + np->data = data; + if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); + np->next = x4a->ht[h]; + x4a->ht[h] = np; + np->from = &(x4a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct config *Configtable_find(struct config *key) +{ + int h; + x4node *np; + + if( x4a==0 ) return 0; + h = confighash(key) & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Remove all data from the table. Pass each data to the function "f" +** as it is removed. ("f" may be null to avoid this step.) 
*/ +void Configtable_clear(int(*f)(struct config *)) +{ + int i; + if( x4a==0 || x4a->count==0 ) return; + if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); + for(i=0; isize; i++) x4a->ht[i] = 0; + x4a->count = 0; + return; +} diff --git a/lemon/lempar.c b/lemon/lempar.c new file mode 100644 index 0000000..5195d9c --- /dev/null +++ b/lemon/lempar.c @@ -0,0 +1,946 @@ +/* +** 2000-05-29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Driver template for the LEMON parser generator. +** +** The "lemon" program processes an LALR(1) input grammar file, then uses +** this template to construct a parser. The "lemon" program inserts text +** at each "%%" line. Also, any "P-a-r-s-e" identifer prefix (without the +** interstitial "-" characters) contained in this template is changed into +** the value of the %name directive from the grammar. Otherwise, the content +** of this template is copied straight through into the generate parser +** source file. +** +** The following is the concatenation of all %include directives from the +** input grammar file: +*/ +#include +/************ Begin %include sections from the grammar ************************/ +%% +/**************** End of %include directives **********************************/ +/* These constants specify the various numeric values for terminal symbols +** in a format understandable to "makeheaders". This section is blank unless +** "lemon" is run with the "-m" command-line option. +***************** Begin makeheaders token definitions *************************/ +%% +/**************** End makeheaders token definitions ***************************/ + +/* The next sections is a series of control #defines. 
+** These control various aspects of the generated parser. +** YYCODETYPE is the data type used to store the integer codes +** that represent terminal and non-terminal symbols. +** "unsigned char" is used if there are fewer than +** 256 symbols. Larger types otherwise. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYACTIONTYPE is the data type used for "action codes" - numbers +** that indicate what to do in response to the next +** token. +** ParseTOKENTYPE is the data type used for minor type for terminal +** symbols. Background: A "minor type" is a semantic +** value associated with a terminal or non-terminal +** symbol. For example, for an "ID" terminal symbol, +** the minor type might be the name of the identifier. +** Each non-terminal can have a different minor type. +** Terminal symbols all have the same minor type, though. +** This macro defines the minor type for terminal +** symbols. +** YYMINORTYPE is the data type used for all minor types. +** This is typically a union of many types, one of +** which is ParseTOKENTYPE. The entry in the union +** for terminal symbols is called "yy0". +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** ParseARG_SDECL A static variable declaration for the %extra_argument +** ParseARG_PDECL A parameter declaration for the %extra_argument +** ParseARG_STORE Code to store %extra_argument into yypParser +** ParseARG_FETCH Code to extract %extra_argument from yypParser +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states.
+** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Minimum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +*/ +#ifndef INTERFACE +# define INTERFACE 1 +#endif +/************* Begin control #defines *****************************************/ +%% +/************* End control #defines *******************************************/ + +/* Define the yytestcase() macro to be a no-op if it is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +#ifndef yytestcase +# define yytestcase(X) +#endif + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large table named yy_action[].
+** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if: +** (1) The yy_shift_ofst[S]+X value is out of range, or +** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or +** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. +** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that +** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. +** Hence only tests (1) and (2) need to be evaluated.) +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of +** YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. +** +*********** Begin parsing tables **********************************************/ +%% +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. 
+** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if the keyword does not apply in the context where +** it appears. +*/ +#ifdef YYFALLBACK +static const YYCODETYPE yyFallback[] = { +%% +}; +#endif /* YYFALLBACK */ + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. +*/ +struct yyStackEntry { + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ + YYCODETYPE major; /* The major token value. This is the code + ** number for the token at this stack level */ + YYMINORTYPE minor; /* The user-supplied minor token value.
This + ** is the value of the token */ +}; +typedef struct yyStackEntry yyStackEntry; + +/* The state of the parser is completely contained in an instance of +** the following structure */ +struct yyParser { + yyStackEntry *yytos; /* Pointer to top element of the stack */ +#ifdef YYTRACKMAXSTACKDEPTH + int yyhwm; /* High-water mark of the stack */ +#endif +#ifndef YYNOERRORRECOVERY + int yyerrcnt; /* Shifts left before out of the error */ +#endif + ParseARG_SDECL /* A place to hold %extra_argument */ +#if YYSTACKDEPTH<=0 + int yystksz; /* Current side of the stack */ + yyStackEntry *yystack; /* The parser's stack */ + yyStackEntry yystk0; /* First stack entry */ +#else + yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ +#endif +}; +typedef struct yyParser yyParser; + +#ifndef NDEBUG +#include +static FILE *yyTraceFILE = 0; +static char *yyTracePrompt = 0; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
<ul> +**
<li> A FILE* to which trace output should be written. +** If NULL, then tracing is turned off. +**
<li> A prefix string written at the beginning of every +** line of trace output. If NULL, then tracing is +** turned off. +** </ul>
+** +** Outputs: +** None. +*/ +void ParseTrace(FILE *TraceFILE, char *zTracePrompt){ + yyTraceFILE = TraceFILE; + yyTracePrompt = zTracePrompt; + if( yyTraceFILE==0 ) yyTracePrompt = 0; + else if( yyTracePrompt==0 ) yyTraceFILE = 0; +} +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +static const char *const yyTokenName[] = { +%% +}; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing reduce actions, the names of all rules are required. +*/ +static const char *const yyRuleName[] = { +%% +}; +#endif /* NDEBUG */ + + +#if YYSTACKDEPTH<=0 +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +static int yyGrowStack(yyParser *p){ + int newSize; + int idx; + yyStackEntry *pNew; + + newSize = p->yystksz*2 + 100; + idx = p->yytos ? (int)(p->yytos - p->yystack) : 0; + if( p->yystack==&p->yystk0 ){ + pNew = malloc(newSize*sizeof(pNew[0])); + if( pNew ) pNew[0] = p->yystk0; + }else{ + pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + } + if( pNew ){ + p->yystack = pNew; + p->yytos = &p->yystack[idx]; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", + yyTracePrompt, p->yystksz, newSize); + } +#endif + p->yystksz = newSize; + } + return pNew==0; +} +#endif + +/* Datatype of the argument to the memory allocated passed as the +** second argument to ParseAlloc() below. This can be changed by +** putting an appropriate #define in the %include section of the input +** grammar. +*/ +#ifndef YYMALLOCARGTYPE +# define YYMALLOCARGTYPE size_t +#endif + +/* +** This function allocates a new parser. +** The only argument is a pointer to a function which works like +** malloc. +** +** Inputs: +** A pointer to the function used to allocate memory. +** +** Outputs: +** A pointer to a parser. 
This pointer is used in subsequent calls +** to Parse and ParseFree. +*/ +void *ParseAlloc(void *(*mallocProc)(YYMALLOCARGTYPE)){ + yyParser *pParser; + pParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) ); + if( pParser ){ +#ifdef YYTRACKMAXSTACKDEPTH + pParser->yyhwm = 0; +#endif +#if YYSTACKDEPTH<=0 + pParser->yytos = NULL; + pParser->yystack = NULL; + pParser->yystksz = 0; + if( yyGrowStack(pParser) ){ + pParser->yystack = &pParser->yystk0; + pParser->yystksz = 1; + } +#endif +#ifndef YYNOERRORRECOVERY + pParser->yyerrcnt = -1; +#endif + pParser->yytos = pParser->yystack; + pParser->yystack[0].stateno = 0; + pParser->yystack[0].major = 0; + } + return pParser; +} + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. +*/ +static void yy_destructor( + yyParser *yypParser, /* The parser */ + YYCODETYPE yymajor, /* Type code for object to destroy */ + YYMINORTYPE *yypminor /* The object to be destroyed */ +){ + ParseARG_FETCH; + switch( yymajor ){ + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. 
+ */ +/********* Begin destructor definitions ***************************************/ +%% +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +static void yy_pop_parser_stack(yyParser *pParser){ + yyStackEntry *yytos; + assert( pParser->yytos!=0 ); + assert( pParser->yytos > pParser->yystack ); + yytos = pParser->yytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sPopping %s\n", + yyTracePrompt, + yyTokenName[yytos->major]); + } +#endif + yy_destructor(pParser, yytos->major, &yytos->minor); +} + +/* +** Deallocate and destroy a parser. Destructors are called for +** all stack elements before shutting the parser down. +** +** If the YYPARSEFREENEVERNULL macro exists (for example because it +** is defined in a %include section of the input grammar) then it is +** assumed that the input pointer is never NULL. +*/ +void ParseFree( + void *p, /* The parser to be deleted */ + void (*freeProc)(void*) /* Function used to reclaim memory */ +){ + yyParser *pParser = (yyParser*)p; +#ifndef YYPARSEFREENEVERNULL + if( pParser==0 ) return; +#endif + while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser); +#if YYSTACKDEPTH<=0 + if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack); +#endif + (*freeProc)((void*)pParser); +} + +/* +** Return the peak depth of the stack for a parser. +*/ +#ifdef YYTRACKMAXSTACKDEPTH +int ParseStackPeak(void *p){ + yyParser *pParser = (yyParser*)p; + return pParser->yyhwm; +} +#endif + +/* +** Find the appropriate action for a parser given the terminal +** look-ahead token iLookAhead. 
+*/ +static unsigned int yy_find_shift_action( + yyParser *pParser, /* The parser */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; + int stateno = pParser->yytos->stateno; + + if( stateno>=YY_MIN_REDUCE ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); + do{ + i = yy_shift_ofst[stateno]; + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ +#ifdef YYFALLBACK + YYCODETYPE iFallback; /* Fallback token */ + if( iLookAhead %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); + } +#endif + assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } +#endif +#ifdef YYWILDCARD + { + int j = i - iLookAhead + YYWILDCARD; + if( +#if YY_SHIFT_MIN+YYWILDCARD<0 + j>=0 && +#endif +#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT + j0 + ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n", + yyTracePrompt, yyTokenName[iLookAhead], + yyTokenName[YYWILDCARD]); + } +#endif /* NDEBUG */ + return yy_action[j]; + } + } +#endif /* YYWILDCARD */ + return yy_default[stateno]; + }else{ + return yy_action[i]; + } + }while(1); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. 
+*/ +static int yy_find_reduce_action( + int stateno, /* Current state number */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; +#ifdef YYERRORSYMBOL + if( stateno>YY_REDUCE_COUNT ){ + return yy_default[stateno]; + } +#else + assert( stateno<=YY_REDUCE_COUNT ); +#endif + i = yy_reduce_ofst[stateno]; + assert( i!=YY_REDUCE_USE_DFLT ); + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; +#ifdef YYERRORSYMBOL + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + return yy_default[stateno]; + } +#else + assert( i>=0 && iyytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ +%% +/******** End %stack_overflow code ********************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ +} + +/* +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState){ + if( yyTraceFILE ){ + if( yyNewStateyytos->major], + yyNewState); + }else{ + fprintf(yyTraceFILE,"%sShift '%s'\n", + yyTracePrompt,yyTokenName[yypParser->yytos->major]); + } + } +} +#else +# define yyTraceShift(X,Y) +#endif + +/* +** Perform a shift action. 
+*/ +static void yy_shift( + yyParser *yypParser, /* The parser to be shifted */ + int yyNewState, /* The new state to shift in */ + int yyMajor, /* The major token to shift in */ + ParseTOKENTYPE yyMinor /* The minor token to shift in */ +){ + yyStackEntry *yytos; + yypParser->yytos++; +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=&yypParser->yystack[YYSTACKDEPTH] ){ + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + return; + } + } +#endif + if( yyNewState > YY_MAX_SHIFT ){ + yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yytos = yypParser->yytos; + yytos->stateno = (YYACTIONTYPE)yyNewState; + yytos->major = (YYCODETYPE)yyMajor; + yytos->minor.yy0 = yyMinor; + yyTraceShift(yypParser, yyNewState); +} + +/* The following table contains information about every rule that +** is used during the reduce. +*/ +static const struct { + YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ + unsigned char nrhs; /* Number of right-hand side symbols in the rule */ +} yyRuleInfo[] = { +%% +}; + +static void yy_accept(yyParser*); /* Forward Declaration */ + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. 
+*/ +static void yy_reduce( + yyParser *yypParser, /* The parser */ + unsigned int yyruleno /* Number of the rule by which to reduce */ +){ + int yygoto; /* The next state */ + int yyact; /* The next action */ + yyStackEntry *yymsp; /* The top of the parser's stack */ + int yysize; /* Amount to pop the stack */ + ParseARG_FETCH; + yymsp = yypParser->yytos; +#ifndef NDEBUG + if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ + yysize = yyRuleInfo[yyruleno].nrhs; + fprintf(yyTraceFILE, "%sReduce [%s], go to state %d.\n", yyTracePrompt, + yyRuleName[yyruleno], yymsp[-yysize].stateno); + } +#endif /* NDEBUG */ + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. This ensures that there is room + ** enough on the stack to push the LHS value */ + if( yyRuleInfo[yyruleno].nrhs==0 ){ +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack)); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=&yypParser->yystack[YYSTACKDEPTH-1] ){ + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + return; + } + yymsp = yypParser->yytos; + } +#endif + } + + switch( yyruleno ){ + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... 
} // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ +%% +/********** End reduce actions ************************************************/ + }; + assert( yyrulenoYY_MAX_SHIFT ){ + yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yymsp -= yysize-1; + yypParser->yytos = yymsp; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yyTraceShift(yypParser, yyact); + }else{ + assert( yyact == YY_ACCEPT_ACTION ); + yypParser->yytos -= yysize; + yy_accept(yypParser); + } +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ +%% +/************ End %parse_failure code *****************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. 
+*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + ParseTOKENTYPE yyminor /* The minor type of the error token */ +){ + ParseARG_FETCH; +#define TOKEN yyminor +/************ Begin %syntax_error code ****************************************/ +%% +/************ End %syntax_error code ******************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +%% +/*********** End %parse_accept code *******************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParseAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
    +**
  • A pointer to the parser (an opaque structure.) +**
  • The major token number. +**
  • The minor token number. +**
  • An option argument of a grammar-specified type. +**
+** +** Outputs: +** None. +*/ +void Parse( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + ParseTOKENTYPE yyminor /* The value for the token */ + ParseARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + unsigned int yyact; /* The parser action. */ +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + int yyendofinput; /* True if we are at the end of input */ +#endif +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser; /* The parser */ + + yypParser = (yyParser*)yyp; + assert( yypParser->yytos!=0 ); +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + yyendofinput = (yymajor==0); +#endif + ParseARG_STORE; + +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sInput '%s'\n",yyTracePrompt,yyTokenName[yymajor]); + } +#endif + + do{ + yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); + if( yyact <= YY_MAX_SHIFTREDUCE ){ + yy_shift(yypParser,yyact,yymajor,yyminor); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt--; +#endif + yymajor = YYNOCODE; + }else if( yyact <= YY_MAX_REDUCE ){ + yy_reduce(yypParser,yyact-YY_MIN_REDUCE); + }else{ + assert( yyact == YY_ERROR_ACTION ); + yyminorunion.yy0 = yyminor; +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. 
No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminor); + } + yymx = yypParser->yytos->major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion); + yymajor = YYNOCODE; + }else{ + while( yypParser->yytos >= yypParser->yystack + && yymx != YYERRORSYMBOL + && (yyact = yy_find_reduce_action( + yypParser->yytos->stateno, + YYERRORSYMBOL)) >= YY_MIN_REDUCE + ){ + yy_pop_parser_stack(yypParser); + } + if( yypParser->yytos < yypParser->yystack || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor, yyminor); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. 
+ */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor, yyminor); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + } + yymajor = YYNOCODE; +#endif + } + }while( yymajor!=YYNOCODE && yypParser->yytos>yypParser->yystack ); +#ifndef NDEBUG + if( yyTraceFILE ){ + yyStackEntry *i; + char cDiv = '['; + fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt); + for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){ + fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]); + cDiv = ' '; + } + fprintf(yyTraceFILE,"]\n"); + } +#endif + return; +} diff --git a/src/argtable3.c b/src/argtable3.c new file mode 100755 index 0000000..2bc0121 --- /dev/null +++ b/src/argtable3.c @@ -0,0 +1,4955 @@ +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include "argtable3.h" + +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 2013 Tom G. Huang + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#ifndef ARG_UTILS_H +#define ARG_UTILS_H + +#define ARG_ENABLE_TRACE 0 +#define ARG_ENABLE_LOG 1 + +#ifdef __cplusplus +extern "C" { +#endif + +enum +{ + EMINCOUNT = 1, + EMAXCOUNT, + EBADINT, + EOVERFLOW, + EBADDOUBLE, + EBADDATE, + EREGNOMATCH +}; + + +#if defined(_MSC_VER) +#define ARG_TRACE(x) \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) \ + do { if (ARG_ENABLE_TRACE) dbg_printf x; } while (0) \ + __pragma(warning(pop)) + +#define ARG_LOG(x) \ + __pragma(warning(push)) \ + __pragma(warning(disable:4127)) \ + do { if (ARG_ENABLE_LOG) dbg_printf x; } while (0) \ + __pragma(warning(pop)) +#else +#define ARG_TRACE(x) \ + do { if (ARG_ENABLE_TRACE) dbg_printf x; } while (0) + +#define ARG_LOG(x) \ + do { if (ARG_ENABLE_LOG) dbg_printf x; } while (0) +#endif + +extern void dbg_printf(const char *fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif + +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include + + +void dbg_printf(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); +} + +/* $Id: getopt.h,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $ */ +/* $OpenBSD: getopt.h,v 1.1 2002/12/03 20:24:29 millert Exp $ */ +/* $NetBSD: getopt.h,v 1.4 2000/07/07 10:43:54 ad Exp $ */ + +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. 
+ * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _GETOPT_H_ +#define _GETOPT_H_ + +#if 0 +#include +#endif + +/* + * GNU-like getopt_long() and 4.4BSD getsubopt()/optreset extensions + */ +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +struct option { + /* name of long option */ + const char *name; + /* + * one of no_argument, required_argument, and optional_argument: + * whether option takes an argument + */ + int has_arg; + /* if not NULL, set *flag to val when option found */ + int *flag; + /* if flag not NULL, value to set *flag to; else return value */ + int val; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +int getopt_long(int, char * const *, const char *, + const struct option *, int *); +int getopt_long_only(int, char * const *, const char *, + const struct option *, int *); +#ifndef _GETOPT_DEFINED +#define _GETOPT_DEFINED +int getopt(int, char * const *, const char *); +int getsubopt(char **, char * const *, char **); + +extern char *optarg; /* getopt(3) external variables */ +extern int opterr; +extern int optind; +extern int optopt; +extern int optreset; +extern char *suboptarg; /* getsubopt(3) external variable */ +#endif /* _GETOPT_DEFINED */ + +#ifdef __cplusplus +} +#endif +#endif /* !_GETOPT_H_ */ +/* $Id: getopt_long.c,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $ */ +/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ +/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ + +/* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ + +#ifndef lint +static const char rcsid[]="$Id: getopt_long.c,v 1.1 2009/10/16 19:50:28 rodney Exp rodney $"; +#endif /* lint */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#if 0 +#include +#endif +#include +#include +#include + + +#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ + +#ifdef REPLACE_GETOPT +int opterr = 1; /* if error message should be printed */ +int optind = 1; /* index into parent argv vector */ +int optopt = '?'; /* character checked for validity */ +int optreset; /* reset getopt */ +char *optarg; /* argument associated with option */ +#endif + +#define PRINT_ERROR ((opterr) && (*options != ':')) + +#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ +#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ +#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ + +/* return values */ +#define BADCH (int)'?' +#define BADARG ((*options == ':') ? 
(int)':' : (int)'?') +#define INORDER (int)1 + +#define EMSG "" + +static int getopt_internal(int, char * const *, const char *, + const struct option *, int *, int); +static int parse_long_options(char * const *, const char *, + const struct option *, int *, int); +static int gcd(int, int); +static void permute_args(int, int, int, char * const *); + +static char *place = EMSG; /* option letter processing */ + +/* XXX: set optreset to 1 rather than these two */ +static int nonopt_start = -1; /* first non option argument (for permute) */ +static int nonopt_end = -1; /* first option after non options (for permute) */ + +/* Error messages */ +static const char recargchar[] = "option requires an argument -- %c"; +static const char recargstring[] = "option requires an argument -- %s"; +static const char ambig[] = "ambiguous option -- %.*s"; +static const char noarg[] = "option doesn't take an argument -- %.*s"; +static const char illoptchar[] = "unknown option -- %c"; +static const char illoptstring[] = "unknown option -- %s"; + + + +#ifdef _WIN32 + +/* Windows needs warnx(). We change the definition though: + * 1. (another) global is defined, opterrmsg, which holds the error message + * 2. errors are always printed out on stderr w/o the program name + * Note that opterrmsg always gets set no matter what opterr is set to. The + * error message will not be printed if opterr is 0 as usual. + */ + +#include +#include + +extern char opterrmsg[128]; +char opterrmsg[128]; /* buffer for the last error message */ + +static void warnx(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + /* + Make sure opterrmsg is always zero-terminated despite the _vsnprintf() + implementation specifics and manually suppress the warning. 
+ */ + memset(opterrmsg, 0, sizeof opterrmsg); + if (fmt != NULL) + _vsnprintf(opterrmsg, sizeof(opterrmsg) - 1, fmt, ap); + va_end(ap); + +#pragma warning(suppress: 6053) + fprintf(stderr, "%s\n", opterrmsg); +} + +#else +#include +#endif /*_WIN32*/ + + +/* + * Compute the greatest common divisor of a and b. + */ +static int +gcd(int a, int b) +{ + int c; + + c = a % b; + while (c != 0) { + a = b; + b = c; + c = a % b; + } + + return (b); +} + +/* + * Exchange the block from nonopt_start to nonopt_end with the block + * from nonopt_end to opt_end (keeping the same order of arguments + * in each block). + */ +static void +permute_args(int panonopt_start, int panonopt_end, int opt_end, + char * const *nargv) +{ + int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; + char *swap; + + /* + * compute lengths of blocks and number and size of cycles + */ + nnonopts = panonopt_end - panonopt_start; + nopts = opt_end - panonopt_end; + ncycle = gcd(nnonopts, nopts); + cyclelen = (opt_end - panonopt_start) / ncycle; + + for (i = 0; i < ncycle; i++) { + cstart = panonopt_end+i; + pos = cstart; + for (j = 0; j < cyclelen; j++) { + if (pos >= panonopt_end) + pos -= nnonopts; + else + pos += nopts; + swap = nargv[pos]; + /* LINTED const cast */ + ((char **) nargv)[pos] = nargv[cstart]; + /* LINTED const cast */ + ((char **)nargv)[cstart] = swap; + } + } +} + +/* + * parse_long_options -- + * Parse long options in argc/argv argument vector. + * Returns -1 if short_too is set and the option does not match long_options. 
 *
 * The candidate name is read from the file-scope scanning pointer `place`
 * and optind is advanced past the current argument before matching.
 * An exact name match wins immediately; otherwise a unique prefix is
 * accepted, and two different prefix matches are reported as ambiguous.
 *
 *	nargv		argument vector; nargv[optind] is consumed when a
 *			required argument is not attached with '='.
 *	long_options	table terminated by an entry whose name is NULL.
 *	idx		if not NULL, receives the index of the matched entry.
 *	short_too	non-zero when the text could also be a short option;
 *			single-character prefix matches are then refused.
 *
 * Returns BADCH for an ambiguous or unknown option, BADARG for a missing
 * or unexpected argument, 0 when the matched entry has a flag pointer
 * (after storing val through it), and the entry's val otherwise.
 */
static int
parse_long_options(char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int short_too)
{
	char *current_argv, *has_equal;
	size_t current_argv_len;
	int i, match;

	current_argv = place;
	match = -1;

	/* consume the argument now; error paths below undo this where needed */
	optind++;

	if ((has_equal = strchr(current_argv, '=')) != NULL) {
		/* argument found (--option=arg) */
		current_argv_len = has_equal - current_argv;
		has_equal++;
	} else
		current_argv_len = strlen(current_argv);

	for (i = 0; long_options[i].name; i++) {
		/* find matching long option */
		if (strncmp(current_argv, long_options[i].name,
		    current_argv_len))
			continue;

		if (strlen(long_options[i].name) == current_argv_len) {
			/* exact match */
			match = i;
			break;
		}
		/*
		 * If this is a known short option, don't allow
		 * a partial match of a single character.
		 */
		if (short_too && current_argv_len == 1)
			continue;

		if (match == -1)	/* partial match */
			match = i;
		else {
			/* ambiguous abbreviation */
			if (PRINT_ERROR)
				warnx(ambig, (int)current_argv_len,
				     current_argv);
			optopt = 0;
			return (BADCH);
		}
	}
	if (match != -1) {		/* option found */
		if (long_options[match].has_arg == no_argument
		    && has_equal) {
			/* "--name=value" given for an option that takes none */
			if (PRINT_ERROR)
				warnx(noarg, (int)current_argv_len,
				     current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			return (BADARG);
		}
		if (long_options[match].has_arg == required_argument ||
		    long_options[match].has_arg == optional_argument) {
			if (has_equal)
				optarg = has_equal;
			else if (long_options[match].has_arg ==
			    required_argument) {
				/*
				 * optional argument doesn't use next nargv
				 */
				optarg = nargv[optind++];
			}
		}
		if ((long_options[match].has_arg == required_argument)
		    && (optarg == NULL)) {
			/*
			 * Missing argument; leading ':' indicates no error
			 * should be generated.
			 */
			if (PRINT_ERROR)
				warnx(recargstring,
				    current_argv);
			/*
			 * XXX: GNU sets optopt to val regardless of flag
			 */
			if (long_options[match].flag == NULL)
				optopt = long_options[match].val;
			else
				optopt = 0;
			--optind;
			return (BADARG);
		}
	} else {			/* unknown option */
		if (short_too) {
			/* let the caller retry the text as short options */
			--optind;
			return (-1);
		}
		if (PRINT_ERROR)
			warnx(illoptstring, current_argv);
		optopt = 0;
		return (BADCH);
	}
	if (idx)
		*idx = match;
	if (long_options[match].flag) {
		*long_options[match].flag = long_options[match].val;
		return (0);
	} else
		return (long_options[match].val);
}

/*
 * getopt_internal --
 *	Parse argc/argv argument vector.  Called by user level routines.
 *
 * Shared engine behind getopt(), getopt_long() and getopt_long_only().
 * Scanning state lives in file-scope variables (place, optind, optreset,
 * nonopt_start, nonopt_end), so successive calls continue where the
 * previous one stopped.
 *
 *	long_options	may be NULL, in which case only short options are
 *			recognised.
 *	flags		FLAG_PERMUTE / FLAG_LONGONLY requested by the caller;
 *			adjusted below from POSIXLY_CORRECT and a leading
 *			'+' or '-' in options.
 *
 * Returns the option character (or the value produced by
 * parse_long_options()), INORDER for a non-option when FLAG_ALLARGS is
 * in effect, BADCH / BADARG on errors, and -1 when options is NULL or
 * scanning is finished.
 */
static int
getopt_internal(int nargc, char * const *nargv, const char *options,
	const struct option *long_options, int *idx, int flags)
{
	char *oli;				/* option letter list index */
	int optchar, short_too;
	static int posixly_correct = -1;	/* -1: environment not checked yet */

	if (options == NULL)
		return (-1);

	/*
	 * Disable GNU extensions if POSIXLY_CORRECT is set or options
	 * string begins with a '+'.
	 */
	if (posixly_correct == -1)
		posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
	if (posixly_correct || *options == '+')
		flags &= ~FLAG_PERMUTE;
	else if (*options == '-')
		flags |= FLAG_ALLARGS;
	if (*options == '+' || *options == '-')
		options++;

	/*
	 * XXX Some GNU programs (like cvs) set optind to 0 instead of
	 * XXX using optreset.  Work around this braindamage.
	 */
	if (optind == 0)
		optind = optreset = 1;

	optarg = NULL;
	if (optreset)
		nonopt_start = nonopt_end = -1;
start:
	if (optreset || !*place) {		/* update scanning pointer */
		optreset = 0;
		if (optind >= nargc) {          /* end of argument vector */
			place = EMSG;
			if (nonopt_end != -1) {
				/* do permutation, if we have to */
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			else if (nonopt_start != -1) {
				/*
				 * If we skipped non-options, set optind
				 * to the first of them.
				 */
				optind = nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
		if (*(place = nargv[optind]) != '-' ||
		    (place[1] == '\0' && strchr(options, '-') == NULL)) {
			place = EMSG;		/* found non-option */
			if (flags & FLAG_ALLARGS) {
				/*
				 * GNU extension:
				 * return non-option as argument to option 1
				 */
				optarg = nargv[optind++];
				return (INORDER);
			}
			if (!(flags & FLAG_PERMUTE)) {
				/*
				 * If no permutation wanted, stop parsing
				 * at first non-option.
				 */
				return (-1);
			}
			/* do permutation */
			if (nonopt_start == -1)
				nonopt_start = optind;
			else if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				nonopt_start = optind -
				    (nonopt_end - nonopt_start);
				nonopt_end = -1;
			}
			optind++;
			/* process next argument */
			goto start;
		}
		/* first option after a run of skipped non-options ends that run */
		if (nonopt_start != -1 && nonopt_end == -1)
			nonopt_end = optind;

		/*
		 * If we have "-" do nothing, if "--" we are done.
		 */
		if (place[1] != '\0' && *++place == '-' && place[1] == '\0') {
			optind++;
			place = EMSG;
			/*
			 * We found an option (--), so if we skipped
			 * non-options, we have to permute.
			 */
			if (nonopt_end != -1) {
				permute_args(nonopt_start, nonopt_end,
				    optind, nargv);
				optind -= nonopt_end - nonopt_start;
			}
			nonopt_start = nonopt_end = -1;
			return (-1);
		}
	}

	/*
	 * Check long options if:
	 *  1) we were passed some
	 *  2) the arg is not just "-"
	 *  3) either the arg starts with -- or we are getopt_long_only()
	 */
	if (long_options != NULL && place != nargv[optind] &&
	    (*place == '-' || (flags & FLAG_LONGONLY))) {
		short_too = 0;
		if (*place == '-')
			place++;		/* --foo long option */
		else if (*place != ':' && strchr(options, *place) != NULL)
			short_too = 1;		/* could be short option too */

		optchar = parse_long_options(nargv, options, long_options,
		    idx, short_too);
		if (optchar != -1) {
			place = EMSG;
			return (optchar);
		}
	}

	if ((optchar = (int)*place++) == (int)':' ||
	    (optchar == (int)'-' && *place != '\0') ||
	    (oli = strchr(options, optchar)) == NULL) {
		/*
		 * If the user specified "-" and  '-' isn't listed in
		 * options, return -1 (non-option) as per POSIX.
		 * Otherwise, it is an unknown option character (or ':').
		 */
		if (optchar == (int)'-' && *place == '\0')
			return (-1);
		if (!*place)
			++optind;
		if (PRINT_ERROR)
			warnx(illoptchar, optchar);
		optopt = optchar;
		return (BADCH);
	}
	if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
		/* -W long-option */
		if (*place)			/* no space */
			/* NOTHING */;
		else if (++optind >= nargc) {	/* no arg */
			place = EMSG;
			if (PRINT_ERROR)
				warnx(recargchar, optchar);
			optopt = optchar;
			return (BADARG);
		} else				/* white space */
			place = nargv[optind];
		optchar = parse_long_options(nargv, options, long_options,
		    idx, 0);
		place = EMSG;
		return (optchar);
	}
	if (*++oli != ':') {			/* doesn't take argument */
		if (!*place)
			++optind;
	} else {				/* takes (optional) argument */
		optarg = NULL;
		if (*place)			/* no white space */
			optarg = place;
		else if (oli[1] != ':') {	/* arg not optional */
			if (++optind >= nargc) {	/* no arg */
				place = EMSG;
				if (PRINT_ERROR)
					warnx(recargchar, optchar);
				optopt = optchar;
				return (BADARG);
			} else
				optarg = nargv[optind];
		}
		place = EMSG;
		++optind;
	}
	/* dump back option letter */
	return (optchar);
}

#ifdef REPLACE_GETOPT
/*
 * getopt --
 *	Parse argc/argv argument vector.
 *
 * [eventually this will replace the BSD getopt]
 *
 * Only compiled when REPLACE_GETOPT is defined.  Short options only:
 * no long-option table and no flags are passed to getopt_internal().
 */
int
getopt(int nargc, char * const *nargv, const char *options)
{

	/*
	 * We don't pass FLAG_PERMUTE to getopt_internal() since
	 * the BSD getopt(3) (unlike GNU) has never done this.
	 *
	 * Furthermore, since many privileged programs call getopt()
	 * before dropping privileges it makes sense to keep things
	 * as simple (and bug-free) as possible.
	 */
	return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */

/*
 * getopt_long --
 *	Parse argc/argv argument vector.
+ */ +int +getopt_long(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE)); +} + +/* + * getopt_long_only -- + * Parse argc/argv argument vector. + */ +int +getopt_long_only(int nargc, char * const *nargv, const char *options, + const struct option *long_options, int *idx) +{ + + return (getopt_internal(nargc, nargv, options, long_options, idx, + FLAG_PERMUTE|FLAG_LONGONLY)); +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include + +#include "argtable3.h" + + +char * arg_strptime(const char *buf, const char *fmt, struct tm *tm); + + +static void arg_date_resetfn(struct arg_date *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +static int arg_date_scanfn(struct arg_date *parent, const char *argval) +{ + int errorcode = 0; + + if (parent->count == parent->hdr.maxcount) + { + errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* no argument value was given, leave parent->tmval[] unaltered but still count it */ + parent->count++; + } + else + { + const char *pend; + struct tm tm = parent->tmval[parent->count]; + + /* parse the given argument value, store result in parent->tmval[] */ + pend = arg_strptime(argval, parent->format, &tm); + if (pend && pend[0] == '\0') + parent->tmval[parent->count++] = tm; + else + errorcode = EBADDATE; + } + + ARG_TRACE(("%s:scanfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static int arg_date_checkfn(struct arg_date *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? 
EMINCOUNT : 0; + + ARG_TRACE(("%s:checkfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static void arg_date_errorfn( + struct arg_date *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + case EBADDATE: + { + struct tm tm; + char buff[200]; + + fprintf(fp, "illegal timestamp format \"%s\"\n", argval); + memset(&tm, 0, sizeof(tm)); + arg_strptime("1999-12-31 23:59:59", "%F %H:%M:%S", &tm); + strftime(buff, sizeof(buff), parent->format, &tm); + printf("correct format is \"%s\"\n", buff); + break; + } + } +} + + +struct arg_date * arg_date0( + const char * shortopts, + const char * longopts, + const char * format, + const char *datatype, + const char *glossary) +{ + return arg_daten(shortopts, longopts, format, datatype, 0, 1, glossary); +} + + +struct arg_date * arg_date1( + const char * shortopts, + const char * longopts, + const char * format, + const char *datatype, + const char *glossary) +{ + return arg_daten(shortopts, longopts, format, datatype, 1, 1, glossary); +} + + +struct arg_date * arg_daten( + const char * shortopts, + const char * longopts, + const char * format, + const char *datatype, + int mincount, + int maxcount, + const char *glossary) +{ + size_t nbytes; + struct arg_date *result; + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? 
mincount : maxcount; + + /* default time format is the national date format for the locale */ + if (!format) + format = "%x"; + + nbytes = sizeof(struct arg_date) /* storage for struct arg_date */ + + maxcount * sizeof(struct tm); /* storage for tmval[maxcount] array */ + + /* allocate storage for the arg_date struct + tmval[] array. */ + /* we use calloc because we want the tmval[] array zero filled. */ + result = (struct arg_date *)calloc(1, nbytes); + if (result) + { + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? datatype : format; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_date_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_date_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_date_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_date_errorfn; + + /* store the tmval[maxcount] array immediately after the arg_date struct */ + result->tmval = (struct tm *)(result + 1); + + /* init the remaining arg_date member variables */ + result->count = 0; + result->format = format; + } + + ARG_TRACE(("arg_daten() returns %p\n", result)); + return result; +} + + +/*- + * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code was contributed to The NetBSD Foundation by Klaus Klein. + * Heavily optimised by David Laight + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +/* + * We do not implement alternate representations. However, we always + * check whether a given modifier is allowed for a certain conversion. 
+ */ +#define ALT_E 0x01 +#define ALT_O 0x02 +#define LEGAL_ALT(x) { if (alt_format & ~(x)) return (0); } +#define TM_YEAR_BASE (1900) + +static int conv_num(const char * *, int *, int, int); + +static const char *day[7] = { + "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", + "Friday", "Saturday" +}; + +static const char *abday[7] = { + "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" +}; + +static const char *mon[12] = { + "January", "February", "March", "April", "May", "June", "July", + "August", "September", "October", "November", "December" +}; + +static const char *abmon[12] = { + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +static const char *am_pm[2] = { + "AM", "PM" +}; + + +static int arg_strcasecmp(const char *s1, const char *s2) +{ + const unsigned char *us1 = (const unsigned char *)s1; + const unsigned char *us2 = (const unsigned char *)s2; + while (tolower(*us1) == tolower(*us2++)) + if (*us1++ == '\0') + return 0; + + return tolower(*us1) - tolower(*--us2); +} + + +static int arg_strncasecmp(const char *s1, const char *s2, size_t n) +{ + if (n != 0) + { + const unsigned char *us1 = (const unsigned char *)s1; + const unsigned char *us2 = (const unsigned char *)s2; + do + { + if (tolower(*us1) != tolower(*us2++)) + return tolower(*us1) - tolower(*--us2); + + if (*us1++ == '\0') + break; + } while (--n != 0); + } + + return 0; +} + + +char * arg_strptime(const char *buf, const char *fmt, struct tm *tm) +{ + char c; + const char *bp; + size_t len = 0; + int alt_format, i, split_year = 0; + + bp = buf; + + while ((c = *fmt) != '\0') { + /* Clear `alternate' modifier prior to new conversion. */ + alt_format = 0; + + /* Eat up white-space. */ + if (isspace(c)) { + while (isspace(*bp)) + bp++; + + fmt++; + continue; + } + + if ((c = *fmt++) != '%') + goto literal; + + +again: + switch (c = *fmt++) + { + case '%': /* "%%" is converted to "%". 
*/ +literal: + if (c != *bp++) + return (0); + break; + + /* + * "Alternative" modifiers. Just set the appropriate flag + * and start over again. + */ + case 'E': /* "%E?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_E; + goto again; + + case 'O': /* "%O?" alternative conversion modifier. */ + LEGAL_ALT(0); + alt_format |= ALT_O; + goto again; + + /* + * "Complex" conversion rules, implemented through recursion. + */ + case 'c': /* Date and time, using the locale's format. */ + LEGAL_ALT(ALT_E); + bp = arg_strptime(bp, "%x %X", tm); + if (!bp) + return (0); + break; + + case 'D': /* The date as "%m/%d/%y". */ + LEGAL_ALT(0); + bp = arg_strptime(bp, "%m/%d/%y", tm); + if (!bp) + return (0); + break; + + case 'R': /* The time as "%H:%M". */ + LEGAL_ALT(0); + bp = arg_strptime(bp, "%H:%M", tm); + if (!bp) + return (0); + break; + + case 'r': /* The time in 12-hour clock representation. */ + LEGAL_ALT(0); + bp = arg_strptime(bp, "%I:%M:%S %p", tm); + if (!bp) + return (0); + break; + + case 'T': /* The time as "%H:%M:%S". */ + LEGAL_ALT(0); + bp = arg_strptime(bp, "%H:%M:%S", tm); + if (!bp) + return (0); + break; + + case 'X': /* The time, using the locale's format. */ + LEGAL_ALT(ALT_E); + bp = arg_strptime(bp, "%H:%M:%S", tm); + if (!bp) + return (0); + break; + + case 'x': /* The date, using the locale's format. */ + LEGAL_ALT(ALT_E); + bp = arg_strptime(bp, "%m/%d/%y", tm); + if (!bp) + return (0); + break; + + /* + * "Elementary" conversion rules. + */ + case 'A': /* The day of week, using the locale's form. */ + case 'a': + LEGAL_ALT(0); + for (i = 0; i < 7; i++) { + /* Full name. */ + len = strlen(day[i]); + if (arg_strncasecmp(day[i], bp, len) == 0) + break; + + /* Abbreviated name. */ + len = strlen(abday[i]); + if (arg_strncasecmp(abday[i], bp, len) == 0) + break; + } + + /* Nothing matched. */ + if (i == 7) + return (0); + + tm->tm_wday = i; + bp += len; + break; + + case 'B': /* The month, using the locale's form. 
*/ + case 'b': + case 'h': + LEGAL_ALT(0); + for (i = 0; i < 12; i++) { + /* Full name. */ + len = strlen(mon[i]); + if (arg_strncasecmp(mon[i], bp, len) == 0) + break; + + /* Abbreviated name. */ + len = strlen(abmon[i]); + if (arg_strncasecmp(abmon[i], bp, len) == 0) + break; + } + + /* Nothing matched. */ + if (i == 12) + return (0); + + tm->tm_mon = i; + bp += len; + break; + + case 'C': /* The century number. */ + LEGAL_ALT(ALT_E); + if (!(conv_num(&bp, &i, 0, 99))) + return (0); + + if (split_year) { + tm->tm_year = (tm->tm_year % 100) + (i * 100); + } else { + tm->tm_year = i * 100; + split_year = 1; + } + break; + + case 'd': /* The day of month. */ + case 'e': + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_mday, 1, 31))) + return (0); + break; + + case 'k': /* The hour (24-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'H': + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_hour, 0, 23))) + return (0); + break; + + case 'l': /* The hour (12-hour clock representation). */ + LEGAL_ALT(0); + /* FALLTHROUGH */ + case 'I': + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_hour, 1, 12))) + return (0); + if (tm->tm_hour == 12) + tm->tm_hour = 0; + break; + + case 'j': /* The day of year. */ + LEGAL_ALT(0); + if (!(conv_num(&bp, &i, 1, 366))) + return (0); + tm->tm_yday = i - 1; + break; + + case 'M': /* The minute. */ + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_min, 0, 59))) + return (0); + break; + + case 'm': /* The month. */ + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &i, 1, 12))) + return (0); + tm->tm_mon = i - 1; + break; + + case 'p': /* The locale's equivalent of AM/PM. */ + LEGAL_ALT(0); + /* AM? */ + if (arg_strcasecmp(am_pm[0], bp) == 0) { + if (tm->tm_hour > 11) + return (0); + + bp += strlen(am_pm[0]); + break; + } + /* PM? */ + else if (arg_strcasecmp(am_pm[1], bp) == 0) { + if (tm->tm_hour > 11) + return (0); + + tm->tm_hour += 12; + bp += strlen(am_pm[1]); + break; + } + + /* Nothing matched. 
*/ + return (0); + + case 'S': /* The seconds. */ + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_sec, 0, 61))) + return (0); + break; + + case 'U': /* The week of year, beginning on sunday. */ + case 'W': /* The week of year, beginning on monday. */ + LEGAL_ALT(ALT_O); + /* + * XXX This is bogus, as we can not assume any valid + * information present in the tm structure at this + * point to calculate a real value, so just check the + * range for now. + */ + if (!(conv_num(&bp, &i, 0, 53))) + return (0); + break; + + case 'w': /* The day of week, beginning on sunday. */ + LEGAL_ALT(ALT_O); + if (!(conv_num(&bp, &tm->tm_wday, 0, 6))) + return (0); + break; + + case 'Y': /* The year. */ + LEGAL_ALT(ALT_E); + if (!(conv_num(&bp, &i, 0, 9999))) + return (0); + + tm->tm_year = i - TM_YEAR_BASE; + break; + + case 'y': /* The year within 100 years of the epoch. */ + LEGAL_ALT(ALT_E | ALT_O); + if (!(conv_num(&bp, &i, 0, 99))) + return (0); + + if (split_year) { + tm->tm_year = ((tm->tm_year / 100) * 100) + i; + break; + } + split_year = 1; + if (i <= 68) + tm->tm_year = i + 2000 - TM_YEAR_BASE; + else + tm->tm_year = i + 1900 - TM_YEAR_BASE; + break; + + /* + * Miscellaneous conversions. + */ + case 'n': /* Any kind of white-space. */ + case 't': + LEGAL_ALT(0); + while (isspace(*bp)) + bp++; + break; + + + default: /* Unknown/unsupported conversion. */ + return (0); + } + + + } + + /* LINTED functional specification */ + return ((char *)bp); +} + + +static int conv_num(const char * *buf, int *dest, int llim, int ulim) +{ + int result = 0; + + /* The limit also determines the number of valid digits. 
*/ + int rulim = ulim; + + if (**buf < '0' || **buf > '9') + return (0); + + do { + result *= 10; + result += *(*buf)++ - '0'; + rulim /= 10; + } while ((result * 10 <= ulim) && rulim && **buf >= '0' && **buf <= '9'); + + if (result < llim || result > ulim) + return (0); + + *dest = result; + return (1); +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include + +#include "argtable3.h" + + +static void arg_dbl_resetfn(struct arg_dbl *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +static int arg_dbl_scanfn(struct arg_dbl *parent, const char *argval) +{ + int errorcode = 0; + + if (parent->count == parent->hdr.maxcount) + { + /* maximum number of arguments exceeded */ + errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent argument value unaltered but still count the argument. */ + parent->count++; + } + else + { + double val; + char *end; + + /* extract double from argval into val */ + val = strtod(argval, &end); + + /* if success then store result in parent->dval[] array otherwise return error*/ + if (*end == 0) + parent->dval[parent->count++] = val; + else + errorcode = EBADDOUBLE; + } + + ARG_TRACE(("%s:scanfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static int arg_dbl_checkfn(struct arg_dbl *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? 
EMINCOUNT : 0; + + ARG_TRACE(("%s:checkfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static void arg_dbl_errorfn( + struct arg_dbl *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + case EBADDOUBLE: + fprintf(fp, "invalid argument \"%s\" to option ", argval); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + } +} + + +struct arg_dbl * arg_dbl0( + const char * shortopts, + const char * longopts, + const char *datatype, + const char *glossary) +{ + return arg_dbln(shortopts, longopts, datatype, 0, 1, glossary); +} + + +struct arg_dbl * arg_dbl1( + const char * shortopts, + const char * longopts, + const char *datatype, + const char *glossary) +{ + return arg_dbln(shortopts, longopts, datatype, 1, 1, glossary); +} + + +struct arg_dbl * arg_dbln( + const char * shortopts, + const char * longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary) +{ + size_t nbytes; + struct arg_dbl *result; + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? 
mincount : maxcount; + + nbytes = sizeof(struct arg_dbl) /* storage for struct arg_dbl */ + + (maxcount + 1) * sizeof(double); /* storage for dval[maxcount] array plus one extra for padding to memory boundary */ + + result = (struct arg_dbl *)malloc(nbytes); + if (result) + { + size_t addr; + size_t rem; + + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? datatype : ""; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_dbl_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_dbl_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_dbl_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_dbl_errorfn; + + /* Store the dval[maxcount] array on the first double boundary that + * immediately follows the arg_dbl struct. We do the memory alignment + * purely for SPARC and Motorola systems. They require floats and + * doubles to be aligned on natural boundaries. + */ + addr = (size_t)(result + 1); + rem = addr % sizeof(double); + result->dval = (double *)(addr + sizeof(double) - rem); + ARG_TRACE(("addr=%p, dval=%p, sizeof(double)=%d rem=%d\n", addr, result->dval, (int)sizeof(double), (int)rem)); + + result->count = 0; + } + + ARG_TRACE(("arg_dbln() returns %p\n", result)); + return result; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include + +#include "argtable3.h" + + +static void arg_end_resetfn(struct arg_end *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + +static void arg_end_errorfn( + void *parent, + FILE *fp, + int error, + const char *argval, + const char *progname) +{ + /* suppress unreferenced formal parameter warning */ + (void)parent; + + progname = progname ? progname : ""; + argval = argval ? 
argval : ""; + + fprintf(fp, "%s: ", progname); + switch(error) + { + case ARG_ELIMIT: + fputs("too many errors to display", fp); + break; + case ARG_EMALLOC: + fputs("insufficent memory", fp); + break; + case ARG_ENOMATCH: + fprintf(fp, "unexpected argument \"%s\"", argval); + break; + case ARG_EMISSARG: + fprintf(fp, "option \"%s\" requires an argument", argval); + break; + case ARG_ELONGOPT: + fprintf(fp, "invalid option \"%s\"", argval); + break; + default: + fprintf(fp, "invalid option \"-%c\"", error); + break; + } + + fputc('\n', fp); +} + + +struct arg_end * arg_end(int maxcount) +{ + size_t nbytes; + struct arg_end *result; + + nbytes = sizeof(struct arg_end) + + maxcount * sizeof(int) /* storage for int error[maxcount] array*/ + + maxcount * sizeof(void *) /* storage for void* parent[maxcount] array */ + + maxcount * sizeof(char *); /* storage for char* argval[maxcount] array */ + + result = (struct arg_end *)malloc(nbytes); + if (result) + { + /* init the arg_hdr struct */ + result->hdr.flag = ARG_TERMINATOR; + result->hdr.shortopts = NULL; + result->hdr.longopts = NULL; + result->hdr.datatype = NULL; + result->hdr.glossary = NULL; + result->hdr.mincount = 1; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_end_resetfn; + result->hdr.scanfn = NULL; + result->hdr.checkfn = NULL; + result->hdr.errorfn = (arg_errorfn *)arg_end_errorfn; + + /* store error[maxcount] array immediately after struct arg_end */ + result->error = (int *)(result + 1); + + /* store parent[maxcount] array immediately after error[] array */ + result->parent = (void * *)(result->error + maxcount ); + + /* store argval[maxcount] array immediately after parent[] array */ + result->argval = (const char * *)(result->parent + maxcount ); + } + + ARG_TRACE(("arg_end(%d) returns %p\n", maxcount, result)); + return result; +} + + +void arg_print_errors(FILE * fp, struct arg_end * end, const char * progname) +{ + int i; + 
ARG_TRACE(("arg_errors()\n")); + for (i = 0; i < end->count; i++) + { + struct arg_hdr *errorparent = (struct arg_hdr *)(end->parent[i]); + if (errorparent->errorfn) + errorparent->errorfn(end->parent[i], + fp, + end->error[i], + end->argval[i], + progname); + } +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ******************************************************************************/ + +#include +#include + +#include "argtable3.h" + +#ifdef WIN32 +# define FILESEPARATOR1 '\\' +# define FILESEPARATOR2 '/' +#else +# define FILESEPARATOR1 '/' +# define FILESEPARATOR2 '/' +#endif + + +static void arg_file_resetfn(struct arg_file *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +/* Returns ptr to the base filename within *filename */ +static const char * arg_basename(const char *filename) +{ + const char *result = NULL, *result1, *result2; + + /* Find the last occurrence of eother file separator character. */ + /* Two alternative file separator chars are supported as legal */ + /* file separators but not both together in the same filename. */ + result1 = (filename ? strrchr(filename, FILESEPARATOR1) : NULL); + result2 = (filename ? strrchr(filename, FILESEPARATOR2) : NULL); + + if (result2) + result = result2 + 1; /* using FILESEPARATOR2 (the alternative file separator) */ + + if (result1) + result = result1 + 1; /* using FILESEPARATOR1 (the preferred file separator) */ + + if (!result) + result = filename; /* neither file separator was found so basename is the whole filename */ + + /* special cases of "." and ".." are not considered basenames */ + if (result && ( strcmp(".", result) == 0 || strcmp("..", result) == 0 )) + result = filename + strlen(filename); + + return result; +} + + +/* Returns ptr to the file extension within *basename */ +static const char * arg_extension(const char *basename) +{ + /* find the last occurrence of '.' in basename */ + const char *result = (basename ? strrchr(basename, '.') : NULL); + + /* if no '.' 
was found then return pointer to end of basename */ + if (basename && !result) + result = basename + strlen(basename); + + /* special case: basenames with a single leading dot (eg ".foo") are not considered as true extensions */ + if (basename && result == basename) + result = basename + strlen(basename); + + /* special case: empty extensions (eg "foo.","foo..") are not considered as true extensions */ + if (basename && result && result[1] == '\0') + result = basename + strlen(basename); + + return result; +} + + +static int arg_file_scanfn(struct arg_file *parent, const char *argval) +{ + int errorcode = 0; + + if (parent->count == parent->hdr.maxcount) + { + /* maximum number of arguments exceeded */ + errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent arguiment value unaltered but still count the argument. */ + parent->count++; + } + else + { + parent->filename[parent->count] = argval; + parent->basename[parent->count] = arg_basename(argval); + parent->extension[parent->count] = + arg_extension(parent->basename[parent->count]); /* only seek extensions within the basename (not the file path)*/ + parent->count++; + } + + ARG_TRACE(("%s4:scanfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static int arg_file_checkfn(struct arg_file *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? EMINCOUNT : 0; + + ARG_TRACE(("%s:checkfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static void arg_file_errorfn( + struct arg_file *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? 
argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + default: + fprintf(fp, "unknown error at \"%s\"\n", argval); + } +} + + +struct arg_file * arg_file0( + const char * shortopts, + const char * longopts, + const char *datatype, + const char *glossary) +{ + return arg_filen(shortopts, longopts, datatype, 0, 1, glossary); +} + + +struct arg_file * arg_file1( + const char * shortopts, + const char * longopts, + const char *datatype, + const char *glossary) +{ + return arg_filen(shortopts, longopts, datatype, 1, 1, glossary); +} + + +struct arg_file * arg_filen( + const char * shortopts, + const char * longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary) +{ + size_t nbytes; + struct arg_file *result; + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? mincount : maxcount; + + nbytes = sizeof(struct arg_file) /* storage for struct arg_file */ + + sizeof(char *) * maxcount /* storage for filename[maxcount] array */ + + sizeof(char *) * maxcount /* storage for basename[maxcount] array */ + + sizeof(char *) * maxcount; /* storage for extension[maxcount] array */ + + result = (struct arg_file *)malloc(nbytes); + if (result) + { + int i; + + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.glossary = glossary; + result->hdr.datatype = datatype ? 
datatype : ""; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_file_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_file_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_file_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_file_errorfn; + + /* store the filename,basename,extension arrays immediately after the arg_file struct */ + result->filename = (const char * *)(result + 1); + result->basename = result->filename + maxcount; + result->extension = result->basename + maxcount; + result->count = 0; + + /* foolproof the string pointers by initialising them with empty strings */ + for (i = 0; i < maxcount; i++) + { + result->filename[i] = ""; + result->basename[i] = ""; + result->extension[i] = ""; + } + } + + ARG_TRACE(("arg_filen() returns %p\n", result)); + return result; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include +#include + +#include "argtable3.h" + + +static void arg_int_resetfn(struct arg_int *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +/* strtol0x() is like strtol() except that the numeric string is */ +/* expected to be prefixed by "0X" where X is a user supplied char. */ +/* The string may optionally be prefixed by white space and + or - */ +/* as in +0X123 or -0X123. */ +/* Once the prefix has been scanned, the remainder of the numeric */ +/* string is converted using strtol() with the given base. */ +/* eg: to parse hex str="-0X12324", specify X='X' and base=16. */ +/* eg: to parse oct str="+0o12324", specify X='O' and base=8. */ +/* eg: to parse bin str="-0B01010", specify X='B' and base=2. */ +/* Failure of conversion is indicated by result where *endptr==str. 
*/ +static long int strtol0X(const char * str, + const char * *endptr, + char X, + int base) +{ + long int val; /* stores result */ + int s = 1; /* sign is +1 or -1 */ + const char *ptr = str; /* ptr to current position in str */ + + /* skip leading whitespace */ + while (isspace(*ptr)) + ptr++; + /* printf("1) %s\n",ptr); */ + + /* scan optional sign character */ + switch (*ptr) + { + case '+': + ptr++; + s = 1; + break; + case '-': + ptr++; + s = -1; + break; + default: + s = 1; + break; + } + /* printf("2) %s\n",ptr); */ + + /* '0X' prefix */ + if ((*ptr++) != '0') + { + /* printf("failed to detect '0'\n"); */ + *endptr = str; + return 0; + } + /* printf("3) %s\n",ptr); */ + if (toupper(*ptr++) != toupper(X)) + { + /* printf("failed to detect '%c'\n",X); */ + *endptr = str; + return 0; + } + /* printf("4) %s\n",ptr); */ + + /* attempt conversion on remainder of string using strtol() */ + val = strtol(ptr, (char * *)endptr, base); + if (*endptr == ptr) + { + /* conversion failed */ + *endptr = str; + return 0; + } + + /* success */ + return s * val; +} + + +/* Returns 1 if str matches suffix (case insensitive). */ +/* Str may contain trailing whitespace, but nothing else. */ +static int detectsuffix(const char *str, const char *suffix) +{ + /* scan pairwise through strings until mismatch detected */ + while( toupper(*str) == toupper(*suffix) ) + { + /* printf("'%c' '%c'\n", *str, *suffix); */ + + /* return 1 (success) if match persists until the string terminator */ + if (*str == '\0') + return 1; + + /* next chars */ + str++; + suffix++; + } + /* printf("'%c' '%c' mismatch\n", *str, *suffix); */ + + /* return 0 (fail) if the matching did not consume the entire suffix */ + if (*suffix != 0) + return 0; /* failed to consume entire suffix */ + + /* skip any remaining whitespace in str */ + while (isspace(*str)) + str++; + + /* return 1 (success) if we have reached end of str else return 0 (fail) */ + return (*str == '\0') ? 
1 : 0; +} + + +static int arg_int_scanfn(struct arg_int *parent, const char *argval) +{ + int errorcode = 0; + + if (parent->count == parent->hdr.maxcount) + { + /* maximum number of arguments exceeded */ + errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent arguiment value unaltered but still count the argument. */ + parent->count++; + } + else + { + long int val; + const char *end; + + /* attempt to extract hex integer (eg: +0x123) from argval into val conversion */ + val = strtol0X(argval, &end, 'X', 16); + if (end == argval) + { + /* hex failed, attempt octal conversion (eg +0o123) */ + val = strtol0X(argval, &end, 'O', 8); + if (end == argval) + { + /* octal failed, attempt binary conversion (eg +0B101) */ + val = strtol0X(argval, &end, 'B', 2); + if (end == argval) + { + /* binary failed, attempt decimal conversion with no prefix (eg 1234) */ + val = strtol(argval, (char * *)&end, 10); + if (end == argval) + { + /* all supported number formats failed */ + return EBADINT; + } + } + } + } + + /* Safety check for integer overflow. WARNING: this check */ + /* achieves nothing on machines where size(int)==size(long). */ + if ( val > INT_MAX || val < INT_MIN ) + errorcode = EOVERFLOW; + + /* Detect any suffixes (KB,MB,GB) and multiply argument value appropriately. */ + /* We need to be mindful of integer overflows when using such big numbers. 
*/ + if (detectsuffix(end, "KB")) /* kilobytes */ + { + if ( val > (INT_MAX / 1024) || val < (INT_MIN / 1024) ) + errorcode = EOVERFLOW; /* Overflow would occur if we proceed */ + else + val *= 1024; /* 1KB = 1024 */ + } + else if (detectsuffix(end, "MB")) /* megabytes */ + { + if ( val > (INT_MAX / 1048576) || val < (INT_MIN / 1048576) ) + errorcode = EOVERFLOW; /* Overflow would occur if we proceed */ + else + val *= 1048576; /* 1MB = 1024*1024 */ + } + else if (detectsuffix(end, "GB")) /* gigabytes */ + { + if ( val > (INT_MAX / 1073741824) || val < (INT_MIN / 1073741824) ) + errorcode = EOVERFLOW; /* Overflow would occur if we proceed */ + else + val *= 1073741824; /* 1GB = 1024*1024*1024 */ + } + else if (!detectsuffix(end, "")) + errorcode = EBADINT; /* invalid suffix detected */ + + /* if success then store result in parent->ival[] array */ + if (errorcode == 0) + parent->ival[parent->count++] = val; + } + + /* printf("%s:scanfn(%p,%p) returns %d\n",__FILE__,parent,argval,errorcode); */ + return errorcode; +} + + +static int arg_int_checkfn(struct arg_int *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? EMINCOUNT : 0; + /*printf("%s:checkfn(%p) returns %d\n",__FILE__,parent,errorcode);*/ + return errorcode; +} + + +static void arg_int_errorfn( + struct arg_int *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? 
argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + case EBADINT: + fprintf(fp, "invalid argument \"%s\" to option ", argval); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EOVERFLOW: + fputs("integer overflow at option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, " "); + fprintf(fp, "(%s is too large)\n", argval); + break; + } +} + + +struct arg_int * arg_int0( + const char *shortopts, + const char *longopts, + const char *datatype, + const char *glossary) +{ + return arg_intn(shortopts, longopts, datatype, 0, 1, glossary); +} + + +struct arg_int * arg_int1( + const char *shortopts, + const char *longopts, + const char *datatype, + const char *glossary) +{ + return arg_intn(shortopts, longopts, datatype, 1, 1, glossary); +} + + +struct arg_int * arg_intn( + const char *shortopts, + const char *longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary) +{ + size_t nbytes; + struct arg_int *result; + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? mincount : maxcount; + + nbytes = sizeof(struct arg_int) /* storage for struct arg_int */ + + maxcount * sizeof(int); /* storage for ival[maxcount] array */ + + result = (struct arg_int *)malloc(nbytes); + if (result) + { + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? 
datatype : ""; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_int_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_int_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_int_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_int_errorfn; + + /* store the ival[maxcount] array immediately after the arg_int struct */ + result->ival = (int *)(result + 1); + result->count = 0; + } + + ARG_TRACE(("arg_intn() returns %p\n", result)); + return result; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include + +#include "argtable3.h" + + +static void arg_lit_resetfn(struct arg_lit *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +static int arg_lit_scanfn(struct arg_lit *parent, const char *argval) +{ + int errorcode = 0; + if (parent->count < parent->hdr.maxcount ) + parent->count++; + else + errorcode = EMAXCOUNT; + + ARG_TRACE(("%s:scanfn(%p,%s) returns %d\n", __FILE__, parent, argval, + errorcode)); + return errorcode; +} + + +static int arg_lit_checkfn(struct arg_lit *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? 
EMINCOUNT : 0; + ARG_TRACE(("%s:checkfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static void arg_lit_errorfn( + struct arg_lit *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + switch(errorcode) + { + case EMINCOUNT: + fprintf(fp, "%s: missing option ", progname); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + fprintf(fp, "\n"); + break; + + case EMAXCOUNT: + fprintf(fp, "%s: extraneous option ", progname); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + } + + ARG_TRACE(("%s:errorfn(%p, %p, %d, %s, %s)\n", __FILE__, parent, fp, + errorcode, argval, progname)); +} + + +struct arg_lit * arg_lit0( + const char * shortopts, + const char * longopts, + const char * glossary) +{ + return arg_litn(shortopts, longopts, 0, 1, glossary); +} + + +struct arg_lit * arg_lit1( + const char *shortopts, + const char *longopts, + const char *glossary) +{ + return arg_litn(shortopts, longopts, 1, 1, glossary); +} + + +struct arg_lit * arg_litn( + const char *shortopts, + const char *longopts, + int mincount, + int maxcount, + const char *glossary) +{ + struct arg_lit *result; + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? 
mincount : maxcount; + + result = (struct arg_lit *)malloc(sizeof(struct arg_lit)); + if (result) + { + /* init the arg_hdr struct */ + result->hdr.flag = 0; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = NULL; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_lit_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_lit_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_lit_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_lit_errorfn; + + /* init local variables */ + result->count = 0; + } + + ARG_TRACE(("arg_litn() returns %p\n", result)); + return result; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include + +#include "argtable3.h" + +struct arg_rem *arg_rem(const char *datatype, const char *glossary) +{ + struct arg_rem *result = (struct arg_rem *)malloc(sizeof(struct arg_rem)); + if (result) + { + result->hdr.flag = 0; + result->hdr.shortopts = NULL; + result->hdr.longopts = NULL; + result->hdr.datatype = datatype; + result->hdr.glossary = glossary; + result->hdr.mincount = 1; + result->hdr.maxcount = 1; + result->hdr.parent = result; + result->hdr.resetfn = NULL; + result->hdr.scanfn = NULL; + result->hdr.checkfn = NULL; + result->hdr.errorfn = NULL; + } + + ARG_TRACE(("arg_rem() returns %p\n", result)); + return result; +} + +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include + +#include "argtable3.h" + + +#ifndef _TREX_H_ +#define _TREX_H_ +/*************************************************************** + T-Rex a tiny regular expression library + + Copyright (C) 2003-2006 Alberto Demichelis + + This software is provided 'as-is', without any express + or implied warranty. In no event will the authors be held + liable for any damages arising from the use of this software. + + Permission is granted to anyone to use this software for + any purpose, including commercial applications, and to alter + it and redistribute it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; + you must not claim that you wrote the original software. + If you use this software in a product, an acknowledgment + in the product documentation would be appreciated but + is not required. + + 2. 
Altered source versions must be plainly marked as such, + and must not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any + source distribution. + +****************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _UNICODE +#define TRexChar unsigned short +#define MAX_CHAR 0xFFFF +#define _TREXC(c) L##c +#define trex_strlen wcslen +#define trex_printf wprintf +#else +#define TRexChar char +#define MAX_CHAR 0xFF +#define _TREXC(c) (c) +#define trex_strlen strlen +#define trex_printf printf +#endif + +#ifndef TREX_API +#define TREX_API extern +#endif + +#define TRex_True 1 +#define TRex_False 0 + +#define TREX_ICASE ARG_REX_ICASE + +typedef unsigned int TRexBool; +typedef struct TRex TRex; + +typedef struct { + const TRexChar *begin; + int len; +} TRexMatch; + +TREX_API TRex *trex_compile(const TRexChar *pattern, const TRexChar **error, int flags); +TREX_API void trex_free(TRex *exp); +TREX_API TRexBool trex_match(TRex* exp, const TRexChar* text); +TREX_API TRexBool trex_search(TRex* exp, const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end); +TREX_API TRexBool trex_searchrange(TRex* exp, const TRexChar* text_begin, const TRexChar* text_end, const TRexChar** out_begin, const TRexChar** out_end); +TREX_API int trex_getsubexpcount(TRex* exp); +TREX_API TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch *subexp); + +#ifdef __cplusplus +} +#endif + +#endif + + + +struct privhdr +{ + const char *pattern; + int flags; +}; + + +static void arg_rex_resetfn(struct arg_rex *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + +static int arg_rex_scanfn(struct arg_rex *parent, const char *argval) +{ + int errorcode = 0; + const TRexChar *error = NULL; + TRex *rex = NULL; + TRexBool is_match = TRex_False; + + if (parent->count == parent->hdr.maxcount ) + { + /* maximum number of arguments exceeded */ + 
errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent argument value unaltered but still count the argument. */ + parent->count++; + } + else + { + struct privhdr *priv = (struct privhdr *)parent->hdr.priv; + + /* test the current argument value for a match with the regular expression */ + /* if a match is detected, record the argument value in the arg_rex struct */ + + rex = trex_compile(priv->pattern, &error, priv->flags); + is_match = trex_match(rex, argval); + if (!is_match) + errorcode = EREGNOMATCH; + else + parent->sval[parent->count++] = argval; + + trex_free(rex); + } + + ARG_TRACE(("%s:scanfn(%p) returns %d\n",__FILE__,parent,errorcode)); + return errorcode; +} + +static int arg_rex_checkfn(struct arg_rex *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? EMINCOUNT : 0; + //struct privhdr *priv = (struct privhdr*)parent->hdr.priv; + + /* free the regex "program" we constructed in resetfn */ + //regfree(&(priv->regex)); + + /*printf("%s:checkfn(%p) returns %d\n",__FILE__,parent,errorcode);*/ + return errorcode; +} + +static void arg_rex_errorfn(struct arg_rex *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? 
argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + case EREGNOMATCH: + fputs("illegal value ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + + default: + { + //char errbuff[256]; + //regerror(errorcode, NULL, errbuff, sizeof(errbuff)); + //printf("%s\n", errbuff); + } + break; + } +} + + +struct arg_rex * arg_rex0(const char * shortopts, + const char * longopts, + const char * pattern, + const char *datatype, + int flags, + const char *glossary) +{ + return arg_rexn(shortopts, + longopts, + pattern, + datatype, + 0, + 1, + flags, + glossary); +} + +struct arg_rex * arg_rex1(const char * shortopts, + const char * longopts, + const char * pattern, + const char *datatype, + int flags, + const char *glossary) +{ + return arg_rexn(shortopts, + longopts, + pattern, + datatype, + 1, + 1, + flags, + glossary); +} + + +struct arg_rex * arg_rexn(const char * shortopts, + const char * longopts, + const char * pattern, + const char *datatype, + int mincount, + int maxcount, + int flags, + const char *glossary) +{ + size_t nbytes; + struct arg_rex *result; + struct privhdr *priv; + int i; + const TRexChar *error = NULL; + TRex *rex = NULL; + + if (!pattern) + { + printf( + "argtable: ERROR - illegal regular expression pattern \"(NULL)\"\n"); + printf("argtable: Bad argument table.\n"); + return NULL; + } + + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? 
mincount : maxcount; + + nbytes = sizeof(struct arg_rex) /* storage for struct arg_rex */ + + sizeof(struct privhdr) /* storage for private arg_rex data */ + + maxcount * sizeof(char *); /* storage for sval[maxcount] array */ + + result = (struct arg_rex *)malloc(nbytes); + if (result == NULL) + return result; + + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? datatype : pattern; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_rex_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_rex_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_rex_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_rex_errorfn; + + /* store the arg_rex_priv struct immediately after the arg_rex struct */ + result->hdr.priv = result + 1; + priv = (struct privhdr *)(result->hdr.priv); + priv->pattern = pattern; + priv->flags = flags; + + /* store the sval[maxcount] array immediately after the arg_rex_priv struct */ + result->sval = (const char * *)(priv + 1); + result->count = 0; + + /* foolproof the string pointers by initializing them to reference empty strings */ + for (i = 0; i < maxcount; i++) + result->sval[i] = ""; + + /* here we construct and destroy a regex representation of the regular + * expression for no other reason than to force any regex errors to be + * trapped now rather than later. If we don't, then errors may go undetected + * until an argument is actually parsed. + */ + + rex = trex_compile(priv->pattern, &error, priv->flags); + if (rex == NULL) + { + ARG_LOG(("argtable: %s \"%s\"\n", error ? 
error : _TREXC("undefined"), priv->pattern)); + ARG_LOG(("argtable: Bad argument table.\n")); + } + + trex_free(rex); + + ARG_TRACE(("arg_rexn() returns %p\n", result)); + return result; +} + + + +/* see copyright notice in trex.h */ +#include +#include +#include +#include + +#ifdef _UINCODE +#define scisprint iswprint +#define scstrlen wcslen +#define scprintf wprintf +#define _SC(x) L(x) +#else +#define scisprint isprint +#define scstrlen strlen +#define scprintf printf +#define _SC(x) (x) +#endif + +#ifdef _DEBUG +#include + +static const TRexChar *g_nnames[] = +{ + _SC("NONE"),_SC("OP_GREEDY"), _SC("OP_OR"), + _SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"), _SC("OP_CLASS"), + _SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"), + _SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB") +}; + +#endif +#define OP_GREEDY (MAX_CHAR+1) // * + ? {n} +#define OP_OR (MAX_CHAR+2) +#define OP_EXPR (MAX_CHAR+3) //parentesis () +#define OP_NOCAPEXPR (MAX_CHAR+4) //parentesis (?:) +#define OP_DOT (MAX_CHAR+5) +#define OP_CLASS (MAX_CHAR+6) +#define OP_CCLASS (MAX_CHAR+7) +#define OP_NCLASS (MAX_CHAR+8) //negates class the [^ +#define OP_RANGE (MAX_CHAR+9) +#define OP_CHAR (MAX_CHAR+10) +#define OP_EOL (MAX_CHAR+11) +#define OP_BOL (MAX_CHAR+12) +#define OP_WB (MAX_CHAR+13) + +#define TREX_SYMBOL_ANY_CHAR ('.') +#define TREX_SYMBOL_GREEDY_ONE_OR_MORE ('+') +#define TREX_SYMBOL_GREEDY_ZERO_OR_MORE ('*') +#define TREX_SYMBOL_GREEDY_ZERO_OR_ONE ('?') +#define TREX_SYMBOL_BRANCH ('|') +#define TREX_SYMBOL_END_OF_STRING ('$') +#define TREX_SYMBOL_BEGINNING_OF_STRING ('^') +#define TREX_SYMBOL_ESCAPE_CHAR ('\\') + + +typedef int TRexNodeType; + +typedef struct tagTRexNode{ + TRexNodeType type; + int left; + int right; + int next; +}TRexNode; + +struct TRex{ + const TRexChar *_eol; + const TRexChar *_bol; + const TRexChar *_p; + int _first; + int _op; + TRexNode *_nodes; + int _nallocated; + int _nsize; + int _nsubexpr; + TRexMatch *_matches; + int _currsubexp; + void *_jmpbuf; + 
const TRexChar **_error; + int _flags; +}; + +static int trex_list(TRex *exp); + +static int trex_newnode(TRex *exp, TRexNodeType type) +{ + TRexNode n; + int newid; + n.type = type; + n.next = n.right = n.left = -1; + if(type == OP_EXPR) + n.right = exp->_nsubexpr++; + if(exp->_nallocated < (exp->_nsize + 1)) { + exp->_nallocated *= 2; + exp->_nodes = (TRexNode *)realloc(exp->_nodes, exp->_nallocated * sizeof(TRexNode)); + } + exp->_nodes[exp->_nsize++] = n; + newid = exp->_nsize - 1; + return (int)newid; +} + +static void trex_error(TRex *exp,const TRexChar *error) +{ + if(exp->_error) *exp->_error = error; + longjmp(*((jmp_buf*)exp->_jmpbuf),-1); +} + +static void trex_expect(TRex *exp, int n){ + if((*exp->_p) != n) + trex_error(exp, _SC("expected paren")); + exp->_p++; +} + +static TRexChar trex_escapechar(TRex *exp) +{ + if(*exp->_p == TREX_SYMBOL_ESCAPE_CHAR){ + exp->_p++; + switch(*exp->_p) { + case 'v': exp->_p++; return '\v'; + case 'n': exp->_p++; return '\n'; + case 't': exp->_p++; return '\t'; + case 'r': exp->_p++; return '\r'; + case 'f': exp->_p++; return '\f'; + default: return (*exp->_p++); + } + } else if(!scisprint(*exp->_p)) trex_error(exp,_SC("letter expected")); + return (*exp->_p++); +} + +static int trex_charclass(TRex *exp,int classid) +{ + int n = trex_newnode(exp,OP_CCLASS); + exp->_nodes[n].left = classid; + return n; +} + +static int trex_charnode(TRex *exp,TRexBool isclass) +{ + TRexChar t; + if(*exp->_p == TREX_SYMBOL_ESCAPE_CHAR) { + exp->_p++; + switch(*exp->_p) { + case 'n': exp->_p++; return trex_newnode(exp,'\n'); + case 't': exp->_p++; return trex_newnode(exp,'\t'); + case 'r': exp->_p++; return trex_newnode(exp,'\r'); + case 'f': exp->_p++; return trex_newnode(exp,'\f'); + case 'v': exp->_p++; return trex_newnode(exp,'\v'); + case 'a': case 'A': case 'w': case 'W': case 's': case 'S': + case 'd': case 'D': case 'x': case 'X': case 'c': case 'C': + case 'p': case 'P': case 'l': case 'u': + { + t = *exp->_p; exp->_p++; + return 
trex_charclass(exp,t); + } + case 'b': + case 'B': + if(!isclass) { + int node = trex_newnode(exp,OP_WB); + exp->_nodes[node].left = *exp->_p; + exp->_p++; + return node; + } //else default + default: + t = *exp->_p; exp->_p++; + return trex_newnode(exp,t); + } + } + else if(!scisprint(*exp->_p)) { + + trex_error(exp,_SC("letter expected")); + } + t = *exp->_p; exp->_p++; + return trex_newnode(exp,t); +} +static int trex_class(TRex *exp) +{ + int ret = -1; + int first = -1,chain; + if(*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING){ + ret = trex_newnode(exp,OP_NCLASS); + exp->_p++; + }else ret = trex_newnode(exp,OP_CLASS); + + if(*exp->_p == ']') trex_error(exp,_SC("empty class")); + chain = ret; + while(*exp->_p != ']' && exp->_p != exp->_eol) { + if(*exp->_p == '-' && first != -1){ + int r,t; + if(*exp->_p++ == ']') trex_error(exp,_SC("unfinished range")); + r = trex_newnode(exp,OP_RANGE); + if(first>*exp->_p) trex_error(exp,_SC("invalid range")); + if(exp->_nodes[first].type == OP_CCLASS) trex_error(exp,_SC("cannot use character classes in ranges")); + exp->_nodes[r].left = exp->_nodes[first].type; + t = trex_escapechar(exp); + exp->_nodes[r].right = t; + exp->_nodes[chain].next = r; + chain = r; + first = -1; + } + else{ + if(first!=-1){ + int c = first; + exp->_nodes[chain].next = c; + chain = c; + first = trex_charnode(exp,TRex_True); + } + else{ + first = trex_charnode(exp,TRex_True); + } + } + } + if(first!=-1){ + int c = first; + exp->_nodes[chain].next = c; + chain = c; + first = -1; + } + /* hack? 
*/ + exp->_nodes[ret].left = exp->_nodes[ret].next; + exp->_nodes[ret].next = -1; + return ret; +} + +static int trex_parsenumber(TRex *exp) +{ + int ret = *exp->_p-'0'; + int positions = 10; + exp->_p++; + while(isdigit(*exp->_p)) { + ret = ret*10+(*exp->_p++-'0'); + if(positions==1000000000) trex_error(exp,_SC("overflow in numeric constant")); + positions *= 10; + }; + return ret; +} + +static int trex_element(TRex *exp) +{ + int ret = -1; + switch(*exp->_p) + { + case '(': { + int expr,newn; + exp->_p++; + + + if(*exp->_p =='?') { + exp->_p++; + trex_expect(exp,':'); + expr = trex_newnode(exp,OP_NOCAPEXPR); + } + else + expr = trex_newnode(exp,OP_EXPR); + newn = trex_list(exp); + exp->_nodes[expr].left = newn; + ret = expr; + trex_expect(exp,')'); + } + break; + case '[': + exp->_p++; + ret = trex_class(exp); + trex_expect(exp,']'); + break; + case TREX_SYMBOL_END_OF_STRING: exp->_p++; ret = trex_newnode(exp,OP_EOL);break; + case TREX_SYMBOL_ANY_CHAR: exp->_p++; ret = trex_newnode(exp,OP_DOT);break; + default: + ret = trex_charnode(exp,TRex_False); + break; + } + + { + TRexBool isgreedy = TRex_False; + unsigned short p0 = 0, p1 = 0; + switch(*exp->_p){ + case TREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; isgreedy = TRex_True; break; + case TREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; isgreedy = TRex_True; break; + case TREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; isgreedy = TRex_True; break; + case '{': + exp->_p++; + if(!isdigit(*exp->_p)) trex_error(exp,_SC("number expected")); + p0 = (unsigned short)trex_parsenumber(exp); + /*******************************/ + switch(*exp->_p) { + case '}': + p1 = p0; exp->_p++; + break; + case ',': + exp->_p++; + p1 = 0xFFFF; + if(isdigit(*exp->_p)){ + p1 = (unsigned short)trex_parsenumber(exp); + } + trex_expect(exp,'}'); + break; + default: + trex_error(exp,_SC(", or } expected")); + } + /*******************************/ + isgreedy = TRex_True; + break; + + } + 
if(isgreedy) { + int nnode = trex_newnode(exp,OP_GREEDY); + exp->_nodes[nnode].left = ret; + exp->_nodes[nnode].right = ((p0)<<16)|p1; + ret = nnode; + } + } + if((*exp->_p != TREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != TREX_SYMBOL_GREEDY_ZERO_OR_MORE) && (*exp->_p != TREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) { + int nnode = trex_element(exp); + exp->_nodes[ret].next = nnode; + } + + return ret; +} + +static int trex_list(TRex *exp) +{ + int ret=-1,e; + if(*exp->_p == TREX_SYMBOL_BEGINNING_OF_STRING) { + exp->_p++; + ret = trex_newnode(exp,OP_BOL); + } + e = trex_element(exp); + if(ret != -1) { + exp->_nodes[ret].next = e; + } + else ret = e; + + if(*exp->_p == TREX_SYMBOL_BRANCH) { + int temp,tright; + exp->_p++; + temp = trex_newnode(exp,OP_OR); + exp->_nodes[temp].left = ret; + tright = trex_list(exp); + exp->_nodes[temp].right = tright; + ret = temp; + } + return ret; +} + +static TRexBool trex_matchcclass(int cclass,TRexChar c) +{ + switch(cclass) { + case 'a': return isalpha(c)?TRex_True:TRex_False; + case 'A': return !isalpha(c)?TRex_True:TRex_False; + case 'w': return (isalnum(c) || c == '_')?TRex_True:TRex_False; + case 'W': return (!isalnum(c) && c != '_')?TRex_True:TRex_False; + case 's': return isspace(c)?TRex_True:TRex_False; + case 'S': return !isspace(c)?TRex_True:TRex_False; + case 'd': return isdigit(c)?TRex_True:TRex_False; + case 'D': return !isdigit(c)?TRex_True:TRex_False; + case 'x': return isxdigit(c)?TRex_True:TRex_False; + case 'X': return !isxdigit(c)?TRex_True:TRex_False; + case 'c': return iscntrl(c)?TRex_True:TRex_False; + case 'C': return !iscntrl(c)?TRex_True:TRex_False; + case 'p': return ispunct(c)?TRex_True:TRex_False; + case 'P': return !ispunct(c)?TRex_True:TRex_False; + case 'l': return islower(c)?TRex_True:TRex_False; + case 'u': return isupper(c)?TRex_True:TRex_False; + } + return TRex_False; /*cannot happen*/ +} + +static TRexBool trex_matchclass(TRex* exp,TRexNode *node,TRexChar c) +{ + do { + 
switch(node->type) { + case OP_RANGE: + if (exp->_flags & TREX_ICASE) + { + if(c >= toupper(node->left) && c <= toupper(node->right)) return TRex_True; + if(c >= tolower(node->left) && c <= tolower(node->right)) return TRex_True; + } + else + { + if(c >= node->left && c <= node->right) return TRex_True; + } + break; + case OP_CCLASS: + if(trex_matchcclass(node->left,c)) return TRex_True; + break; + default: + if (exp->_flags & TREX_ICASE) + { + if (c == tolower(node->type) || c == toupper(node->type)) return TRex_True; + } + else + { + if(c == node->type)return TRex_True; + } + + } + } while((node->next != -1) && (node = &exp->_nodes[node->next])); + return TRex_False; +} + +static const TRexChar *trex_matchnode(TRex* exp,TRexNode *node,const TRexChar *str,TRexNode *next) +{ + + TRexNodeType type = node->type; + switch(type) { + case OP_GREEDY: { + //TRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL; + TRexNode *greedystop = NULL; + int p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0; + const TRexChar *s=str, *good = str; + + if(node->next != -1) { + greedystop = &exp->_nodes[node->next]; + } + else { + greedystop = next; + } + + while((nmaches == 0xFFFF || nmaches < p1)) { + + const TRexChar *stop; + if(!(s = trex_matchnode(exp,&exp->_nodes[node->left],s,greedystop))) + break; + nmaches++; + good=s; + if(greedystop) { + //checks that 0 matches satisfy the expression(if so skips) + //if not would always stop(for instance if is a '?') + if(greedystop->type != OP_GREEDY || + (greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0)) + { + TRexNode *gnext = NULL; + if(greedystop->next != -1) { + gnext = &exp->_nodes[greedystop->next]; + }else if(next && next->next != -1){ + gnext = &exp->_nodes[next->next]; + } + stop = trex_matchnode(exp,greedystop,s,gnext); + if(stop) { + //if satisfied stop it + if(p0 == p1 && p0 == nmaches) break; + else if(nmaches >= p0 && p1 == 0xFFFF) break; + else 
if(nmaches >= p0 && nmaches <= p1) break; + } + } + } + + if(s >= exp->_eol) + break; + } + if(p0 == p1 && p0 == nmaches) return good; + else if(nmaches >= p0 && p1 == 0xFFFF) return good; + else if(nmaches >= p0 && nmaches <= p1) return good; + return NULL; + } + case OP_OR: { + const TRexChar *asd = str; + TRexNode *temp=&exp->_nodes[node->left]; + while( (asd = trex_matchnode(exp,temp,asd,NULL)) ) { + if(temp->next != -1) + temp = &exp->_nodes[temp->next]; + else + return asd; + } + asd = str; + temp = &exp->_nodes[node->right]; + while( (asd = trex_matchnode(exp,temp,asd,NULL)) ) { + if(temp->next != -1) + temp = &exp->_nodes[temp->next]; + else + return asd; + } + return NULL; + break; + } + case OP_EXPR: + case OP_NOCAPEXPR:{ + TRexNode *n = &exp->_nodes[node->left]; + const TRexChar *cur = str; + int capture = -1; + if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) { + capture = exp->_currsubexp; + exp->_matches[capture].begin = cur; + exp->_currsubexp++; + } + + do { + TRexNode *subnext = NULL; + if(n->next != -1) { + subnext = &exp->_nodes[n->next]; + }else { + subnext = next; + } + if(!(cur = trex_matchnode(exp,n,cur,subnext))) { + if(capture != -1){ + exp->_matches[capture].begin = 0; + exp->_matches[capture].len = 0; + } + return NULL; + } + } while((n->next != -1) && (n = &exp->_nodes[n->next])); + + if(capture != -1) + exp->_matches[capture].len = cur - exp->_matches[capture].begin; + return cur; + } + case OP_WB: + if((str == exp->_bol && !isspace(*str)) + || ((str == exp->_eol && !isspace(*(str-1)))) + || ((!isspace(*str) && isspace(*(str+1)))) + || ((isspace(*str) && !isspace(*(str+1)))) ) { + return (node->left == 'b')?str:NULL; + } + return (node->left == 'b')?NULL:str; + case OP_BOL: + if(str == exp->_bol) return str; + return NULL; + case OP_EOL: + if(str == exp->_eol) return str; + return NULL; + case OP_DOT: + str++; + return str; + case OP_NCLASS: + case OP_CLASS: + if(trex_matchclass(exp,&exp->_nodes[node->left],*str)?(type 
== OP_CLASS?TRex_True:TRex_False):(type == OP_NCLASS?TRex_True:TRex_False)) { + str++; + return str; + } + return NULL; + case OP_CCLASS: + if(trex_matchcclass(node->left,*str)) { + str++; + return str; + } + return NULL; + default: /* char */ + if (exp->_flags & TREX_ICASE) + { + if(*str != tolower(node->type) && *str != toupper(node->type)) return NULL; + } + else + { + if (*str != node->type) return NULL; + } + str++; + return str; + } + return NULL; +} + +/* public api */ +TRex *trex_compile(const TRexChar *pattern,const TRexChar **error,int flags) +{ + TRex *exp = (TRex *)malloc(sizeof(TRex)); + exp->_eol = exp->_bol = NULL; + exp->_p = pattern; + exp->_nallocated = (int)scstrlen(pattern) * sizeof(TRexChar); + exp->_nodes = (TRexNode *)malloc(exp->_nallocated * sizeof(TRexNode)); + exp->_nsize = 0; + exp->_matches = 0; + exp->_nsubexpr = 0; + exp->_first = trex_newnode(exp,OP_EXPR); + exp->_error = error; + exp->_jmpbuf = malloc(sizeof(jmp_buf)); + exp->_flags = flags; + if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) { + int res = trex_list(exp); + exp->_nodes[exp->_first].left = res; + if(*exp->_p!='\0') + trex_error(exp,_SC("unexpected character")); +#ifdef _DEBUG + { + int nsize,i; + TRexNode *t; + nsize = exp->_nsize; + t = &exp->_nodes[0]; + scprintf(_SC("\n")); + for(i = 0;i < nsize; i++) { + if(exp->_nodes[i].type>MAX_CHAR) + scprintf(_SC("[%02d] %10s "),i,g_nnames[exp->_nodes[i].type-MAX_CHAR]); + else + scprintf(_SC("[%02d] %10c "),i,exp->_nodes[i].type); + scprintf(_SC("left %02d right %02d next %02d\n"),exp->_nodes[i].left,exp->_nodes[i].right,exp->_nodes[i].next); + } + scprintf(_SC("\n")); + } +#endif + exp->_matches = (TRexMatch *) malloc(exp->_nsubexpr * sizeof(TRexMatch)); + memset(exp->_matches,0,exp->_nsubexpr * sizeof(TRexMatch)); + } + else{ + trex_free(exp); + return NULL; + } + return exp; +} + +void trex_free(TRex *exp) +{ + if(exp) { + if(exp->_nodes) free(exp->_nodes); + if(exp->_jmpbuf) free(exp->_jmpbuf); + if(exp->_matches) 
free(exp->_matches); + free(exp); + } +} + +TRexBool trex_match(TRex* exp,const TRexChar* text) +{ + const TRexChar* res = NULL; + exp->_bol = text; + exp->_eol = text + scstrlen(text); + exp->_currsubexp = 0; + res = trex_matchnode(exp,exp->_nodes,text,NULL); + if(res == NULL || res != exp->_eol) + return TRex_False; + return TRex_True; +} + +TRexBool trex_searchrange(TRex* exp,const TRexChar* text_begin,const TRexChar* text_end,const TRexChar** out_begin, const TRexChar** out_end) +{ + const TRexChar *cur = NULL; + int node = exp->_first; + if(text_begin >= text_end) return TRex_False; + exp->_bol = text_begin; + exp->_eol = text_end; + do { + cur = text_begin; + while(node != -1) { + exp->_currsubexp = 0; + cur = trex_matchnode(exp,&exp->_nodes[node],cur,NULL); + if(!cur) + break; + node = exp->_nodes[node].next; + } + text_begin++; + } while(cur == NULL && text_begin != text_end); + + if(cur == NULL) + return TRex_False; + + --text_begin; + + if(out_begin) *out_begin = text_begin; + if(out_end) *out_end = cur; + return TRex_True; +} + +TRexBool trex_search(TRex* exp,const TRexChar* text, const TRexChar** out_begin, const TRexChar** out_end) +{ + return trex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end); +} + +int trex_getsubexpcount(TRex* exp) +{ + return exp->_nsubexpr; +} + +TRexBool trex_getsubexp(TRex* exp, int n, TRexMatch *subexp) +{ + if( n<0 || n >= exp->_nsubexpr) return TRex_False; + *subexp = exp->_matches[n]; + return TRex_True; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ ******************************************************************************/ + +#include + +#include "argtable3.h" + + +static void arg_str_resetfn(struct arg_str *parent) +{ + ARG_TRACE(("%s:resetfn(%p)\n", __FILE__, parent)); + parent->count = 0; +} + + +static int arg_str_scanfn(struct arg_str *parent, const char *argval) +{ + int errorcode = 0; + + if (parent->count == parent->hdr.maxcount) + { + /* maximum number of arguments exceeded */ + errorcode = EMAXCOUNT; + } + else if (!argval) + { + /* a valid argument with no argument value was given. */ + /* This happens when an optional argument value was invoked. */ + /* leave parent arguiment value unaltered but still count the argument. */ + parent->count++; + } + else + { + parent->sval[parent->count++] = argval; + } + + ARG_TRACE(("%s:scanfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static int arg_str_checkfn(struct arg_str *parent) +{ + int errorcode = (parent->count < parent->hdr.mincount) ? EMINCOUNT : 0; + + ARG_TRACE(("%s:checkfn(%p) returns %d\n", __FILE__, parent, errorcode)); + return errorcode; +} + + +static void arg_str_errorfn( + struct arg_str *parent, + FILE *fp, + int errorcode, + const char *argval, + const char *progname) +{ + const char *shortopts = parent->hdr.shortopts; + const char *longopts = parent->hdr.longopts; + const char *datatype = parent->hdr.datatype; + + /* make argval NULL safe */ + argval = argval ? 
argval : ""; + + fprintf(fp, "%s: ", progname); + switch(errorcode) + { + case EMINCOUNT: + fputs("missing option ", fp); + arg_print_option(fp, shortopts, longopts, datatype, "\n"); + break; + + case EMAXCOUNT: + fputs("excess option ", fp); + arg_print_option(fp, shortopts, longopts, argval, "\n"); + break; + } +} + + +struct arg_str * arg_str0( + const char *shortopts, + const char *longopts, + const char *datatype, + const char *glossary) +{ + return arg_strn(shortopts, longopts, datatype, 0, 1, glossary); +} + + +struct arg_str * arg_str1( + const char *shortopts, + const char *longopts, + const char *datatype, + const char *glossary) +{ + return arg_strn(shortopts, longopts, datatype, 1, 1, glossary); +} + + +struct arg_str * arg_strn( + const char *shortopts, + const char *longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary) +{ + size_t nbytes; + struct arg_str *result; + + /* should not allow this stupid error */ + /* we should return an error code warning this logic error */ + /* foolproof things by ensuring maxcount is not less than mincount */ + maxcount = (maxcount < mincount) ? mincount : maxcount; + + nbytes = sizeof(struct arg_str) /* storage for struct arg_str */ + + maxcount * sizeof(char *); /* storage for sval[maxcount] array */ + + result = (struct arg_str *)malloc(nbytes); + if (result) + { + int i; + + /* init the arg_hdr struct */ + result->hdr.flag = ARG_HASVALUE; + result->hdr.shortopts = shortopts; + result->hdr.longopts = longopts; + result->hdr.datatype = datatype ? 
datatype : ""; + result->hdr.glossary = glossary; + result->hdr.mincount = mincount; + result->hdr.maxcount = maxcount; + result->hdr.parent = result; + result->hdr.resetfn = (arg_resetfn *)arg_str_resetfn; + result->hdr.scanfn = (arg_scanfn *)arg_str_scanfn; + result->hdr.checkfn = (arg_checkfn *)arg_str_checkfn; + result->hdr.errorfn = (arg_errorfn *)arg_str_errorfn; + + /* store the sval[maxcount] array immediately after the arg_str struct */ + result->sval = (const char * *)(result + 1); + result->count = 0; + + /* foolproof the string pointers by initialising them to reference empty strings */ + for (i = 0; i < maxcount; i++) + result->sval[i] = ""; + } + + ARG_TRACE(("arg_strn() returns %p\n", result)); + return result; +} +/******************************************************************************* + * This file is part of the argtable3 library. + * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +#include +#include +#include +#include + +#include "argtable3.h" + +static +void arg_register_error(struct arg_end *end, + void *parent, + int error, + const char *argval) +{ + /* printf("arg_register_error(%p,%p,%d,%s)\n",end,parent,error,argval); */ + if (end->count < end->hdr.maxcount) + { + end->error[end->count] = error; + end->parent[end->count] = parent; + end->argval[end->count] = argval; + end->count++; + } + else + { + end->error[end->hdr.maxcount - 1] = ARG_ELIMIT; + end->parent[end->hdr.maxcount - 1] = end; + end->argval[end->hdr.maxcount - 1] = NULL; + } +} + + +/* + * Return index of first table entry with a matching short option + * or -1 if no match was found. 
+ */ +static +int find_shortoption(struct arg_hdr * *table, char shortopt) +{ + int tabindex; + for(tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + if (table[tabindex]->shortopts && + strchr(table[tabindex]->shortopts, shortopt)) + return tabindex; + } + return -1; +} + + +struct longoptions +{ + int getoptval; + int noptions; + struct option *options; +}; + +#if 0 +static +void dump_longoptions(struct longoptions * longoptions) +{ + int i; + printf("getoptval = %d\n", longoptions->getoptval); + printf("noptions = %d\n", longoptions->noptions); + for (i = 0; i < longoptions->noptions; i++) + { + printf("options[%d].name = \"%s\"\n", + i, + longoptions->options[i].name); + printf("options[%d].has_arg = %d\n", i, longoptions->options[i].has_arg); + printf("options[%d].flag = %p\n", i, longoptions->options[i].flag); + printf("options[%d].val = %d\n", i, longoptions->options[i].val); + } +} +#endif + +static +struct longoptions * alloc_longoptions(struct arg_hdr * *table) +{ + struct longoptions *result; + size_t nbytes; + int noptions = 1; + size_t longoptlen = 0; + int tabindex; + + /* + * Determine the total number of option structs required + * by counting the number of comma separated long options + * in all table entries and return the count in noptions. + * note: noptions starts at 1 not 0 because we getoptlong + * requires a NULL option entry to terminate the option array. + * While we are at it, count the number of chars required + * to store private copies of all the longoption strings + * and return that count in logoptlen. + */ + tabindex = 0; + do + { + const char *longopts = table[tabindex]->longopts; + longoptlen += (longopts ? 
strlen(longopts) : 0) + 1; + while (longopts) + { + noptions++; + longopts = strchr(longopts + 1, ','); + } + } while(!(table[tabindex++]->flag & ARG_TERMINATOR)); + /*printf("%d long options consuming %d chars in total\n",noptions,longoptlen);*/ + + + /* allocate storage for return data structure as: */ + /* (struct longoptions) + (struct options)[noptions] + char[longoptlen] */ + nbytes = sizeof(struct longoptions) + + sizeof(struct option) * noptions + + longoptlen; + result = (struct longoptions *)malloc(nbytes); + if (result) + { + int option_index = 0; + char *store; + + result->getoptval = 0; + result->noptions = noptions; + result->options = (struct option *)(result + 1); + store = (char *)(result->options + noptions); + + for(tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + const char *longopts = table[tabindex]->longopts; + + while(longopts && *longopts) + { + char *storestart = store; + + /* copy progressive longopt strings into the store */ + while (*longopts != 0 && *longopts != ',') + *store++ = *longopts++; + *store++ = 0; + if (*longopts == ',') + longopts++; + /*fprintf(stderr,"storestart=\"%s\"\n",storestart);*/ + + result->options[option_index].name = storestart; + result->options[option_index].flag = &(result->getoptval); + result->options[option_index].val = tabindex; + if (table[tabindex]->flag & ARG_HASOPTVALUE) + result->options[option_index].has_arg = 2; + else if (table[tabindex]->flag & ARG_HASVALUE) + result->options[option_index].has_arg = 1; + else + result->options[option_index].has_arg = 0; + + option_index++; + } + } + /* terminate the options array with a zero-filled entry */ + result->options[option_index].name = 0; + result->options[option_index].has_arg = 0; + result->options[option_index].flag = 0; + result->options[option_index].val = 0; + } + + /*dump_longoptions(result);*/ + return result; +} + +static +char * alloc_shortoptions(struct arg_hdr * *table) +{ + char *result; + size_t len = 2; + int 
tabindex; + + /* determine the total number of option chars required */ + for(tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + struct arg_hdr *hdr = table[tabindex]; + len += 3 * (hdr->shortopts ? strlen(hdr->shortopts) : 0); + } + + result = malloc(len); + if (result) + { + char *res = result; + + /* add a leading ':' so getopt return codes distinguish */ + /* unrecognised option and options missing argument values */ + *res++ = ':'; + + for(tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + struct arg_hdr *hdr = table[tabindex]; + const char *shortopts = hdr->shortopts; + while(shortopts && *shortopts) + { + *res++ = *shortopts++; + if (hdr->flag & ARG_HASVALUE) + *res++ = ':'; + if (hdr->flag & ARG_HASOPTVALUE) + *res++ = ':'; + } + } + /* null terminate the string */ + *res = 0; + } + + /*printf("alloc_shortoptions() returns \"%s\"\n",(result?result:"NULL"));*/ + return result; +} + + +/* return index of the table terminator entry */ +static +int arg_endindex(struct arg_hdr * *table) +{ + int tabindex = 0; + while (!(table[tabindex]->flag & ARG_TERMINATOR)) + tabindex++; + return tabindex; +} + + +static +void arg_parse_tagged(int argc, + char * *argv, + struct arg_hdr * *table, + struct arg_end *endtable) +{ + struct longoptions *longoptions; + char *shortoptions; + int copt; + + /*printf("arg_parse_tagged(%d,%p,%p,%p)\n",argc,argv,table,endtable);*/ + + /* allocate short and long option arrays for the given opttable[]. */ + /* if the allocs fail then put an error msg in the last table entry. 
*/ + longoptions = alloc_longoptions(table); + shortoptions = alloc_shortoptions(table); + if (!longoptions || !shortoptions) + { + /* one or both memory allocs failed */ + arg_register_error(endtable, endtable, ARG_EMALLOC, NULL); + /* free anything that was allocated (this is null safe) */ + free(shortoptions); + free(longoptions); + return; + } + + /*dump_longoptions(longoptions);*/ + + /* reset getopts internal option-index to zero, and disable error reporting */ + optind = 0; + opterr = 0; + + /* fetch and process args using getopt_long */ + while( (copt = + getopt_long(argc, argv, shortoptions, longoptions->options, + NULL)) != -1) + { + /* + printf("optarg='%s'\n",optarg); + printf("optind=%d\n",optind); + printf("copt=%c\n",(char)copt); + printf("optopt=%c (%d)\n",optopt, (int)(optopt)); + */ + switch(copt) + { + case 0: + { + int tabindex = longoptions->getoptval; + void *parent = table[tabindex]->parent; + /*printf("long option detected from argtable[%d]\n", tabindex);*/ + if (optarg && optarg[0] == 0 && + (table[tabindex]->flag & ARG_HASVALUE)) + { + /* printf(": long option %s requires an argument\n",argv[optind-1]); */ + arg_register_error(endtable, endtable, ARG_EMISSARG, + argv[optind - 1]); + /* continue to scan the (empty) argument value to enforce argument count checking */ + } + if (table[tabindex]->scanfn) + { + int errorcode = table[tabindex]->scanfn(parent, optarg); + if (errorcode != 0) + arg_register_error(endtable, parent, errorcode, optarg); + } + } + break; + + case '?': + /* + * getopt_long() found an unrecognised short option. 
/*
 * Distribute the remaining argv[] entries, starting at the global `optind`,
 * over the untagged table entries (those with neither short nor long
 * options).
 *
 * Each argv[optind] is offered to the current entry's scanfn. On success the
 * argument is consumed and the SAME entry is tried again with the next
 * argument; on failure the same argument is offered to the next entry. The
 * most recent failure is remembered and only registered as an error if no
 * later entry accepts that argument. Arguments left over once the table is
 * exhausted are registered as ARG_ENOMATCH errors on the end entry.
 *
 * argc, argv - command line
 * table      - argument table, terminated by an ARG_TERMINATOR entry
 * endtable   - arg_end entry that collects errors
 */
static
void arg_parse_untagged(int argc,
                        char * *argv,
                        struct arg_hdr * *table,
                        struct arg_end *endtable)
{
    int tabindex = 0;
    int errorlast = 0;               /* last tentative scan error, 0 if none */
    const char *optarglast = NULL;   /* argument that caused errorlast */
    void *parentlast = NULL;         /* entry that reported errorlast */

    /*printf("arg_parse_untagged(%d,%p,%p,%p)\n",argc,argv,table,endtable);*/
    while (!(table[tabindex]->flag & ARG_TERMINATOR))
    {
        void *parent;
        int errorcode;

        /* if we have exhausted our argv[optind] entries then we have finished */
        if (optind >= argc)
        {
            /*printf("arg_parse_untagged(): argv[] exhausted\n");*/
            return;
        }

        /* skip table entries with non-null long or short options (they are not untagged entries) */
        if (table[tabindex]->longopts || table[tabindex]->shortopts)
        {
            /*printf("arg_parse_untagged(): skipping argtable[%d] (tagged argument)\n",tabindex);*/
            tabindex++;
            continue;
        }

        /* skip table entries with NULL scanfn */
        if (!(table[tabindex]->scanfn))
        {
            /*printf("arg_parse_untagged(): skipping argtable[%d] (NULL scanfn)\n",tabindex);*/
            tabindex++;
            continue;
        }

        /* attempt to scan the current argv[optind] with the current */
        /* table[tabindex] entry. If it succeeds then keep it, otherwise */
        /* try again with the next table[] entry. */
        parent = table[tabindex]->parent;
        errorcode = table[tabindex]->scanfn(parent, argv[optind]);
        if (errorcode == 0)
        {
            /* success, move onto next argv[optind] but stay with same table[tabindex] */
            /*printf("arg_parse_untagged(): argtable[%d] successfully matched\n",tabindex);*/
            optind++;

            /* clear the last tentative error */
            errorlast = 0;
        }
        else
        {
            /* failure, try same argv[optind] with next table[tabindex] entry */
            /*printf("arg_parse_untagged(): argtable[%d] failed match\n",tabindex);*/
            tabindex++;

            /* remember this as a tentative error we may wish to reinstate later */
            errorlast = errorcode;
            optarglast = argv[optind];
            parentlast = parent;
        }

    }

    /* if a tentative error still remains at this point then register it as a proper error */
    /* and step past the argument that caused it */
    if (errorlast)
    {
        arg_register_error(endtable, parentlast, errorlast, optarglast);
        optind++;
    }

    /* only get here when not all argv[] entries were consumed */
    /* register an error for each unused argv[] entry */
    while (optind < argc)
    {
        /*printf("arg_parse_untagged(): argv[%d]=\"%s\" not consumed\n",optind,argv[optind]);*/
        arg_register_error(endtable, endtable, ARG_ENOMATCH, argv[optind++]);
    }

    return;
}
(table[tabindex]->checkfn) + { + void *parent = table[tabindex]->parent; + int errorcode = table[tabindex]->checkfn(parent); + if (errorcode != 0) + arg_register_error(endtable, parent, errorcode, NULL); + } + } while(!(table[tabindex++]->flag & ARG_TERMINATOR)); +} + + +static +void arg_reset(void * *argtable) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int tabindex = 0; + /*printf("arg_reset(%p)\n",argtable);*/ + do + { + if (table[tabindex]->resetfn) + table[tabindex]->resetfn(table[tabindex]->parent); + } while(!(table[tabindex++]->flag & ARG_TERMINATOR)); +} + + +int arg_parse(int argc, char * *argv, void * *argtable) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + struct arg_end *endtable; + int endindex; + char * *argvcopy = NULL; + + /*printf("arg_parse(%d,%p,%p)\n",argc,argv,argtable);*/ + + /* reset any argtable data from previous invocations */ + arg_reset(argtable); + + /* locate the first end-of-table marker within the array */ + endindex = arg_endindex(table); + endtable = (struct arg_end *)table[endindex]; + + /* Special case of argc==0. This can occur on Texas Instruments DSP. */ + /* Failure to trap this case results in an unwanted NULL result from */ + /* the malloc for argvcopy (next code block). */ + if (argc == 0) + { + /* We must still perform post-parse checks despite the absence of command line arguments */ + arg_parse_check(table, endtable); + + /* Now we are finished */ + return endtable->count; + } + + argvcopy = (char **)malloc(sizeof(char *) * (argc + 1)); + if (argvcopy) + { + int i; + + /* + Fill in the local copy of argv[]. We need a local copy + because getopt rearranges argv[] which adversely affects + susbsequent parsing attempts. 
+ */ + for (i = 0; i < argc; i++) + argvcopy[i] = argv[i]; + + argvcopy[argc] = NULL; + + /* parse the command line (local copy) for tagged options */ + arg_parse_tagged(argc, argvcopy, table, endtable); + + /* parse the command line (local copy) for untagged options */ + arg_parse_untagged(argc, argvcopy, table, endtable); + + /* if no errors so far then perform post-parse checks otherwise dont bother */ + if (endtable->count == 0) + arg_parse_check(table, endtable); + + /* release the local copt of argv[] */ + free(argvcopy); + } + else + { + /* memory alloc failed */ + arg_register_error(endtable, endtable, ARG_EMALLOC, NULL); + } + + return endtable->count; +} + + +/* + * Concatenate contents of src[] string onto *pdest[] string. + * The *pdest pointer is altered to point to the end of the + * target string and *pndest is decremented by the same number + * of chars. + * Does not append more than *pndest chars into *pdest[] + * so as to prevent buffer overruns. + * Its something like strncat() but more efficient for repeated + * calls on the same destination string. + * Example of use: + * char dest[30] = "good" + * size_t ndest = sizeof(dest); + * char *pdest = dest; + * arg_char(&pdest,"bye ",&ndest); + * arg_char(&pdest,"cruel ",&ndest); + * arg_char(&pdest,"world!",&ndest); + * Results in: + * dest[] == "goodbye cruel world!" 
/*
 * Append src[] to the string at *pdest without ever writing outside the
 * *pndest bytes that start at *pdest.
 *
 * On return *pdest points at the terminating NUL of the (possibly
 * truncated) result and *pndest is the number of bytes from there to the
 * end of the buffer, terminator included.  That makes repeated appends to
 * the same buffer cheap:
 *
 *     char dest[30] = "good";
 *     size_t ndest = sizeof(dest);
 *     char *pdest = dest;
 *     arg_cat(&pdest, "bye ", &ndest);
 *     arg_cat(&pdest, "cruel ", &ndest);
 *     arg_cat(&pdest, "world!", &ndest);
 *
 * leaves dest[] == "goodbye cruel world!" and ndest == 10.
 *
 * One byte is always reserved for the terminator, so text that does not
 * fit is truncated.  If *pndest is 0 there is no room even for a
 * terminator and nothing is touched.
 */
static
void arg_cat(char * *pdest, const char *src, size_t *pndest)
{
    char *dest = *pdest;
    char *end;
    char *last;

    if (*pndest == 0)
        return;

    end = dest + *pndest;
    last = end - 1;     /* final byte of the buffer, reserved for the NUL */

    /* locate null terminator of dest string */
    while (dest < last && *dest != 0)
        dest++;

    /* concat src string to dest string */
    while (dest < last && *src != 0)
        *dest++ = *src++;

    /* null terminate dest string (dest <= last, so this stays in bounds) */
    *dest = 0;

    /* update *pdest and *pndest */
    *pndest = (size_t)(end - dest);
    *pdest = dest;
}


/*
 * Append datatype, wrapped in "[...]" when optvalue is non-zero, using the
 * same cursor/remaining-space convention as arg_cat().
 */
static
void arg_cat_datatype(char * *pdest, size_t *pndest, const char *datatype, int optvalue)
{
    if (optvalue)
    {
        arg_cat(pdest, "[", pndest);
        arg_cat(pdest, datatype, pndest);
        arg_cat(pdest, "]", pndest);
    }
    else
        arg_cat(pdest, datatype, pndest);
}


/*
 * Build the abbreviated syntax string for one option into dest[0..ndest-1]:
 *
 *   - with shortopts:      "-x"      followed by " datatype" if given
 *   - else with longopts:  "--first" followed by "=datatype" if given,
 *                          where "first" is the text before the first comma
 *   - else:                "datatype" alone, if given
 *
 * A datatype is wrapped in "[...]" when optvalue is non-zero.  Output that
 * does not fit is truncated; dest is never written beyond ndest bytes.
 */
static
void arg_cat_option(char *dest,
                    size_t ndest,
                    const char *shortopts,
                    const char *longopts,
                    const char *datatype,
                    int optvalue)
{
    if (shortopts)
    {
        char option[3];

        /* note: option array[] is initialized dynamically here to satisfy */
        /* a deficiency in the watcom compiler wrt static array initializers. */
        option[0] = '-';
        option[1] = shortopts[0];
        option[2] = 0;

        arg_cat(&dest, option, &ndest);
        if (datatype)
        {
            arg_cat(&dest, " ", &ndest);
            arg_cat_datatype(&dest, &ndest, datatype, optvalue);
        }
    }
    else if (longopts)
    {
        size_t ncspn;

        /* add "--" tag prefix */
        arg_cat(&dest, "--", &ndest);

        /* add comma separated option tag */
        ncspn = strcspn(longopts, ",");
        if (ndest > 0)
        {
            /* dest now points at the terminator and ndest counts the bytes */
            /* from there to the end of the buffer.  strncat() stores a NUL */
            /* after the characters it copies, so only ndest - 1 of them fit. */
            size_t room = ndest - 1;
            strncat(dest, longopts, (ncspn < room) ? ncspn : room);
        }

        if (datatype)
        {
            arg_cat(&dest, "=", &ndest);
            arg_cat_datatype(&dest, &ndest, datatype, optvalue);
        }
    }
    else if (datatype)
    {
        arg_cat_datatype(&dest, &ndest, datatype, optvalue);
    }
}
separator : ""; + + if (shortopts) + { + const char *c = shortopts; + while(*c) + { + /* "-a|-b|-c" */ + char shortopt[3]; + + /* note: shortopt array[] is initialiazed dynamically here to satisfy */ + /* a deficiency in the watcom compiler wrt static array initializers. */ + shortopt[0] = '-'; + shortopt[1] = *c; + shortopt[2] = 0; + + arg_cat(&dest, shortopt, &ndest); + if (*++c) + arg_cat(&dest, separator, &ndest); + } + } + + /* put separator between long opts and short opts */ + if (shortopts && longopts) + arg_cat(&dest, separator, &ndest); + + if (longopts) + { + const char *c = longopts; + while(*c) + { + size_t ncspn; + + /* add "--" tag prefix */ + arg_cat(&dest, "--", &ndest); + + /* add comma separated option tag */ + ncspn = strcspn(c, ","); + strncat(dest, c, (ncspn < ndest) ? ncspn : ndest); + c += ncspn; + + /* add given separator in place of comma */ + if (*c == ',') + { + arg_cat(&dest, separator, &ndest); + c++; + } + } + } + + if (datatype) + { + if (longopts) + arg_cat(&dest, "=", &ndest); + else if (shortopts) + arg_cat(&dest, " ", &ndest); + + if (optvalue) + { + arg_cat(&dest, "[", &ndest); + arg_cat(&dest, datatype, &ndest); + arg_cat(&dest, "]", &ndest); + } + else + arg_cat(&dest, datatype, &ndest); + } +} + + +/* this function should be deprecated because it doesnt consider optional argument values (ARG_HASOPTVALUE) */ +void arg_print_option(FILE *fp, + const char *shortopts, + const char *longopts, + const char *datatype, + const char *suffix) +{ + char syntax[200] = ""; + suffix = suffix ? 
suffix : ""; + + /* there is no way of passing the proper optvalue for optional argument values here, so we must ignore it */ + arg_cat_optionv(syntax, + sizeof(syntax), + shortopts, + longopts, + datatype, + 0, + "|"); + + fputs(syntax, fp); + fputs(suffix, fp); +} + + +/* + * Print a GNU style [OPTION] string in which all short options that + * do not take argument values are presented in abbreviated form, as + * in: -xvfsd, or -xvf[sd], or [-xvsfd] + */ +static +void arg_print_gnuswitch(FILE *fp, struct arg_hdr * *table) +{ + int tabindex; + char *format1 = " -%c"; + char *format2 = " [-%c"; + char *suffix = ""; + + /* print all mandatory switches that are without argument values */ + for(tabindex = 0; + table[tabindex] && !(table[tabindex]->flag & ARG_TERMINATOR); + tabindex++) + { + /* skip optional options */ + if (table[tabindex]->mincount < 1) + continue; + + /* skip non-short options */ + if (table[tabindex]->shortopts == NULL) + continue; + + /* skip options that take argument values */ + if (table[tabindex]->flag & ARG_HASVALUE) + continue; + + /* print the short option (only the first short option char, ignore multiple choices)*/ + fprintf(fp, format1, table[tabindex]->shortopts[0]); + format1 = "%c"; + format2 = "[%c"; + } + + /* print all optional switches that are without argument values */ + for(tabindex = 0; + table[tabindex] && !(table[tabindex]->flag & ARG_TERMINATOR); + tabindex++) + { + /* skip mandatory args */ + if (table[tabindex]->mincount > 0) + continue; + + /* skip args without short options */ + if (table[tabindex]->shortopts == NULL) + continue; + + /* skip args with values */ + if (table[tabindex]->flag & ARG_HASVALUE) + continue; + + /* print first short option */ + fprintf(fp, format2, table[tabindex]->shortopts[0]); + format2 = "%c"; + suffix = "]"; + } + + fprintf(fp, "%s", suffix); +} + + +void arg_print_syntax(FILE *fp, void * *argtable, const char *suffix) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int i, 
tabindex; + + /* print GNU style [OPTION] string */ + arg_print_gnuswitch(fp, table); + + /* print remaining options in abbreviated style */ + for(tabindex = 0; + table[tabindex] && !(table[tabindex]->flag & ARG_TERMINATOR); + tabindex++) + { + char syntax[200] = ""; + const char *shortopts, *longopts, *datatype; + + /* skip short options without arg values (they were printed by arg_print_gnu_switch) */ + if (table[tabindex]->shortopts && + !(table[tabindex]->flag & ARG_HASVALUE)) + continue; + + shortopts = table[tabindex]->shortopts; + longopts = table[tabindex]->longopts; + datatype = table[tabindex]->datatype; + arg_cat_option(syntax, + sizeof(syntax), + shortopts, + longopts, + datatype, + table[tabindex]->flag & ARG_HASOPTVALUE); + + if (strlen(syntax) > 0) + { + /* print mandatory instances of this option */ + for (i = 0; i < table[tabindex]->mincount; i++) + fprintf(fp, " %s", syntax); + + /* print optional instances enclosed in "[..]" */ + switch ( table[tabindex]->maxcount - table[tabindex]->mincount ) + { + case 0: + break; + case 1: + fprintf(fp, " [%s]", syntax); + break; + case 2: + fprintf(fp, " [%s] [%s]", syntax, syntax); + break; + default: + fprintf(fp, " [%s]...", syntax); + break; + } + } + } + + if (suffix) + fprintf(fp, "%s", suffix); +} + + +void arg_print_syntaxv(FILE *fp, void * *argtable, const char *suffix) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int i, tabindex; + + /* print remaining options in abbreviated style */ + for(tabindex = 0; + table[tabindex] && !(table[tabindex]->flag & ARG_TERMINATOR); + tabindex++) + { + char syntax[200] = ""; + const char *shortopts, *longopts, *datatype; + + shortopts = table[tabindex]->shortopts; + longopts = table[tabindex]->longopts; + datatype = table[tabindex]->datatype; + arg_cat_optionv(syntax, + sizeof(syntax), + shortopts, + longopts, + datatype, + table[tabindex]->flag & ARG_HASOPTVALUE, + "|"); + + /* print mandatory options */ + for (i = 0; i < 
table[tabindex]->mincount; i++) + fprintf(fp, " %s", syntax); + + /* print optional args enclosed in "[..]" */ + switch ( table[tabindex]->maxcount - table[tabindex]->mincount ) + { + case 0: + break; + case 1: + fprintf(fp, " [%s]", syntax); + break; + case 2: + fprintf(fp, " [%s] [%s]", syntax, syntax); + break; + default: + fprintf(fp, " [%s]...", syntax); + break; + } + } + + if (suffix) + fprintf(fp, "%s", suffix); +} + + +void arg_print_glossary(FILE *fp, void * *argtable, const char *format) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int tabindex; + + format = format ? format : " %-20s %s\n"; + for (tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + if (table[tabindex]->glossary) + { + char syntax[200] = ""; + const char *shortopts = table[tabindex]->shortopts; + const char *longopts = table[tabindex]->longopts; + const char *datatype = table[tabindex]->datatype; + const char *glossary = table[tabindex]->glossary; + arg_cat_optionv(syntax, + sizeof(syntax), + shortopts, + longopts, + datatype, + table[tabindex]->flag & ARG_HASOPTVALUE, + ", "); + fprintf(fp, format, syntax, glossary); + } + } +} + + +/** + * Print a piece of text formatted, which means in a column with a + * left and a right margin. The lines are wrapped at whitspaces next + * to right margin. The function does not indent the first line, but + * only the following ones. + * + * Example: + * arg_print_formatted( fp, 0, 5, "Some text that doesn't fit." ) + * will result in the following output: + * + * Some + * text + * that + * doesn' + * t fit. + * + * Too long lines will be wrapped in the middle of a word. + * + * arg_print_formatted( fp, 2, 7, "Some text that doesn't fit." ) + * will result in the following output: + * + * Some + * text + * that + * doesn' + * t fit. + * + * As you see, the first line is not indented. This enables output of + * lines, which start in a line where output already happened. 
/**
 * Print text as a column that is (rmargin - lmargin + 1) characters wide.
 *
 * Lines are broken at the last whitespace that still fits into the column;
 * a single word longer than the column is split in the middle.  Whitespace
 * at the start and at the end of every output line is dropped.  The first
 * line is NOT indented, so the caller may already have written something
 * on it; every following line is indented by lmargin blanks.  Each output
 * line, including the last, ends with '\n'.
 *
 * Example: arg_print_formatted(fp, 0, 5, "Some text that doesn't fit.")
 * prints the five lines
 *
 *     Some
 *     text
 *     that
 *     doesn'
 *     t fit.
 *
 * and arg_print_formatted(fp, 2, 7, ...) prints the same lines with every
 * line after the first indented by two blanks.
 *
 * An empty text prints nothing at all.  If rmargin < lmargin no sensible
 * column exists and the text is printed on a single line.
 *
 * Author: Uli Fouquet
 */
static
void arg_print_formatted( FILE *fp,
                          const unsigned lmargin,
                          const unsigned rmargin,
                          const char *text )
{
    const size_t textlen = strlen( text );
    size_t colwidth;
    size_t pos = 0;

    if ( textlen == 0 )
        return;

    /* colwidth is always >= 1 here, which guarantees progress below */
    if ( rmargin >= lmargin )
        colwidth = (size_t)(rmargin - lmargin) + 1;
    else
        colwidth = textlen;

    for (;;)
    {
        size_t stop;    /* one past the last character considered for this line */
        size_t end;     /* one past the last character actually printed */
        size_t i;

        /* Eat leading whitespace; wrapping usually leaves one at the
           beginning of the next line.  isspace() needs an unsigned char
           value, a plain negative char is undefined behaviour. */
        while ( isspace( (unsigned char)text[pos] ) )
            pos++;

        if ( textlen - pos <= colwidth )
        {
            /* the remainder fits completely */
            stop = textlen;
        }
        else
        {
            /* text[pos + colwidth] is the first character that does not
               fit; walk back to the last whitespace at or before it */
            size_t brk = pos + colwidth;

            while ( brk > pos && !isspace( (unsigned char)text[brk] ) )
                brk--;

            /* no whitespace inside the column: split the word */
            stop = ( brk > pos ) ? brk : pos + colwidth;
        }

        /* do not print trailing whitespace */
        end = stop;
        while ( end > pos && isspace( (unsigned char)text[end - 1] ) )
            end--;

        /* output line of text */
        for ( i = pos; i < end; i++ )
            fputc( text[i], fp );
        fputc( '\n', fp );

        /* anything but whitespace left? */
        pos = stop;
        while ( isspace( (unsigned char)text[pos] ) )
            pos++;
        if ( pos >= textlen )
            break;

        /* indent the continuation line */
        for ( i = 0; i < lmargin; i++ )
            fputc( ' ', fp );
    }
}
+ * Differences to arg_print_glossary() are: + * - wraps lines after 80 chars + * - indents lines without shortops + * - does not accept formatstrings + * + * Contributed by Uli Fouquet + */ +void arg_print_glossary_gnu(FILE *fp, void * *argtable ) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int tabindex; + + for(tabindex = 0; !(table[tabindex]->flag & ARG_TERMINATOR); tabindex++) + { + if (table[tabindex]->glossary) + { + char syntax[200] = ""; + const char *shortopts = table[tabindex]->shortopts; + const char *longopts = table[tabindex]->longopts; + const char *datatype = table[tabindex]->datatype; + const char *glossary = table[tabindex]->glossary; + + if ( !shortopts && longopts ) + { + /* Indent trailing line by 4 spaces... */ + memset( syntax, ' ', 4 ); + *(syntax + 4) = '\0'; + } + + arg_cat_optionv(syntax, + sizeof(syntax), + shortopts, + longopts, + datatype, + table[tabindex]->flag & ARG_HASOPTVALUE, + ", "); + + /* If syntax fits not into column, print glossary in new line... */ + if ( strlen(syntax) > 25 ) + { + fprintf( fp, " %-25s %s\n", syntax, "" ); + *syntax = '\0'; + } + + fprintf( fp, " %-25s ", syntax ); + arg_print_formatted( fp, 28, 79, glossary ); + } + } /* for each table entry */ + + fputc( '\n', fp ); +} + + +/** + * Checks the argtable[] array for NULL entries and returns 1 + * if any are found, zero otherwise. + */ +int arg_nullcheck(void * *argtable) +{ + struct arg_hdr * *table = (struct arg_hdr * *)argtable; + int tabindex; + /*printf("arg_nullcheck(%p)\n",argtable);*/ + + if (!table) + return 1; + + tabindex = 0; + do + { + /*printf("argtable[%d]=%p\n",tabindex,argtable[tabindex]);*/ + if (!table[tabindex]) + return 1; + } while(!(table[tabindex++]->flag & ARG_TERMINATOR)); + + return 0; +} + + +/* + * arg_free() is deprecated in favour of arg_freetable() due to a flaw in its design. 
/*
 * Free every element of argtable[0..n-1] and set each slot to NULL, where
 * n is the number of entries in the array.
 *
 * Unlike arg_free() this does not stop at the first NULL entry, so entries
 * that follow a failed allocation are released as well.  The array itself
 * is not freed.  A NULL argtable is accepted and ignored.
 */
void arg_freetable(void * *argtable, size_t n)
{
    size_t tabindex;
    /*printf("arg_freetable(%p)\n",argtable);*/

    if (argtable == NULL)
        return;

    for (tabindex = 0; tabindex < n; tabindex++)
    {
        /* free(NULL) is a no-op, so empty slots need no special case */
        free(argtable[tabindex]);
        argtable[tabindex] = NULL;
    }
}
+ * + * Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of STEWART HEITMANN nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL STEWART HEITMANN BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <stdio.h>      /* FILE */
#include <time.h>       /* struct tm */

#define ARG_REX_ICASE 1

/* bit masks for arg_hdr.flag */
enum
{
    ARG_TERMINATOR=0x1,
    ARG_HASVALUE=0x2,
    ARG_HASOPTVALUE=0x4
};

/* Signatures of the per-type callbacks stored in struct arg_hdr. */
typedef void (arg_resetfn)(void *parent);
typedef int  (arg_scanfn)(void *parent, const char *argval);
typedef int  (arg_checkfn)(void *parent);
typedef void (arg_errorfn)(void *parent, FILE *fp, int error, const char *argval, const char *progname);


/*
* The arg_hdr struct defines properties that are common to all arg_xxx structs.
* The argtable library requires each arg_xxx struct to have an arg_hdr
* struct as its first data member.
* The argtable library functions then use this data to identify the
* properties of the command line option, such as its option tags,
* datatype string, and glossary strings, and so on.
* Moreover, the arg_hdr struct contains pointers to custom functions that
* are provided by each arg_xxx struct which perform the tasks of parsing
* that particular arg_xxx arguments, performing post-parse checks, and
* reporting errors.
* These functions are private to the individual arg_xxx source code
* and the pointers to them are initialised by that arg_xxx struct's
* constructor function. The user could alter them after construction
* if desired, but the original intention is for them to be set by the
* constructor and left unaltered.
*/
struct arg_hdr
{
    char         flag;        /* Modifier flags: ARG_TERMINATOR, ARG_HASVALUE, ARG_HASOPTVALUE */
    const char  *shortopts;   /* String defining the short options */
    const char  *longopts;    /* String defining the long options */
    const char  *datatype;    /* Description of the argument data type */
    const char  *glossary;    /* Description of the option as shown by arg_print_glossary function */
    int          mincount;    /* Minimum number of occurrences of this option accepted */
    int          maxcount;    /* Maximum number of occurrences of this option accepted */
    void        *parent;      /* Pointer to parent arg_xxx struct */
    arg_resetfn *resetfn;     /* Pointer to parent arg_xxx reset function */
    arg_scanfn  *scanfn;      /* Pointer to parent arg_xxx scan function */
    arg_checkfn *checkfn;     /* Pointer to parent arg_xxx check function */
    arg_errorfn *errorfn;     /* Pointer to parent arg_xxx error function */
    void        *priv;        /* Pointer to private header data for use by arg_xxx functions */
};
count; /* Number of matching command line args*/ + const char **filename; /* Array of parsed filenames (eg: /home/foo.bar) */ + const char **basename; /* Array of parsed basenames (eg: foo.bar) */ + const char **extension; /* Array of parsed extensions (eg: .bar) */ +}; + +struct arg_date +{ + struct arg_hdr hdr; /* The mandatory argtable header struct */ + const char *format; /* strptime format string used to parse the date */ + int count; /* Number of matching command line args */ + struct tm *tmval; /* Array of parsed time values */ +}; + +enum {ARG_ELIMIT=1, ARG_EMALLOC, ARG_ENOMATCH, ARG_ELONGOPT, ARG_EMISSARG}; +struct arg_end +{ + struct arg_hdr hdr; /* The mandatory argtable header struct */ + int count; /* Number of errors encountered */ + int *error; /* Array of error codes */ + void **parent; /* Array of pointers to offending arg_xxx struct */ + const char **argval; /* Array of pointers to offending argv[] string */ +}; + + +/**** arg_xxx constructor functions *********************************/ + +struct arg_rem* arg_rem(const char* datatype, const char* glossary); + +struct arg_lit* arg_lit0(const char* shortopts, + const char* longopts, + const char* glossary); +struct arg_lit* arg_lit1(const char* shortopts, + const char* longopts, + const char *glossary); +struct arg_lit* arg_litn(const char* shortopts, + const char* longopts, + int mincount, + int maxcount, + const char *glossary); + +struct arg_key* arg_key0(const char* keyword, + int flags, + const char* glossary); +struct arg_key* arg_key1(const char* keyword, + int flags, + const char* glossary); +struct arg_key* arg_keyn(const char* keyword, + int flags, + int mincount, + int maxcount, + const char* glossary); + +struct arg_int* arg_int0(const char* shortopts, + const char* longopts, + const char* datatype, + const char* glossary); +struct arg_int* arg_int1(const char* shortopts, + const char* longopts, + const char* datatype, + const char *glossary); +struct arg_int* arg_intn(const char* 
shortopts, + const char* longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary); + +struct arg_dbl* arg_dbl0(const char* shortopts, + const char* longopts, + const char* datatype, + const char* glossary); +struct arg_dbl* arg_dbl1(const char* shortopts, + const char* longopts, + const char* datatype, + const char *glossary); +struct arg_dbl* arg_dbln(const char* shortopts, + const char* longopts, + const char *datatype, + int mincount, + int maxcount, + const char *glossary); + +struct arg_str* arg_str0(const char* shortopts, + const char* longopts, + const char* datatype, + const char* glossary); +struct arg_str* arg_str1(const char* shortopts, + const char* longopts, + const char* datatype, + const char *glossary); +struct arg_str* arg_strn(const char* shortopts, + const char* longopts, + const char* datatype, + int mincount, + int maxcount, + const char *glossary); + +struct arg_rex* arg_rex0(const char* shortopts, + const char* longopts, + const char* pattern, + const char* datatype, + int flags, + const char* glossary); +struct arg_rex* arg_rex1(const char* shortopts, + const char* longopts, + const char* pattern, + const char* datatype, + int flags, + const char *glossary); +struct arg_rex* arg_rexn(const char* shortopts, + const char* longopts, + const char* pattern, + const char* datatype, + int mincount, + int maxcount, + int flags, + const char *glossary); + +struct arg_file* arg_file0(const char* shortopts, + const char* longopts, + const char* datatype, + const char* glossary); +struct arg_file* arg_file1(const char* shortopts, + const char* longopts, + const char* datatype, + const char *glossary); +struct arg_file* arg_filen(const char* shortopts, + const char* longopts, + const char* datatype, + int mincount, + int maxcount, + const char *glossary); + +struct arg_date* arg_date0(const char* shortopts, + const char* longopts, + const char* format, + const char* datatype, + const char* glossary); +struct arg_date* 
arg_date1(const char* shortopts, + const char* longopts, + const char* format, + const char* datatype, + const char *glossary); +struct arg_date* arg_daten(const char* shortopts, + const char* longopts, + const char* format, + const char* datatype, + int mincount, + int maxcount, + const char *glossary); + +struct arg_end* arg_end(int maxerrors); + + +/**** other functions *******************************************/ +int arg_nullcheck(void **argtable); +int arg_parse(int argc, char **argv, void **argtable); +void arg_print_option(FILE *fp, const char *shortopts, const char *longopts, const char *datatype, const char *suffix); +void arg_print_syntax(FILE *fp, void **argtable, const char *suffix); +void arg_print_syntaxv(FILE *fp, void **argtable, const char *suffix); +void arg_print_glossary(FILE *fp, void **argtable, const char *format); +void arg_print_glossary_gnu(FILE *fp, void **argtable); +void arg_print_errors(FILE* fp, struct arg_end* end, const char* progname); +void arg_freetable(void **argtable, size_t n); + +/**** deprecated functions, for back-compatibility only ********/ +void arg_free(void **argtable); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/char.c b/src/char.c new file mode 100644 index 0000000..f9d1259 --- /dev/null +++ b/src/char.c @@ -0,0 +1,154 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file char.c + + @brief Character lookup utility functions + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + +#include + +#include "char.h" + + +/// Create this lookup table using char_lookup.c +static unsigned char smart_char_type[256] = { + 16, 0, 0, 0, 0, 0, 0, 0, 0, 1, 16, 0, 0, 16, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 2, 2, 2, 2, 2, + 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, + 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +static int CHAR_ALPHANUMERIC = CHAR_ALPHA | CHAR_DIGIT; + +static int CHAR_WHITESPACE_OR_PUNCTUATION = CHAR_WHITESPACE | CHAR_PUNCTUATION; + +static int CHAR_WHITESPACE_OR_LINE_ENDING = CHAR_WHITESPACE | CHAR_LINE_ENDING; + +static int CHAR_WHITESPACE_OR_LINE_ENDING_OR_PUNCTUATION = CHAR_WHITESPACE | CHAR_LINE_ENDING | CHAR_PUNCTUATION; + + +// Is character whitespace? +int char_is_whitespace(char c) { + return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE; +} + +// Is character a newline, return, or EOF? +int char_is_line_ending(char c) { + return smart_char_type[(unsigned char) c] & CHAR_LINE_ENDING; +} + +// Is character part of Windows line ending ('\r\n')? +int char_is_windows_line_ending(char * c) { + if (*c == '\n') + return (*(c - 1) == '\r') ? 1 : 0; + + if (*c == '\r') + return (*(c + 1) == '\n') ? 
1 : 0; + + return 0; +} + +#ifdef TEST +void Test_char_is_windows_line_ending(CuTest* tc) { + char * test = "\r\n\n"; + + CuAssertIntEquals(tc, 1, char_is_windows_line_ending(&test[0])); + CuAssertIntEquals(tc, 1, char_is_windows_line_ending(&test[1])); + CuAssertIntEquals(tc, 0, char_is_windows_line_ending(&test[2])); +} +#endif + +// Is character punctuation? +int char_is_punctuation(char c) { + return smart_char_type[(unsigned char) c] & CHAR_PUNCTUATION; +} + +// Is character alpha? +int char_is_alpha(char c) { + return smart_char_type[(unsigned char) c] & CHAR_ALPHA; +} + +// Is character digit? +int char_is_digit(char c) { + return smart_char_type[(unsigned char) c] & CHAR_DIGIT; +} + +// Is character alphanumeric? +int char_is_alphanumeric(char c) { + return smart_char_type[(unsigned char) c] & CHAR_ALPHANUMERIC; +} + +// Is character either whitespace or line ending? +int char_is_whitespace_or_line_ending(char c) { + return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE_OR_LINE_ENDING; +} + +// Is character either whitespace or punctuation? +int char_is_whitespace_or_punctuation(char c) { + return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE_OR_PUNCTUATION; +} + +// Is character either whitespace or line ending or punctuation? +int char_is_whitespace_or_line_ending_or_punctuation(char c) { + return smart_char_type[(unsigned char) c] & CHAR_WHITESPACE_OR_LINE_ENDING_OR_PUNCTUATION; +} diff --git a/src/char.h b/src/char.h new file mode 100644 index 0000000..acd50af --- /dev/null +++ b/src/char.h @@ -0,0 +1,107 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file char.h + + @brief Character lookup utility functions + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. 
+ + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + + +#ifndef CHAR_SMART_STRING_H +#define CHAR_SMART_STRING_H + +#ifdef TEST +#include "CuTest.h" +#endif + +/// Define character types +enum char_types { + CHAR_WHITESPACE = 1 << 0, //!< ' ','\t' + CHAR_PUNCTUATION = 1 << 1, //!< .!?,;:"'`~(){}[]#$%+-=<>&@\/^*_| + CHAR_ALPHA = 1 << 2, //!< a-zA-Z + CHAR_DIGIT = 1 << 3, //!< 0-9 + CHAR_LINE_ENDING = 1 << 4, //!< \n,\r,\0 +}; + + +// Is character whitespace? +int char_is_whitespace(char c); + +// Is character a newline, return, or EOF? +int char_is_line_ending(char c); + +// Is character part of Windows line ending ('\r\n')? 
+int char_is_windows_line_ending(char * c); + +// Is character punctuation? +int char_is_punctuation(char c); + +// Is character alpha? +int char_is_alpha(char c); + +// Is character digit? +int char_is_digit(char c); + +// Is character alphanumeric? +int char_is_alphanumeric(char c); + +// Is character either whitespace or line ending? +int char_is_whitespace_or_line_ending(char c); + +// Is character either whitespace or punctuation? +int char_is_whitespace_or_punctuation(char c); + +// Is character either whitespace or line ending or punctuation? +int char_is_whitespace_or_line_ending_or_punctuation(char c); + +// Is byte a UTF-8 continuation byte +#define char_is_continuation_byte(x) ((x & 0xC0) == 0x80) + +#endif + diff --git a/src/char_lookup.c b/src/char_lookup.c new file mode 100644 index 0000000..200e682 --- /dev/null +++ b/src/char_lookup.c @@ -0,0 +1,157 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file char_lookup.c + + @brief Create lookup table for char.c + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + + +#include +#include + +#include "char.h" + +// Shortcuts to assign values to characters in lookup table +#define punctuation(x) table[x] |= CHAR_PUNCTUATION +#define whitespace(x) table[x] |= CHAR_WHITESPACE +#define alpha(x) table[x] |= CHAR_ALPHA +#define digit(x) table[x] |= CHAR_DIGIT +#define line_ending(x) table[x] |= CHAR_LINE_ENDING + + +int main( int argc, char** argv ) { + unsigned char table[256] = {0}; + + // Define punctuation + // TODO: Need to go through extended ASCII codes for + // additional punctuation characters + punctuation('.'); + punctuation('!'); + punctuation('?'); + + punctuation(','); + punctuation(';'); + punctuation(':'); + + punctuation('"'); + punctuation('\''); + punctuation('`'); + punctuation('~'); + + punctuation('('); + punctuation(')'); + punctuation('{'); + punctuation('}'); + punctuation('['); + punctuation(']'); + + punctuation('#'); + punctuation('$'); + punctuation('%'); + punctuation('+'); + punctuation('-'); + punctuation('='); + punctuation('<'); + punctuation('>'); + + punctuation('&'); + punctuation('@'); + punctuation('\\'); + punctuation('/'); + punctuation('^'); + + punctuation('*'); + punctuation('_'); + + punctuation('|'); + + + // Define whitespace + // TODO: Need to go through extended ASCII codes for + // additional whitespace characters + whitespace(' '); + whitespace('\t'); + + + // Define line endings + line_ending('\n'); + line_ending('\r'); + line_ending('\0'); // Count EOF as line ending + + + // Define digits + for (char i = '0'; i <= '9'; ++i) + { + digit(i); + } + + // Define alpha + // TODO: Need to go through extended ASCII codes for + // additional alpha characters + for (char i = 'a'; i <= 'z'; ++i) + { + alpha(i); + } + for (char i = 'A'; i <= 'Z'; ++i) + { + alpha(i); + } + + // Print output as 16 x 16 table + for (int i = 0; i < 16; ++i) + { + for (int j = 0; j < 16; ++j) + { + fprintf(stdout, "%3d,", table[i * 16 + j]); + } + + fprintf(stdout, "\n"); + } +} diff --git a/src/d_string.c 
b/src/d_string.c index 93d8654..60f12db 100644 --- a/src/d_string.c +++ b/src/d_string.c @@ -1,13 +1,15 @@ /** - Smart String -- Library to abstract smart typing features from MMD Composer + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. @file d_string.c - @brief Dynamic string -- refactoring of old GLibFacade + @brief Dynamic string -- refactoring of old GLibFacade. Provides a string + "object" that can grow to accommodate any size content that is appended. @author Daniel Jalkut, modified by Fletcher T. Penney and Dan Lowe + @bug **/ @@ -15,11 +17,11 @@ /* Copyright © 2011 Daniel Jalkut. - Modifications by Fletcher T. Penney, Copyright © 2011-2016 Fletcher T. Penney. + Modifications by Fletcher T. Penney, Copyright © 2011-2017 Fletcher T. Penney. Modifications by Dan Lowe, Copyright © 2011 Dan Lowe. - The `c-template` project is released under the MIT License. + The `MultiMarkdown 6` project is released under the MIT License. GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: @@ -54,6 +56,7 @@ */ + #include #include #include @@ -99,12 +102,18 @@ int asprintf( char **sptr, char *fmt, ... 
) /* DString */ -#define kStringBufferStartingSize 1024 -#define kStringBufferGrowthMultiplier 2 +#define kStringBufferStartingSize 1024 //!< Default size of string buffer capacity +#define kStringBufferGrowthMultiplier 2 //!< Multiply capacity by this factor when more space is needed +#define kStringBufferMaxIncrement 1024 * 1024 * 100 //!< Maximum growth increment when resizing (to limit exponential growth) -DString* d_string_new(const char *startingString) + +/// Create a new dynamic string +DString* d_string_new(const char * startingString) { DString* newString = malloc(sizeof(DString)); + + if (!newString) + return NULL; if (startingString == NULL) startingString = ""; @@ -116,6 +125,12 @@ DString* d_string_new(const char *startingString) } newString->str = malloc(startingBufferSize); + + if (!newString->str) { + free(newString); + return NULL; + } + newString->currentStringBufferSize = startingBufferSize; strncpy(newString->str, startingString, startingStringSize); newString->str[startingStringSize] = '\0'; @@ -124,7 +139,9 @@ DString* d_string_new(const char *startingString) return newString; } -char* d_string_free(DString* ripString, bool freeCharacterData) + +/// Free dynamic string +char* d_string_free(DString * ripString, bool freeCharacterData) { if (ripString == NULL) return NULL; @@ -144,7 +161,9 @@ char* d_string_free(DString* ripString, bool freeCharacterData) return returnedString; } -static void ensureStringBufferCanHold(DString* baseString, size_t newStringSize) + +/// Ensure that dynamic string has specified capacity +static void ensureStringBufferCanHold(DString * baseString, size_t newStringSize) { size_t newBufferSizeNeeded = newStringSize + 1; if (newBufferSizeNeeded > baseString->currentStringBufferSize) @@ -153,7 +172,11 @@ static void ensureStringBufferCanHold(DString* baseString, size_t newStringSize) while (newBufferSizeNeeded > newBufferSize) { - newBufferSize *= kStringBufferGrowthMultiplier; + if (newBufferSize > 
kStringBufferMaxIncrement) { + newBufferSize += kStringBufferMaxIncrement; + } else { + newBufferSize *= kStringBufferGrowthMultiplier; + } } char *temp; @@ -161,7 +184,7 @@ static void ensureStringBufferCanHold(DString* baseString, size_t newStringSize) if (temp == NULL) { /* realloc failed */ - fprintf(stderr, "error reallocating memory\n"); + fprintf(stderr, "Error reallocating memory for d_string. Current buffer size %lu.\n",baseString->currentStringBufferSize); exit(1); } @@ -170,11 +193,14 @@ static void ensureStringBufferCanHold(DString* baseString, size_t newStringSize) } } -void d_string_append(DString* baseString, char* appendedString) + +/// Append null-terminated string to end of dynamic string +void d_string_append(DString * baseString, const char * appendedString) { - if ((appendedString != NULL) && (strlen(appendedString) > 0)) + size_t appendedStringLength = strlen(appendedString); + + if ((appendedString != NULL) && (appendedStringLength > 0)) { - size_t appendedStringLength = strlen(appendedString); size_t newStringLength = baseString->currentStringLength + appendedStringLength; ensureStringBufferCanHold(baseString, newStringLength); @@ -184,7 +210,9 @@ void d_string_append(DString* baseString, char* appendedString) } } -void d_string_append_c(DString* baseString, char appendedCharacter) + +/// Append single character to end of dynamic string +void d_string_append_c(DString * baseString, char appendedCharacter) { size_t newSizeNeeded = baseString->currentStringLength + 1; ensureStringBufferCanHold(baseString, newSizeNeeded); @@ -194,7 +222,9 @@ void d_string_append_c(DString* baseString, char appendedCharacter) baseString->str[baseString->currentStringLength] = '\0'; } -void d_string_append_c_array(DString *baseString, const char * appendedChars, size_t bytes) + +/// Append array of characters to end of dynamic string +void d_string_append_c_array(DString * baseString, const char * appendedChars, size_t bytes) { size_t newSizeNeeded = 
baseString->currentStringLength + bytes; ensureStringBufferCanHold(baseString, newSizeNeeded); @@ -205,7 +235,9 @@ void d_string_append_c_array(DString *baseString, const char * appendedChars, si baseString->str[baseString->currentStringLength] = '\0'; } -void d_string_append_printf(DString* baseString, char* format, ...) + +/// Append to end of dynamic string using format specifier +void d_string_append_printf(DString * baseString, const char * format, ...) { va_list args; va_start(args, format); @@ -220,11 +252,14 @@ void d_string_append_printf(DString* baseString, char* format, ...) va_end(args); } -void d_string_prepend(DString* baseString, char* prependedString) + +/// Prepend null-terminated string to end of dynamic string +void d_string_prepend(DString * baseString, const char * prependedString) { - if ((prependedString != NULL) && (strlen(prependedString) > 0)) + size_t prependedStringLength = strlen(prependedString); + + if ((prependedString != NULL) && (prependedStringLength > 0)) { - size_t prependedStringLength = strlen(prependedString); size_t newStringLength = baseString->currentStringLength + prependedStringLength; ensureStringBufferCanHold(baseString, newStringLength); @@ -235,14 +270,17 @@ void d_string_prepend(DString* baseString, char* prependedString) } } -void d_string_insert(DString* baseString, size_t pos, const char * insertedString) + +/// Insert null-terminated string inside dynamic string +void d_string_insert(DString * baseString, size_t pos, const char * insertedString) { - if ((insertedString != NULL) && (strlen(insertedString) > 0)) + size_t insertedStringLength = strlen(insertedString); + + if ((insertedString != NULL) && (insertedStringLength > 0)) { if (pos > baseString->currentStringLength) pos = baseString->currentStringLength; - size_t insertedStringLength = strlen(insertedString); size_t newStringLength = baseString->currentStringLength + insertedStringLength; ensureStringBufferCanHold(baseString, newStringLength); @@ -254,7 
+292,9 @@ void d_string_insert(DString* baseString, size_t pos, const char * insertedStrin } } -void d_string_insert_c(DString* baseString, size_t pos, char insertedCharacter) + +/// Insert single character inside dynamic string +void d_string_insert_c(DString * baseString, size_t pos, char insertedCharacter) { if (pos > baseString->currentStringLength) pos = baseString->currentStringLength; @@ -271,7 +311,8 @@ void d_string_insert_c(DString* baseString, size_t pos, char insertedCharacter) } -void d_string_insert_printf(DString* baseString, size_t pos, char* format, ...) +/// Insert inside dynamic string using format specifier +void d_string_insert_printf(DString * baseString, size_t pos, const char * format, ...) { va_list args; va_start(args, format); @@ -286,7 +327,9 @@ void d_string_insert_printf(DString* baseString, size_t pos, char* format, ...) va_end(args); } -void d_string_erase(DString* baseString, size_t pos, size_t len) + +/// Erase portion of dynamic string +void d_string_erase(DString * baseString, size_t pos, size_t len) { if ((pos > baseString->currentStringLength) || (len <= 0)) return; @@ -300,5 +343,28 @@ void d_string_erase(DString* baseString, size_t pos, size_t len) memmove(baseString->str + pos, baseString->str + pos + len, baseString->currentStringLength - pos - len); baseString->currentStringLength -= len; } + baseString->str[baseString->currentStringLength] = '\0'; } + +/// Copy a portion of dynamic string +char * d_string_copy_substring(DString * d, size_t start, size_t len) { + char * result; + + if (len == -1) { + len = d->currentStringLength - start; + } else { + if (start + len > d->currentStringLength) { + fprintf(stderr, "d_string: Asked to copy invalid substring range.\n"); + fprintf(stderr, "start: %lu len: %lu string: %lu\n", start, len, + d->currentStringLength); + return NULL; + } + } + + result = malloc(len + 1); + strncpy(result, &d->str[start], len); + result[len] = '\0'; + + return result; +} diff --git a/src/d_string.h 
b/src/d_string.h index 79dfb67..6ac2fbd 100644 --- a/src/d_string.h +++ b/src/d_string.h @@ -1,13 +1,15 @@ /** - Smart String -- Library to abstract smart typing features from MMD Composer + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. @file d_string.h - @brief Dynamic string -- refactoring of old GLibFacade + @brief Dynamic string -- refactoring of old GLibFacade. Provides a string + "object" that can grow to accommodate any size content that is appended. @author Daniel Jalkut, modified by Fletcher T. Penney and Dan Lowe + @bug **/ @@ -15,11 +17,11 @@ /* Copyright © 2011 Daniel Jalkut. - Modifications by Fletcher T. Penney, Copyright © 2011-2016 Fletcher T. Penney. + Modifications by Fletcher T. Penney, Copyright © 2011-2017 Fletcher T. Penney. Modifications by Dan Lowe, Copyright © 2011 Dan Lowe. - The `c-template` project is released under the MIT License. + The `MultiMarkdown 6` project is released under the MIT License. GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: @@ -59,6 +61,7 @@ #define D_STRING_SMART_STRING_H #include +#include /* WE implement minimal mirror implementations of GLib's GString * sufficient to cover the functionality required by MultiMarkdown. @@ -67,34 +70,104 @@ * GLib function prototype as guide for behavior. */ + +/// Structure for dynamic string typedef struct { - /* Current UTF8 byte stream this string represents */ - char* str; - - /* Where in the str buffer will we add new characters */ - /* or append new strings? 
*/ - unsigned long currentStringBufferSize; - unsigned long currentStringLength; + char * str; //!< Pointer to UTF-8 byte stream for string + unsigned long currentStringBufferSize; //!< Size of buffer currently allocated + unsigned long currentStringLength; //!< Size of current string } DString; -DString* d_string_new(const char *startingString); -char* d_string_free(DString* ripString, bool freeCharacterData); +/// Create a new dynamic string +DString * d_string_new( + const char * startingString //!< Initial contents for string +); + + +/// Free dynamic string +char * d_string_free( + DString * ripString, //!< DString to be freed + bool freeCharacterData //!< Should the underlying str be freed as well? +); + + +/// Append null-terminated string to end of dynamic string +void d_string_append( + DString * baseString, //!< DString to be appended + const char * appendedString //!< String to be appended +); + + +/// Append single character to end of dynamic string +void d_string_append_c( + DString * baseString, //!< DString to be appended + char appendedCharacter //!< Character to append +); + + +/// Append array of characters to end of dynamic string +void d_string_append_c_array( + DString * baseString, //!< DString to be appended + const char * appendedChars, //!< String to be appended + size_t bytes //!< Number of bytes to append +); + + +/// Append to end of dynamic string using format specifier +void d_string_append_printf( + DString * baseString, //!< DString to be appended + const char * format, //!< Format specifier for appending + ... 
//!< Arguments for format specifier +); + + +/// Prepend null-terminated string to beginning of dynamic string +void d_string_prepend( + DString * baseString, //!< DString to be prepended to + const char * prependedString //!< String to be prepended +); + + +/// Insert null-terminated string inside dynamic string +void d_string_insert( + DString * baseString, //!< DString to insert into + size_t pos, //!< Offset at which to insert string + const char * insertedString //!< String to be inserted +); + + +/// Insert single character inside dynamic string +void d_string_insert_c( + DString * baseString, //!< DString to insert into + size_t pos, //!< Offset at which to insert character + char insertedCharacter //!< Character to insert +); -void d_string_append_c(DString* baseString, char appendedCharacter); -void d_string_append_c_array(DString *baseString, const char * appendedChars, size_t bytes); -void d_string_append(DString* baseString, char *appendedString); -void d_string_prepend(DString* baseString, char* prependedString); +/// Insert inside dynamic string using format specifier +void d_string_insert_printf( + DString * baseString, //!< DString to insert into + size_t pos, //!< Offset at which to insert string + const char * format, //!< Format specifier for inserting + ... 
//!< Arguments for format specifier +); -void d_string_append_printf(DString* baseString, char* format, ...); -void d_string_insert(DString* baseString, size_t pos, const char * insertedString); -void d_string_insert_c(DString* baseString, size_t pos, char insertedCharacter); -void d_string_insert_printf(DString* baseString, size_t pos, char* format, ...); +/// Erase portion of dynamic string +void d_string_erase( + DString * baseString, //!< DString to be appended + size_t pos, //!< Offset at which to erase portion of string + size_t len //!< Character to append +); -void d_string_erase(DString* baseString, size_t pos, size_t len); +/// Copy a portion of dynamic string +char * d_string_copy_substring( + DString * d, //!< DString to copy + size_t start, //!< Start position for copy + size_t len //!< How many characters(bytes) to copy +); #endif diff --git a/src/html.c b/src/html.c new file mode 100644 index 0000000..431c119 --- /dev/null +++ b/src/html.c @@ -0,0 +1,1113 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file html.c + + @brief Convert token tree to HTML output. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + +#include +#include +#include + +#include "char.h" +#include "d_string.h" +#include "html.h" +#include "libMultiMarkdown.h" +#include "parser.h" +#include "token.h" +#include "scanners.h" +#include "writer.h" + + +#define LC(x) x + + +#define print(x) d_string_append(out, x) +#define print_char(x) d_string_append_c(out, x) +#define printf(...) 
d_string_append_printf(out, __VA_ARGS__) +//#define print_token(t) d_string_append_c_array(out, &(source[t->start + offset]), t->len) +#define print_token(t) d_string_append_c_array(out, &(source[t->start]), t->len) +#define print_localized(x) mmd_print_localized_char_html(out, x, scratch) + +// Use Knuth's pseudo random generator to obfuscate email addresses predictably +long ran_num_next(); + +void mmd_print_char_html(DString * out, char c, bool obfuscate) { + switch (c) { + case '"': + print("""); + break; + case '&': + print("&"); + break; + case '<': + print("<"); + break; + case '>': + print(">"); + break; + default: + if (obfuscate && ((int) c == (((int) c) & 127))) { + if (ran_num_next() % 2 == 0) + printf("&#%d;", (int) c); + else + printf("&#x%x;", (unsigned int) c); + } else { + print_char(c); + } + break; + } +} + + +void mmd_print_string_html(DString * out, const char * str, bool obfuscate) { + while (*str != '\0') { + mmd_print_char_html(out, *str, obfuscate); + str++; + } +} + + +void mmd_print_localized_char_html(DString * out, unsigned short type, scratch_pad * scratch) { + // TODO: Is smart typography enabled? Which language? 
+ int language = 0; + + switch (type) { + case DASH_N: + print("–"); + break; + case DASH_M: + print("—"); + break; + case ELLIPSIS: + print("…"); + break; + case APOSTROPHE: + print("’"); + break; + case QUOTE_LEFT_SINGLE: + switch (language) { + case SWEDISH: + print( "’"); + break; + case FRENCH: + print("'"); + break; + case GERMAN: + print("‚"); + break; + case GERMANGUILL: + print("›"); + break; + default: + print("‘"); + } + break; + case QUOTE_RIGHT_SINGLE: + switch (language) { + case GERMAN: + print("‘"); + break; + case GERMANGUILL: + print("‹"); + break; + default: + print("’"); + } + break; + case QUOTE_LEFT_DOUBLE: + switch (language) { + case DUTCH: + case GERMAN: + print("„"); + break; + case GERMANGUILL: + print("»"); + break; + case FRENCH: + print("«"); + break; + case SWEDISH: + print( "”"); + break; + default: + print("“"); + } + break; + case QUOTE_RIGHT_DOUBLE: + switch (language) { + case GERMAN: + print("“"); + break; + case GERMANGUILL: + print("«"); + break; + case FRENCH: + print("»"); + break; + case SWEDISH: + case DUTCH: + default: + print("”"); + } + break; + } +} + + +void mmd_export_link_html(DString * out, const char * source, token * text, link * link, size_t offset, scratch_pad * scratch) { + attr * a = link->attributes; + + if (link->url) { + print("url, false); + print("\""); + } else + print("title && link->title[0] != '\0') { + print(" title=\""); + mmd_print_string_html(out, link->title, false); + print("\""); + } + + while (a) { + print(" "); + print(a->key); + print("=\""); + print(a->value); + print("\""); + a = a->next; + } + + print(">"); + + mmd_export_token_tree_html(out, source, text->child, offset, scratch); + + print(""); +} + + +void mmd_export_image_html(DString * out, const char * source, token * text, link * link, size_t offset, scratch_pad * scratch) { + attr * a = link->attributes; + + if (link->url) + printf("url); + else + print("\"");child); + print("\""); + } + + if (0 && link->label) { + // \todo: Need 
to decide on approach to id's + char * label = label_from_token(source, link->label); + printf(" id=\"%s\"", label); + free(label); + } + + if (link->title && link->title[0] != '\0') + printf(" title=\"%s\"", link->title); + + while (a) { + print(" "); + print(a->key); + print("=\""); + print(a->value); + print("\""); + a = a->next; + } + + print(" />"); +} + + +void mmd_export_token_html(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { + if (t == NULL) + return; + + short temp_short; + link * temp_link = NULL; + char * temp_char = NULL; + bool temp_bool = 0; + token * temp_token = NULL; + + switch (t->type) { + case AMPERSAND: + case AMPERSAND_LONG: + print("&"); + break; + case ANGLE_LEFT: + print("<"); + break; + case ANGLE_RIGHT: + print(">"); + break; + case APOSTROPHE: + if (!(scratch->extensions & EXT_SMART)) { + print_token(t); + } else { + print_localized(APOSTROPHE); + } + break; + case BACKTICK: + if (t->mate == NULL) + print_token(t); + else if (t->mate->type == QUOTE_RIGHT_ALT) + if (!(scratch->extensions & EXT_SMART)) { + print_token(t); + } else { + print_localized(QUOTE_LEFT_DOUBLE); + } + else if (t->start < t->mate->start) { + print(""); + } else { + print(""); + } + break; + case BLOCK_BLOCKQUOTE: + pad(out, 2, scratch); + print("
\n"); + scratch->padded = 2; + mmd_export_token_tree_html(out, source, t->child, t->start + offset, scratch); + pad(out, 1, scratch); + print("
"); + scratch->padded = 0; + break; + case BLOCK_CODE_FENCED: + case BLOCK_CODE_INDENTED: + pad(out, 2, scratch); + print("
");
+			mmd_export_token_tree_html_raw(out, source, t->child, t->start + offset, scratch);
+			print("
"); + scratch->padded = 0; + break; + case BLOCK_EMPTY: + break; + case BLOCK_H1: + case BLOCK_H2: + case BLOCK_H3: + case BLOCK_H4: + case BLOCK_H5: + case BLOCK_H6: + pad(out, 2, scratch); + temp_short = t->type - BLOCK_H1 + 1; + if (scratch->extensions & EXT_NO_LABELS) { + printf("", temp_short); + } else { + temp_char = label_from_token(source, t); + printf("", temp_short, temp_char); + free(temp_char); + } + mmd_export_token_tree_html(out, source, t->child, t->start + offset, scratch); + printf("", temp_short); + scratch->padded = 0; + break; + case BLOCK_HR: + pad(out, 2, scratch); + print("
"); + scratch->padded = 0; + break; + case BLOCK_HTML: + pad(out, 2, scratch); + print_token_raw(out, source, t); + scratch->padded = 1; + break; + case BLOCK_LIST_BULLETED_LOOSE: + case BLOCK_LIST_BULLETED: + temp_short = scratch->list_is_tight; + switch (t->type) { + case BLOCK_LIST_BULLETED_LOOSE: + scratch->list_is_tight = false; + break; + case BLOCK_LIST_BULLETED: + scratch->list_is_tight = true; + break; + } + pad(out, 2, scratch); + print("
    "); + scratch->padded = 0; + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + pad(out, 1, scratch); + print("
"); + scratch->padded = 0; + scratch->list_is_tight = temp_short; + break; + case BLOCK_LIST_ENUMERATED_LOOSE: + case BLOCK_LIST_ENUMERATED: + temp_short = scratch->list_is_tight; + switch (t->type) { + case BLOCK_LIST_ENUMERATED_LOOSE: + scratch->list_is_tight = false; + break; + case BLOCK_LIST_ENUMERATED: + scratch->list_is_tight = true; + break; + } + pad(out, 2, scratch); + print("
    "); + scratch->padded = 0; + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + pad(out, 1, scratch); + print("
"); + scratch->padded = 0; + scratch->list_is_tight = temp_short; + break; + case BLOCK_LIST_ITEM: + pad(out, 1, scratch); + print("
  • "); + scratch->padded = 2; + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print("
  • "); + scratch->padded = 0; + break; + case BLOCK_LIST_ITEM_TIGHT: + pad(out, 1, scratch); + print("
  • "); + + if (!scratch->list_is_tight) + print("

    "); + + scratch->padded = 2; + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + + if (!scratch->list_is_tight) + print("

    "); + + print("
  • "); + scratch->padded = 0; + break; + case BLOCK_PARA: + case BLOCK_DEF_CITATION: + case BLOCK_DEF_FOOTNOTE: + pad(out, 2, scratch); + + if (!scratch->list_is_tight) + print("

    "); + + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + + if (scratch->footnote_being_printed) { + scratch->footnote_para_counter--; + + if (scratch->footnote_para_counter == 0) { + printf("  ↩", scratch->footnote_being_printed, LC("return to body")); + } + } + + if (scratch->citation_being_printed) { + scratch->footnote_para_counter--; + + if (scratch->footnote_para_counter == 0) { + printf("  ↩", scratch->citation_being_printed, LC("return to body")); + } + } + + if (!scratch->list_is_tight) + print("

    "); + scratch->padded = 0; + break; + case BRACE_DOUBLE_LEFT: + print("{{"); + break; + case BRACE_DOUBLE_RIGHT: + print("}}"); + break; + case BRACKET_LEFT: + print("["); + break; + case BRACKET_CITATION_LEFT: + print("[#"); + break; + case BRACKET_FOOTNOTE_LEFT: + print("[^"); + break; + case BRACKET_IMAGE_LEFT: + print("!["); + break; + case BRACKET_VARIABLE_LEFT: + print("[\%"); + break; + case BRACKET_RIGHT: + print("]"); + break; + case COLON: + print(":"); + break; + case CRITIC_ADD_OPEN: + print("{++"); + break; + case CRITIC_ADD_CLOSE: + print("++}"); + break; + case CRITIC_COM_OPEN: + print("{>>"); + break; + case CRITIC_COM_CLOSE: + print("<<}"); + break; + case CRITIC_DEL_OPEN: + print("{--"); + break; + case CRITIC_DEL_CLOSE: + print("--}"); + break; + case CRITIC_HI_OPEN: + print("{=="); + break; + case CRITIC_HI_CLOSE: + print("==}"); + break; + case CRITIC_SUB_OPEN: + print("{~~"); + break; + case CRITIC_SUB_DIV: + print("~>"); + break; + case CRITIC_SUB_CLOSE: + print("~~}"); + break; + case DASH_M: + if (!(scratch->extensions & EXT_SMART)) { + print_token(t); + } else { + print_localized(DASH_M); + } + break; + case DASH_N: + if (!(scratch->extensions & EXT_SMART)) { + print_token(t); + } else { + print_localized(DASH_N); + } + break; + case DOC_START_TOKEN: + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + break; + case ELLIPSIS: + if (!(scratch->extensions & EXT_SMART)) { + print_token(t); + } else { + print_localized(ELLIPSIS); + } + break; + case EMPH_START: + print(""); + break; + case EMPH_STOP: + print(""); + break; + case ESCAPED_CHARACTER: + mmd_print_char_html(out, source[t->start + 1], false); + break; + case HASH1: + case HASH2: + case HASH3: + case HASH4: + case HASH5: + case HASH6: + print_token(t); + break; + case INDENT_SPACE: + print_char(' '); + break; + case INDENT_TAB: + print_char('\t'); + break; + case LINE_LIST_BULLETED: + case LINE_LIST_ENUMERATED: + mmd_export_token_tree_html(out, source, 
t->child, offset, scratch); + break; + case MARKER_BLOCKQUOTE: + case MARKER_H1: + case MARKER_H2: + case MARKER_H3: + case MARKER_H4: + case MARKER_H5: + case MARKER_H6: + break; + case MARKER_LIST_BULLET: + case MARKER_LIST_ENUMERATOR: + break; + case MATH_BRACKET_OPEN: + if (t->mate) { + print("\\["); + } else + print("\\["); + break; + case MATH_BRACKET_CLOSE: + if (t->mate) { + print("\\]"); + } else + print("\\]"); + break; + case MATH_DOLLAR_SINGLE: + if (t->mate) { + (t->start < t->mate->start) ? ( print("\\(") ) : ( print("\\)") ); + } else { + print("$"); + } + break; + case MATH_DOLLAR_DOUBLE: + if (t->mate) { + (t->start < t->mate->start) ? ( print("\\[") ) : ( print("\\]") ); + } else { + print("$$"); + } + break; + case MATH_PAREN_OPEN: + if (t->mate) { + print("\\("); + } else + print("\\("); + break; + case MATH_PAREN_CLOSE: + if (t->mate) { + print("\\)"); + } else + print("\\)"); + break; + case NON_INDENT_SPACE: + print_char(' '); + break; + case PAIR_BACKTICK: + // Strip leading whitespace + switch (t->child->next->type) { + case TEXT_NL: + case INDENT_TAB: + case INDENT_SPACE: + case NON_INDENT_SPACE: + t->child->next->type = TEXT_EMPTY; + break; + case TEXT_PLAIN: + while (t->child->next->len && char_is_whitespace(source[t->child->next->start])) { + t->child->next->start++; + t->child->next->len--; + } + break; + } + + // Strip trailing whitespace + switch (t->child->mate->prev->type) { + case TEXT_NL: + case INDENT_TAB: + case INDENT_SPACE: + case NON_INDENT_SPACE: + t->child->mate->prev->type = TEXT_EMPTY; + break; + case TEXT_PLAIN: + while (t->child->mate->prev->len && char_is_whitespace(source[t->child->mate->prev->start + t->child->mate->prev->len - 1])) { + t->child->mate->prev->len--; + } + break; + } + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + print(""); + mmd_export_token_tree_html_raw(out, source, t->child, offset, scratch); + print(""); + break; + case PAIR_ANGLE: + temp_token = t; + + temp_char = 
url_accept(source, &temp_token, true); + + if (temp_char) { + if (scan_email(temp_char)) + temp_bool = true; + else + temp_bool = false; + print(""); + mmd_print_string_html(out, temp_char, temp_bool); + print(""); + } else if (scan_html(&source[t->start])) { + print_token(t); + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + + free(temp_char); + break; + case PAIR_BRACES: + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + break; + case PAIR_BRACKET: + case PAIR_BRACKET_IMAGE: + parse_brackets(source, scratch, t, &temp_link, &temp_short, &temp_bool); + + if (temp_link) { + if (t->type == PAIR_BRACKET) { + // Link + mmd_export_link_html(out, source, t, temp_link, offset, scratch); + } else { + // Image + mmd_export_image_html(out, source, t, temp_link, offset, scratch); + } + + if (temp_bool) { + link_free(temp_link); + } + + scratch->skip_token = temp_short; + + return; + } + + // No links exist, so treat as normal + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + break; + case PAIR_BRACKET_CITATION: + if (scratch->extensions & EXT_NOTES) { + citation_from_bracket(source, scratch, t, &temp_short); + + if (temp_short < scratch->used_citations->size) { + // Re-using previous citation + printf("[%d]", + temp_short, LC("see citation"), temp_short); + } else { + // This is a new citation + printf("[%d]", + temp_short, temp_short, LC("see citation"), temp_short); + } + } else { + // Footnotes disabled + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_BRACKET_FOOTNOTE: + if (scratch->extensions & EXT_NOTES) { + footnote_from_bracket(source, scratch, t, &temp_short); + + if (temp_short < scratch->used_footnotes->size) { + // Re-using previous footnote + printf("[%d]", + temp_short, LC("see footnote"), temp_short); + } else { + // This is a new footnote + printf("[%d]", + temp_short, temp_short, LC("see footnote"), temp_short); + } + } else { + // 
Footnotes disabled + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_CRITIC_ADD: + // Ignore if we're rejecting + if (scratch->extensions & EXT_CRITIC_REJECT) + break; + if (scratch->extensions & EXT_CRITIC) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + if (scratch->extensions & EXT_CRITIC_ACCEPT) { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } else { + print(""); + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print(""); + } + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_CRITIC_DEL: + // Ignore if we're accepting + if (scratch->extensions & EXT_CRITIC_ACCEPT) + break; + if (scratch->extensions & EXT_CRITIC) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + if (scratch->extensions & EXT_CRITIC_REJECT) { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } else { + print(""); + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print(""); + } + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_CRITIC_COM: + // Ignore if we're rejecting or accepting + if ((scratch->extensions & EXT_CRITIC_REJECT) || + (scratch->extensions & EXT_CRITIC_ACCEPT)) + break; + if (scratch->extensions & EXT_CRITIC) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + print(""); + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print(""); + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_CRITIC_HI: + // Ignore if we're rejecting or accepting + if ((scratch->extensions & EXT_CRITIC_REJECT) || + (scratch->extensions & EXT_CRITIC_ACCEPT)) + break; + if (scratch->extensions & EXT_CRITIC) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + print(""); + mmd_export_token_tree_html(out, 
source, t->child, offset, scratch); + print(""); + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case CRITIC_SUB_DIV_A: + print("~"); + break; + case CRITIC_SUB_DIV_B: + print(">"); + break; + case PAIR_CRITIC_SUB_DEL: + if ((scratch->extensions & EXT_CRITIC) && + (t->next->type == PAIR_CRITIC_SUB_ADD)) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + if (scratch->extensions & EXT_CRITIC_ACCEPT) { + + } else if (scratch->extensions & EXT_CRITIC_REJECT) { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } else { + print(""); + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print(""); + } + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_CRITIC_SUB_ADD: + if ((scratch->extensions & EXT_CRITIC) && + (t->prev->type == PAIR_CRITIC_SUB_DEL)) { + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + if (scratch->extensions & EXT_CRITIC_REJECT) { + + } else if (scratch->extensions & EXT_CRITIC_ACCEPT) { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } else { + print(""); + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + print(""); + } + } else { + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + } + break; + case PAIR_MATH: + case PAIR_PAREN: + case PAIR_QUOTE_DOUBLE: + case PAIR_QUOTE_SINGLE: + case PAIR_STAR: + case PAIR_UL: + mmd_export_token_tree_html(out, source, t->child, offset, scratch); + break; + case PAREN_LEFT: + print("("); + break; + case PAREN_RIGHT: + print(")"); + break; + case PIPE: + print_token(t); + break; + case PLUS: + print_token(t); + break; + case QUOTE_SINGLE: + if ((t->mate == NULL) || (!(scratch->extensions & EXT_SMART))) + print("'"); + else + (t->start < t->mate->start) ? 
( print_localized(QUOTE_LEFT_SINGLE) ) : ( print_localized(QUOTE_RIGHT_SINGLE) ); + break; + case QUOTE_DOUBLE: + if ((t->mate == NULL) || (!(scratch->extensions & EXT_SMART))) + print("""); + else + (t->start < t->mate->start) ? ( print_localized(QUOTE_LEFT_DOUBLE) ) : ( print_localized(QUOTE_RIGHT_DOUBLE) ); + break; + case QUOTE_RIGHT_ALT: + if ((t->mate == NULL) || (!(scratch->extensions & EXT_SMART))) + print("''"); + else + print_localized(QUOTE_RIGHT_DOUBLE); + break; + case STAR: + print_token(t); + break; + case STRONG_START: + print(""); + break; + case STRONG_STOP: + print(""); + break; + case SUBSCRIPT: + if (t->mate) { + (t->start < t->mate->start) ? (print("")) : (print("")); + } else if (t->len != 1) { + print(""); + mmd_export_token_html(out, source, t->child, offset, scratch); + print(""); + } else { + print("~"); + } + break; + case SUPERSCRIPT: + if (t->mate) { + (t->start < t->mate->start) ? (print("")) : (print("")); + } else if (t->len != 1) { + print(""); + mmd_export_token_html(out, source, t->child, offset, scratch); + print(""); + } else { + print("^"); + } + break; + case TEXT_LINEBREAK: + if (t->next) { + print("
    \n"); + scratch->padded = 1; + } + break; + case CODE_FENCE: + case TEXT_EMPTY: + break; + case TEXT_NL: + if (t->next) + print_char('\n'); + break; + case TEXT_NUMBER_POSS_LIST: + case TEXT_PERIOD: + case TEXT_PLAIN: + print_token(t); + break; + case UL: + print_token(t); + break; + default: + fprintf(stderr, "Unknown token type: %d\n", t->type); + break; + } +} + + +void mmd_export_token_tree_html(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { + while (t != NULL) { + if (scratch->skip_token) { + scratch->skip_token--; + } else { + mmd_export_token_html(out, source, t, offset, scratch); + } + + t = t->next; + } +} + + +void mmd_export_token_html_raw(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { + if (t == NULL) + return; + + switch (t->type) { + case BACKTICK: + print_token(t); + break; + case AMPERSAND: + print("&"); + break; + case AMPERSAND_LONG: + print("&amp;"); + break; + case ANGLE_RIGHT: + print(">"); + break; + case ANGLE_LEFT: + print("<"); + break; + case ESCAPED_CHARACTER: + print("\\"); + mmd_print_char_html(out, source[t->start + 1], false); + break; + case QUOTE_DOUBLE: + print("""); + break; + case CODE_FENCE: + t->next->type = TEXT_EMPTY; + case TEXT_EMPTY: + break; + default: + if (t->child) + mmd_export_token_tree_html_raw(out, source, t->child, offset, scratch); + else + print_token(t); + break; + } +} + + +void mmd_export_token_tree_html_raw(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { + while (t != NULL) { + if (scratch->skip_token) { + scratch->skip_token--; + } else { + mmd_export_token_html_raw(out, source, t, offset, scratch); + } + + t = t->next; + } +} + + +void mmd_export_footnote_list_html(DString * out, const char * source, scratch_pad * scratch) { + if (scratch->used_footnotes->size > 0) { + footnote * note; + token * content; + + pad(out, 2, scratch); + print("
    \n
    \n
      "); + scratch->padded = 0; + + for (int i = 0; i < scratch->used_footnotes->size; ++i) + { + // Export footnote + pad(out, 2, scratch); + + printf("
    1. \n", i + 1); + scratch->padded = 6; + + note = stack_peek_index(scratch->used_footnotes, i); + content = note->content; + + scratch->footnote_para_counter = 0; + + // We need to know which block is the last one in the footnote + while(content) { + if (content->type == BLOCK_PARA) + scratch->footnote_para_counter++; + + content = content->next; + } + + content = note->content; + scratch->footnote_being_printed = i + 1; + + mmd_export_token_tree_html(out, source, content, 0, scratch); + + pad(out, 1, scratch); + printf("
    2. "); + scratch->padded = 0; + } + + pad(out, 2, scratch); + print("
    \n
    "); + scratch->padded = 0; + } +} + + +void mmd_export_citation_list_html(DString * out, const char * source, scratch_pad * scratch) { + if (scratch->used_citations->size > 0) { + footnote * note; + token * content; + + pad(out, 2, scratch); + print("
    \n
    \n
      "); + scratch->padded = 0; + + for (int i = 0; i < scratch->used_citations->size; ++i) + { + // Export footnote + pad(out, 2, scratch); + + printf("
    1. \n", i + 1); + scratch->padded = 6; + + note = stack_peek_index(scratch->used_citations, i); + content = note->content; + + scratch->footnote_para_counter = 0; + + // We need to know which block is the last one in the footnote + while(content) { + if (content->type == BLOCK_PARA) + scratch->footnote_para_counter++; + + content = content->next; + } + + content = note->content; + scratch->citation_being_printed = i + 1; + + mmd_export_token_tree_html(out, source, content, 0, scratch); + + pad(out, 1, scratch); + printf("
    2. "); + scratch->padded = 0; + } + + pad(out, 2, scratch); + print("
    \n
    "); + scratch->padded = 0; + } +} + + + diff --git a/src/html.h b/src/html.h new file mode 100644 index 0000000..7caf98e --- /dev/null +++ b/src/html.h @@ -0,0 +1,74 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file html.h + + @brief Convert token tree to HTML output. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + + +#ifndef HTML_MULTIMARKDOWN_H +#define HTML_MULTIMARKDOWN_H + +#include "d_string.h" +#include "token.h" +#include "writer.h" + +void mmd_export_token_html(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch); +void mmd_export_token_tree_html(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch); + + +void mmd_export_token_html_raw(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch); +void mmd_export_token_tree_html_raw(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch); + +void mmd_export_citation_list_html(DString * out, const char * source, scratch_pad * scratch); +void mmd_export_footnote_list_html(DString * out, const char * source, scratch_pad * scratch); + + +#endif diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..2ab5080 --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,988 @@ +/* Generated by re2c 0.14.3 on Wed Jan 18 22:23:18 2017 */ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file lexer.re + + @brief Description of the regular expressions used to define tokens, + used by re2c to create a lexer/tokenizer. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + +#include "lexer.h" +#include "libMultiMarkdown.h" +#include "parser.h" + + +// Basic scanner struct + +#define YYCTYPE char +#define YYCURSOR s->cur +#define YYMARKER s->ptr +#define YYCTXMARKER s->ctx + + +int scan(Scanner * s, const char * stop) { + + scan: + + if (s->cur >= stop) { + return 0; + } + + s->start = s->cur; + + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + + yych = *YYCURSOR; + switch (yych) { + case '\t': goto yy39; + case '\n': goto yy44; + case '\r': goto yy46; + case ' ': goto yy41; + case '!': goto yy17; + case '"': goto yy25; + case '#': goto yy42; + case '$': goto yy35; + case '&': goto yy32; + case '\'': goto yy27; + case '(': goto yy18; + case ')': goto yy20; + case '*': goto yy47; + case '+': goto yy4; + case '-': goto yy6; + case '.': goto yy29; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy43; + case ':': goto yy30; + case '<': goto yy8; + case '=': goto yy12; + case '>': goto yy22; + case '[': goto yy13; + case '\\': goto yy34; + case ']': goto yy15; + case '^': goto yy37; + case '_': goto yy49; + case '`': goto yy51; + case '{': goto yy2; + case '|': goto yy53; + case '}': goto yy24; + case '~': goto yy10; + default: goto yy55; + } +yy2: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '+': goto yy248; + case '-': goto yy247; + case '=': goto yy244; + case '>': goto yy246; + case '{': goto yy242; + case '~': goto yy245; + default: goto yy3; + } +yy3: + { goto scan; } +yy4: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '+': goto yy239; + default: goto yy5; + } +yy5: + { return PLUS; } +yy6: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '-': goto yy233; + default: goto yy7; + } +yy7: + { return DASH_N; } +yy8: + yyaccept = 2; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '<': goto yy230; + default: goto yy9; + } +yy9: + { return ANGLE_LEFT; } +yy10: + yyaccept = 3; + 
yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '>': goto yy226; + case '~': goto yy225; + default: goto yy11; + } +yy11: + { return SUBSCRIPT; } +yy12: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '=': goto yy222; + default: goto yy3; + } +yy13: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '#': goto yy218; + case '%': goto yy216; + case '^': goto yy220; + default: goto yy14; + } +yy14: + { return BRACKET_LEFT; } +yy15: + ++YYCURSOR; + { return BRACKET_RIGHT; } +yy17: + yych = *++YYCURSOR; + switch (yych) { + case '[': goto yy214; + default: goto yy3; + } +yy18: + ++YYCURSOR; + { return PAREN_LEFT; } +yy20: + ++YYCURSOR; + { return PAREN_RIGHT; } +yy22: + ++YYCURSOR; + { return ANGLE_RIGHT; } +yy24: + yych = *++YYCURSOR; + switch (yych) { + case '}': goto yy212; + default: goto yy3; + } +yy25: + ++YYCURSOR; + { return QUOTE_DOUBLE; } +yy27: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '\'': goto yy210; + default: goto yy28; + } +yy28: + { return QUOTE_SINGLE; } +yy29: + YYCTXMARKER = YYCURSOR + 1; + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '\t': goto yy200; + case '\n': goto yy197; + case '\r': goto yy199; + case ' ': goto yy202; + case '.': goto yy203; + default: goto yy3; + } +yy30: + ++YYCURSOR; + { return COLON; } +yy32: + yyaccept = 4; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'A': + case 'a': goto yy192; + default: goto yy33; + } +yy33: + { return AMPERSAND; } +yy34: + yych = *++YYCURSOR; + switch (yych) { + case '!': goto yy180; + case '"': goto yy170; + case '#': goto yy150; + case '$': goto yy148; + case '%': goto yy146; + case '&': goto yy134; + case '\'': goto yy168; + case '(': goto yy162; + case ')': goto yy160; + case '*': goto yy126; + case '+': goto yy144; + case ',': goto yy176; + case '-': goto yy142; + case '.': goto yy182; + case '/': goto yy130; + case ':': goto yy172; + case ';': goto yy174; + case '<': goto yy138; + case '=': goto 
yy140; + case '>': goto yy136; + case '?': goto yy178; + case '@': goto yy132; + case '[': goto yy154; + case '\\': goto yy120; + case ']': goto yy152; + case '^': goto yy128; + case '_': goto yy124; + case '`': goto yy166; + case '{': goto yy158; + case '|': goto yy122; + case '}': goto yy156; + case '~': goto yy164; + default: goto yy3; + } +yy35: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '$': goto yy118; + default: goto yy36; + } +yy36: + { return MATH_DOLLAR_SINGLE; } +yy37: + ++YYCURSOR; + { return SUPERSCRIPT; } +yy39: + ++YYCURSOR; + { return INDENT_TAB; } +yy41: + yych = *++YYCURSOR; + switch (yych) { + case ' ': goto yy110; + default: goto yy3; + } +yy42: + YYCTXMARKER = YYCURSOR + 1; + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': goto yy71; + case '#': goto yy69; + default: goto yy3; + } +yy43: + YYCTXMARKER = YYCURSOR + 1; + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '.': goto yy60; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy62; + default: goto yy3; + } +yy44: + ++YYCURSOR; +yy45: + { return TEXT_NL; } +yy46: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy44; + default: goto yy45; + } +yy47: + ++YYCURSOR; + { return STAR; } +yy49: + ++YYCURSOR; + { return UL; } +yy51: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy59; +yy52: + { return BACKTICK; } +yy53: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy57; +yy54: + { return PIPE; } +yy55: + yych = *++YYCURSOR; + goto yy3; +yy56: + ++YYCURSOR; + yych = *YYCURSOR; +yy57: + switch (yych) { + case '|': goto yy56; + default: goto yy54; + } +yy58: + ++YYCURSOR; + yych = *YYCURSOR; +yy59: + switch (yych) { + case '`': goto yy58; + default: goto yy52; + } +yy60: + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case ' ': goto yy67; + case '\n': goto yy64; + case '\r': goto yy66; + default: goto yy61; + } 
+yy61: + YYCURSOR = YYMARKER; + switch (yyaccept) { + case 0: goto yy3; + case 1: goto yy5; + case 2: goto yy9; + case 3: goto yy11; + case 4: goto yy33; + default: goto yy198; + } +yy62: + YYCTXMARKER = YYCURSOR + 1; + ++YYCURSOR; + yych = *YYCURSOR; + switch (yych) { + case '.': goto yy60; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy62; + default: goto yy61; + } +yy64: + ++YYCURSOR; +yy65: + YYCURSOR = YYCTXMARKER; + { return TEXT_NUMBER_POSS_LIST; } +yy66: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy64; + default: goto yy65; + } +yy67: + ++YYCURSOR; + yych = *YYCURSOR; + switch (yych) { + case '\t': + case ' ': goto yy67; + default: goto yy65; + } +yy69: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': goto yy77; + case '#': goto yy82; + default: goto yy61; + } +yy70: + ++YYCURSOR; + yych = *YYCURSOR; +yy71: + switch (yych) { + case '\t': + case ' ': goto yy70; + case '\n': goto yy73; + case '\r': goto yy75; + default: goto yy72; + } +yy72: + { return HASH1; } +yy73: + ++YYCURSOR; +yy74: + YYCURSOR = YYCTXMARKER; + { return HASH1; } +yy75: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy73; + default: goto yy74; + } +yy76: + ++YYCURSOR; + yych = *YYCURSOR; +yy77: + switch (yych) { + case '\t': + case ' ': goto yy76; + case '\n': goto yy79; + case '\r': goto yy81; + default: goto yy78; + } +yy78: + { return HASH2; } +yy79: + ++YYCURSOR; +yy80: + YYCURSOR = YYCTXMARKER; + { return HASH2; } +yy81: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy79; + default: goto yy80; + } +yy82: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': goto yy85; + case '#': goto yy83; + default: goto yy61; + } +yy83: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case '\n': + case '\r': + 
case ' ': goto yy91; + case '#': goto yy96; + default: goto yy61; + } +yy84: + ++YYCURSOR; + yych = *YYCURSOR; +yy85: + switch (yych) { + case '\t': + case ' ': goto yy84; + case '\n': goto yy87; + case '\r': goto yy89; + default: goto yy86; + } +yy86: + { return HASH3; } +yy87: + ++YYCURSOR; +yy88: + YYCURSOR = YYCTXMARKER; + { return HASH3; } +yy89: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy87; + default: goto yy88; + } +yy90: + ++YYCURSOR; + yych = *YYCURSOR; +yy91: + switch (yych) { + case '\t': + case ' ': goto yy90; + case '\n': goto yy93; + case '\r': goto yy95; + default: goto yy92; + } +yy92: + { return HASH4; } +yy93: + ++YYCURSOR; +yy94: + YYCURSOR = YYCTXMARKER; + { return HASH4; } +yy95: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy93; + default: goto yy94; + } +yy96: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': goto yy99; + case '#': goto yy97; + default: goto yy61; + } +yy97: + YYCTXMARKER = YYCURSOR + 1; + yych = *++YYCURSOR; + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': goto yy105; + default: goto yy61; + } +yy98: + ++YYCURSOR; + yych = *YYCURSOR; +yy99: + switch (yych) { + case '\t': + case ' ': goto yy98; + case '\n': goto yy101; + case '\r': goto yy103; + default: goto yy100; + } +yy100: + { return HASH5; } +yy101: + ++YYCURSOR; +yy102: + YYCURSOR = YYCTXMARKER; + { return HASH5; } +yy103: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy101; + default: goto yy102; + } +yy104: + ++YYCURSOR; + yych = *YYCURSOR; +yy105: + switch (yych) { + case '\t': + case ' ': goto yy104; + case '\n': goto yy107; + case '\r': goto yy109; + default: goto yy106; + } +yy106: + { return HASH6; } +yy107: + ++YYCURSOR; +yy108: + YYCURSOR = YYCTXMARKER; + { return HASH6; } +yy109: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy107; + default: goto yy108; + } +yy110: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + 
case '\n': goto yy113; + case '\r': goto yy115; + case ' ': goto yy112; + default: goto yy111; + } +yy111: + { return NON_INDENT_SPACE; } +yy112: + yych = *++YYCURSOR; + switch (yych) { + case ' ': goto yy116; + default: goto yy111; + } +yy113: + ++YYCURSOR; +yy114: + { return TEXT_LINEBREAK; } +yy115: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy113; + default: goto yy114; + } +yy116: + ++YYCURSOR; + { return INDENT_SPACE; } +yy118: + ++YYCURSOR; + { return MATH_DOLLAR_DOUBLE; } +yy120: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '(': goto yy184; + case ')': goto yy186; + case '[': goto yy188; + case ']': goto yy190; + default: goto yy121; + } +yy121: + { return ESCAPED_CHARACTER; } +yy122: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy124: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy126: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy128: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy130: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy132: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy134: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy136: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy138: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy140: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy142: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy144: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy146: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy148: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy150: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy152: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy154: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy156: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy158: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy160: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy162: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy164: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy166: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy168: + ++YYCURSOR; + { return 
ESCAPED_CHARACTER; } +yy170: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy172: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy174: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy176: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy178: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy180: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy182: + ++YYCURSOR; + { return ESCAPED_CHARACTER; } +yy184: + ++YYCURSOR; + { return MATH_PAREN_OPEN; } +yy186: + ++YYCURSOR; + { return MATH_PAREN_CLOSE; } +yy188: + ++YYCURSOR; + { return MATH_BRACKET_OPEN; } +yy190: + ++YYCURSOR; + { return MATH_BRACKET_CLOSE; } +yy192: + yych = *++YYCURSOR; + switch (yych) { + case 'M': + case 'm': goto yy193; + default: goto yy61; + } +yy193: + yych = *++YYCURSOR; + switch (yych) { + case 'P': + case 'p': goto yy194; + default: goto yy61; + } +yy194: + yych = *++YYCURSOR; + switch (yych) { + case ';': goto yy195; + default: goto yy61; + } +yy195: + ++YYCURSOR; + { return AMPERSAND_LONG; } +yy197: + ++YYCURSOR; +yy198: + YYCURSOR = YYCTXMARKER; + { return TEXT_PERIOD; } +yy199: + yych = *++YYCURSOR; + switch (yych) { + case '\n': goto yy197; + default: goto yy198; + } +yy200: + ++YYCURSOR; + yych = *YYCURSOR; +yy201: + switch (yych) { + case '\t': + case ' ': goto yy200; + default: goto yy198; + } +yy202: + yyaccept = 5; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case '.': goto yy206; + default: goto yy201; + } +yy203: + yych = *++YYCURSOR; + switch (yych) { + case '.': goto yy204; + default: goto yy61; + } +yy204: + ++YYCURSOR; + { return ELLIPSIS; } +yy206: + yych = *++YYCURSOR; + switch (yych) { + case ' ': goto yy207; + default: goto yy61; + } +yy207: + yych = *++YYCURSOR; + switch (yych) { + case '.': goto yy208; + default: goto yy61; + } +yy208: + ++YYCURSOR; + { return ELLIPSIS; } +yy210: + ++YYCURSOR; + { return QUOTE_RIGHT_ALT; } +yy212: + ++YYCURSOR; + { return BRACE_DOUBLE_RIGHT; } +yy214: + ++YYCURSOR; + { return BRACKET_IMAGE_LEFT; } +yy216: + ++YYCURSOR; + { 
return BRACKET_VARIABLE_LEFT; } +yy218: + ++YYCURSOR; + { return BRACKET_CITATION_LEFT; } +yy220: + ++YYCURSOR; + { return BRACKET_FOOTNOTE_LEFT; } +yy222: + yych = *++YYCURSOR; + switch (yych) { + case '}': goto yy223; + default: goto yy61; + } +yy223: + ++YYCURSOR; + { return CRITIC_HI_CLOSE; } +yy225: + yych = *++YYCURSOR; + switch (yych) { + case '}': goto yy228; + default: goto yy61; + } +yy226: + ++YYCURSOR; + { return CRITIC_SUB_DIV; } +yy228: + ++YYCURSOR; + { return CRITIC_SUB_CLOSE; } +yy230: + yych = *++YYCURSOR; + switch (yych) { + case '}': goto yy231; + default: goto yy61; + } +yy231: + ++YYCURSOR; + { return CRITIC_COM_CLOSE; } +yy233: + ++YYCURSOR; + switch ((yych = *YYCURSOR)) { + case '-': goto yy237; + case '}': goto yy235; + default: goto yy234; + } +yy234: + { return DASH_N; } +yy235: + ++YYCURSOR; + { return CRITIC_DEL_CLOSE; } +yy237: + ++YYCURSOR; + { return DASH_M; } +yy239: + yych = *++YYCURSOR; + switch (yych) { + case '}': goto yy240; + default: goto yy61; + } +yy240: + ++YYCURSOR; + { return CRITIC_ADD_CLOSE; } +yy242: + ++YYCURSOR; + { return BRACE_DOUBLE_LEFT; } +yy244: + yych = *++YYCURSOR; + switch (yych) { + case '=': goto yy257; + default: goto yy61; + } +yy245: + yych = *++YYCURSOR; + switch (yych) { + case '~': goto yy255; + default: goto yy61; + } +yy246: + yych = *++YYCURSOR; + switch (yych) { + case '>': goto yy253; + default: goto yy61; + } +yy247: + yych = *++YYCURSOR; + switch (yych) { + case '-': goto yy251; + default: goto yy61; + } +yy248: + yych = *++YYCURSOR; + switch (yych) { + case '+': goto yy249; + default: goto yy61; + } +yy249: + ++YYCURSOR; + { return CRITIC_ADD_OPEN; } +yy251: + ++YYCURSOR; + { return CRITIC_DEL_OPEN; } +yy253: + ++YYCURSOR; + { return CRITIC_COM_OPEN; } +yy255: + ++YYCURSOR; + { return CRITIC_SUB_OPEN; } +yy257: + ++YYCURSOR; + { return CRITIC_HI_OPEN; } +} + +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..053692b --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,75 @@ 
+/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file lexer.h + + @brief Declares the Scanner state structure and the scan() function + implemented by the re2c-generated lexer/tokenizer. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + +/// Re2c scanner data -- this structure is used by the re2c +/// lexer to track progress and offsets within the source +/// string. 
They can be used to create "tokens" that match +/// sections of the text with an abstract syntax tree. +struct Scanner { + const char * start; //!< Start of current token + const char * cur; //!< Character currently being matched + const char * ptr; //!< Used for backtracking by re2c + const char * ctx; //!< Context marker (YYCTXMARKER) used by re2c for trailing-context rules +}; + +typedef struct Scanner Scanner; + + +/// Scan for the next token; returns its token type, or 0 once the cursor reaches `stop` +int scan( + Scanner * s, //!< Pointer to Scanner state structure + const char * stop //!< Pointer to position in string at which to stop parsing +); + diff --git a/src/lexer.re b/src/lexer.re new file mode 100644 index 0000000..2d7b6d8 --- /dev/null +++ b/src/lexer.re @@ -0,0 +1,227 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file lexer.re + + @brief Description of the regular expressions used to define tokens, + used by re2c to create a lexer/tokenizer. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + +#include "lexer.h" +#include "libMultiMarkdown.h" +#include "parser.h" + + +// re2c interface macros -- map the names used by re2c-generated code onto the Scanner fields + +#define YYCTYPE char +#define YYCURSOR s->cur +#define YYMARKER s->ptr +#define YYCTXMARKER s->ctx + + +int scan(Scanner * s, const char * stop) { // Returns the type of the next token, or 0 once s->cur reaches stop + + scan: // Re-entry point: the catch-all rule at the end jumps back here to skip one character + + if (s->cur >= stop) { // Nothing left before stop: report 0 + return 0; + } + + s->start = s->cur; // Remember where the token being matched begins + + /*!re2c + re2c:yyfill:enable = 0; + + NL = "\r\n" | '\n' | '\r'; + SP = [ \t]+; + + SPNL = [ \t]* NL; + + INDENT_TAB = '\t'; + INDENT_SPACE = ' '{4}; + NON_INDENT_SPACE = ' '{2,3}; + + TEXT_LINEBREAK = ' '{2} NL; + + // The order of these seems to matter + + "{++" { return CRITIC_ADD_OPEN; } + "++}" { return CRITIC_ADD_CLOSE; } + + "{--" { return CRITIC_DEL_OPEN; } + "--}" { return CRITIC_DEL_CLOSE; } + + "{>>" { return CRITIC_COM_OPEN; } + "<<}" { return CRITIC_COM_CLOSE; } + + "{~~" { return CRITIC_SUB_OPEN; } + "~>" { return CRITIC_SUB_DIV; } + "~~}" { return CRITIC_SUB_CLOSE; } + + "{==" { return CRITIC_HI_OPEN; } + "==}" { return CRITIC_HI_CLOSE; } + + "[" { return BRACKET_LEFT; } + "]" { return BRACKET_RIGHT; } + + "![" { return BRACKET_IMAGE_LEFT; } + "[^" { return BRACKET_FOOTNOTE_LEFT; } + "[#" { return BRACKET_CITATION_LEFT; } + "[%" { return BRACKET_VARIABLE_LEFT; } + + "(" { return PAREN_LEFT; } + ")" { return PAREN_RIGHT; } + + "<" { return ANGLE_LEFT; } + ">" { return ANGLE_RIGHT; } + + "{{" { return BRACE_DOUBLE_LEFT; } + "}}" { return BRACE_DOUBLE_RIGHT; } + + "\"" { return QUOTE_DOUBLE; } + "'" { return QUOTE_SINGLE; } + // "``" { return 
QUOTE_LEFT_ALT; } + "''" { return QUOTE_RIGHT_ALT; } + "-" { return DASH_N; } + "--" { return DASH_N; } + "---" { return DASH_M; } + "..." { return ELLIPSIS; } + ". . ." { return ELLIPSIS; } + ":" { return COLON; } + '&amp;' { return AMPERSAND_LONG; } + "&" { return AMPERSAND; } + + "\\." { return ESCAPED_CHARACTER; } + "\\!" { return ESCAPED_CHARACTER; } + "\\?" { return ESCAPED_CHARACTER; } + + "\\," { return ESCAPED_CHARACTER; } + "\\;" { return ESCAPED_CHARACTER; } + "\\:" { return ESCAPED_CHARACTER; } + + "\\\"" { return ESCAPED_CHARACTER; } + "\\'" { return ESCAPED_CHARACTER; } + "\\`" { return ESCAPED_CHARACTER; } + "\\~" { return ESCAPED_CHARACTER; } + + "\\(" { return ESCAPED_CHARACTER; } + "\\)" { return ESCAPED_CHARACTER; } + "\\{" { return ESCAPED_CHARACTER; } + "\\}" { return ESCAPED_CHARACTER; } + "\\[" { return ESCAPED_CHARACTER; } + "\\]" { return ESCAPED_CHARACTER; } + + "\\#" { return ESCAPED_CHARACTER; } + "\\$" { return ESCAPED_CHARACTER; } + "\\%" { return ESCAPED_CHARACTER; } + "\\+" { return ESCAPED_CHARACTER; } + "\\-" { return ESCAPED_CHARACTER; } + "\\=" { return ESCAPED_CHARACTER; } + "\\<" { return ESCAPED_CHARACTER; } + "\\>" { return ESCAPED_CHARACTER; } + + "\\&" { return ESCAPED_CHARACTER; } + "\\@" { return ESCAPED_CHARACTER; } + "\\\\" { return ESCAPED_CHARACTER; } + "\\/" { return ESCAPED_CHARACTER; } + "\\^" { return ESCAPED_CHARACTER; } + + "\\*" { return ESCAPED_CHARACTER; } + "\\_" { return ESCAPED_CHARACTER; } + + "\\|" { return ESCAPED_CHARACTER; } + + "\\\\(" { return MATH_PAREN_OPEN; } + "\\\\)" { return MATH_PAREN_CLOSE; } + "\\\\[" { return MATH_BRACKET_OPEN; } + "\\\\]" { return MATH_BRACKET_CLOSE; } + "$" { return MATH_DOLLAR_SINGLE; } + "$$" { return MATH_DOLLAR_DOUBLE; } + + '^' { return SUPERSCRIPT; } + "~" { return SUBSCRIPT; } + + INDENT_TAB { return INDENT_TAB; } + INDENT_SPACE { return INDENT_SPACE; } + + '#' SP { return HASH1; } + '#' / SPNL { return HASH1; } + '#'{2} SP { return HASH2; } + '#'{2} / SPNL { return 
HASH2; } + '#'{3} SP { return HASH3; } + '#'{3} / SPNL { return HASH3; } + '#'{4} SP { return HASH4; } + '#'{4} / SPNL { return HASH4; } + '#'{5} SP { return HASH5; } + '#'{5} / SPNL { return HASH5; } + '#'{6} SP { return HASH6; } + '#'{6} / SPNL { return HASH6; } + + + [0-9]+ / ('.' (SP|NL)) { return TEXT_NUMBER_POSS_LIST; } + '.' / (SP|NL) { return TEXT_PERIOD; } + + TEXT_LINEBREAK { return TEXT_LINEBREAK; } + NL { return TEXT_NL; } + + NON_INDENT_SPACE { return NON_INDENT_SPACE; } + + "*" { return STAR; } + "+" { return PLUS; } + "_" { return UL; } + + '`'+ { return BACKTICK; } + + '|'+ { return PIPE; } + + // Skip over anything else - '.' does not include '\n' + . { goto scan; } + */ +} diff --git a/src/libMultiMarkdown.h b/src/libMultiMarkdown.h new file mode 100644 index 0000000..a0bcfe8 --- /dev/null +++ b/src/libMultiMarkdown.h @@ -0,0 +1,298 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file libMultiMarkdown.h + + @brief Header file for libMultiMarkdown. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + + +#ifndef MMD6_H +#define MMD6_H + +#include <stdbool.h> +#include <stdlib.h> + + +#include "d_string.h" +#include "token.h" + + +/// MMD Engine is used for storing configuration information for MMD parser +typedef struct mmd_engine mmd_engine; + + +/// Create MMD Engine using an existing DString (A new copy is *not* made) +mmd_engine * mmd_engine_create_with_dstring( + DString * d, + unsigned long extensions +); + + +/// Create MMD Engine using a C string (A private copy of the string will be +/// made. 
The one passed here can be freed by the calling function) +mmd_engine * mmd_engine_create_with_string( + const char * str, + unsigned long extensions +); + + +/// Free an existing MMD Engine +void mmd_engine_free( + mmd_engine * e, + bool freeDString +); + + +/// Parse part of the string into a token tree +token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len); + + +/// Parse the entire string into a token tree +void mmd_engine_parse_string(mmd_engine * e); + + +void mmd_export_token_tree(DString * out, mmd_engine * e, short format); + + +/// Token types for parse tree +enum token_types { + DOC_START_TOKEN = 0, //!< DOC_START_TOKEN must be type 0 + + BLOCK_BLOCKQUOTE = 50, //!< This must start *after* the largest number in parser.h + BLOCK_CODE_FENCED, + BLOCK_CODE_INDENTED, + BLOCK_DEF_CITATION, + BLOCK_DEF_FOOTNOTE, + BLOCK_DEF_LINK, + BLOCK_EMPTY, + BLOCK_HEADING, //!< Placeholder for theme cascading + BLOCK_H1, //!< Leave H1, H2, etc. in order + BLOCK_H2, + BLOCK_H3, + BLOCK_H4, + BLOCK_H5, + BLOCK_H6, + BLOCK_HR, + BLOCK_HTML, + BLOCK_LIST_BULLETED, + BLOCK_LIST_BULLETED_LOOSE, + BLOCK_LIST_ENUMERATED, + BLOCK_LIST_ENUMERATED_LOOSE, + BLOCK_LIST_ITEM, + BLOCK_LIST_ITEM_TIGHT, + BLOCK_PARA, + BLOCK_TABLE, + ROW_TABLE, + + CRITIC_ADD_OPEN, + CRITIC_ADD_CLOSE, + CRITIC_DEL_OPEN, + CRITIC_DEL_CLOSE, + CRITIC_COM_OPEN, + CRITIC_COM_CLOSE, + CRITIC_SUB_OPEN, + CRITIC_SUB_DIV, + CRITIC_SUB_DIV_A, + CRITIC_SUB_DIV_B, + CRITIC_SUB_CLOSE, + CRITIC_HI_OPEN, + CRITIC_HI_CLOSE, + + PAIR_CRITIC_ADD, + PAIR_CRITIC_DEL, + PAIR_CRITIC_COM, + PAIR_CRITIC_SUB_ADD, + PAIR_CRITIC_SUB_DEL, + PAIR_CRITIC_HI, + + PAIRS, //!< Placeholder for theme cascading + PAIR_ANGLE, + PAIR_BACKTICK, + PAIR_BRACKET, + PAIR_BRACKET_FOOTNOTE, + PAIR_BRACKET_CITATION, + PAIR_BRACKET_IMAGE, + PAIR_BRACKET_VARIABLE, + PAIR_MATH, + PAIR_PAREN, + PAIR_QUOTE_SINGLE, + PAIR_QUOTE_DOUBLE, + PAIR_QUOTE_ALT, + PAIR_SUPERSCRIPT, + PAIR_STAR, + PAIR_UL, + PAIR_BRACES, + + STAR, + 
UL, + EMPH_START, + EMPH_STOP, + STRONG_START, + STRONG_STOP, + + BRACKET_LEFT, + BRACKET_RIGHT, + BRACKET_FOOTNOTE_LEFT, + BRACKET_CITATION_LEFT, + BRACKET_IMAGE_LEFT, + BRACKET_VARIABLE_LEFT, + + PAREN_LEFT, + PAREN_RIGHT, + + ANGLE_LEFT, + ANGLE_RIGHT, + + BRACE_DOUBLE_LEFT, + BRACE_DOUBLE_RIGHT, + + AMPERSAND, + AMPERSAND_LONG, + APOSTROPHE, + BACKTICK, + CODE_FENCE, + COLON, + DASH_M, + DASH_N, + ELLIPSIS, + QUOTE_SINGLE, + QUOTE_DOUBLE, + QUOTE_LEFT_SINGLE, + QUOTE_RIGHT_SINGLE, + QUOTE_LEFT_DOUBLE, + QUOTE_RIGHT_DOUBLE, + QUOTE_RIGHT_ALT, + + ESCAPED_CHARACTER, + + MATH_PAREN_OPEN, + MATH_PAREN_CLOSE, + MATH_BRACKET_OPEN, + MATH_BRACKET_CLOSE, + MATH_DOLLAR_SINGLE, + MATH_DOLLAR_DOUBLE, + + PIPE, + PLUS, + + SUPERSCRIPT, + SUBSCRIPT, + + INDENT_TAB, + INDENT_SPACE, + NON_INDENT_SPACE, + + HASH1, //!< Leave HASH1, HASH2, etc. in order + HASH2, + HASH3, + HASH4, + HASH5, + HASH6, + MARKER_BLOCKQUOTE, + MARKER_H1, //!< Leave MARKER_H1, MARKER_H2, etc. in order + MARKER_H2, + MARKER_H3, + MARKER_H4, + MARKER_H5, + MARKER_H6, + MARKER_LIST_BULLET, + MARKER_LIST_ENUMERATOR, + + TEXT_EMPTY, + TEXT_LINEBREAK, + TEXT_NL, + TEXT_NUMBER_POSS_LIST, + TEXT_PERIOD, + TEXT_PLAIN, +}; + + +/// Define smart typography languages -- first in list is default +enum smart_language { + ENGLISH, + DUTCH, + FRENCH, + GERMAN, + GERMANGUILL, + SWEDISH, +}; + + +enum output_format { + FORMAT_HTML +}; + + +enum parser_extensions { + EXT_COMPATIBILITY = 1 << 0, //!< Markdown compatibility mode + EXT_COMPLETE = 1 << 1, //!< Create complete document + EXT_SNIPPET = 1 << 2, //!< Create snippet only + EXT_HEAD_CLOSED = 1 << 3, //!< for use by parser + EXT_SMART = 1 << 4, //!< Enable Smart quotes + EXT_NOTES = 1 << 5, //!< Enable Footnotes + EXT_NO_LABELS = 1 << 6, //!< Don't add anchors to headers, etc. 
+ EXT_FILTER_STYLES = 1 << 7, //!< Filter out style blocks + EXT_FILTER_HTML = 1 << 8, //!< Filter out raw HTML + EXT_PROCESS_HTML = 1 << 9, //!< Process Markdown inside HTML + EXT_NO_METADATA = 1 << 10, //!< Don't parse Metadata + EXT_OBFUSCATE = 1 << 11, //!< Mask email addresses + EXT_CRITIC = 1 << 12, //!< Critic Markup Support + EXT_CRITIC_ACCEPT = 1 << 13, //!< Accept all proposed changes + EXT_CRITIC_REJECT = 1 << 14, //!< Reject all proposed changes + EXT_RANDOM_FOOT = 1 << 15, //!< Use random numbers for footnote links + EXT_HEADINGSECTION = 1 << 16, //!< Group blocks under parent heading + EXT_ESCAPED_LINE_BREAKS = 1 << 17, //!< Escaped line break + EXT_NO_STRONG = 1 << 18, //!< Don't allow nested \<strong\>'s + EXT_NO_EMPH = 1 << 19, //!< Don't allow nested \<em\>'s + EXT_FAKE = 1 << 31, //!< 31 is highest number allowed -- NOTE(review): 1 << 31 shifts into the sign bit of a 32-bit int (undefined in ISO C); confirm before relying on it +}; + + +#endif diff --git a/src/main.c b/src/main.c index 7d8ab53..1ad3e8f 100644 --- a/src/main.c +++ b/src/main.c @@ -1,33 +1,82 @@ -/* +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file main.c + + @brief Create command-line frontend for libMultiMarkdown. - main.c -- Template main() - Copyright © 2015-2016 Fletcher T. Penney. + @author Fletcher T. Penney + @bug - This program is free software you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation either version 2 of the License, or - (at your option) any later version. +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. - You should have received a copy of the GNU General Public License - along with this program if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ */ +#include #include #include +#include + +#include "argtable3.h" #include "d_string.h" +#include "libMultiMarkdown.h" +#include "html.h" +#include "mmd.h" +#include "token.h" +#include "version.h" #define kBUFFERSIZE 4096 // How many bytes to read at a time +// argtable structs +struct arg_lit *a_help, *a_version, *a_compatibility, *a_nolabels, *a_batch, *a_accept, *a_reject; +struct arg_str *a_format; +struct arg_file *a_file, *a_o; +struct arg_end *a_end; +struct arg_rem *a_rem1, *a_rem2, *a_rem3; + + DString * stdin_buffer() { /* Read from stdin and return a GString * `buffer` will need to be freed elsewhere */ @@ -37,17 +86,18 @@ DString * stdin_buffer() { DString * buffer = d_string_new(""); - while ((bytes = fread(chunk, 1, kBUFFERSIZE, stdin)) > 0) { - d_string_append_c_array(buffer, chunk, bytes); - } + while ((bytes = fread(chunk, 1, kBUFFERSIZE, stdin)) > 0) { + d_string_append_c_array(buffer, chunk, bytes); + } fclose(stdin); return buffer; } -DString * scan_file(char * fname) { - /* Read from stdin and return a GString * + +DString * scan_file(const char * fname) { + /* Read from a file and return a GString * `buffer` will need to be freed elsewhere */ char chunk[kBUFFERSIZE]; @@ -61,15 +111,285 @@ DString * scan_file(char * fname) { DString * buffer = d_string_new(""); - while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) { - d_string_append_c_array(buffer, chunk, bytes); - } + while ((bytes = fread(chunk, 1, kBUFFERSIZE, file)) > 0) { + d_string_append_c_array(buffer, chunk, bytes); + } fclose(file); return buffer; } -int main( int argc, char** argv ) { - /* Make your program do whatever you want */ + +/// Given a filename, remove the extension and replace it with a new one. +/// The next extension must include the leading '.', e.g. 
'.html' +char * filename_with_extension(const char * original, const char * new_extension) { + char * name_no_ext; + DString * new_name; + + // Determine output filename without file extension + name_no_ext = strdup(original); + + if (strrchr(name_no_ext, '.') != NULL) { + long count = strrchr(name_no_ext, '.') - name_no_ext; + + if (count != 0) { + name_no_ext[count] = '\0'; + } + } + + new_name = d_string_new(name_no_ext); + free(name_no_ext); + + d_string_append(new_name, new_extension); + + name_no_ext = new_name->str; + + d_string_free(new_name, false); + + return name_no_ext; +} + + +char * mmd_process(DString * buffer, unsigned long extensions, short format) { + char * result; + + mmd_engine * e = mmd_engine_create_with_dstring(buffer, extensions); + + mmd_engine_parse_string(e); + + DString * output = d_string_new(""); + + mmd_export_token_tree(output, e, format); + + result = output->str; + + mmd_engine_free(e, false); + d_string_free(output, false); + + return result; +} + + +int main(int argc, char** argv) { + int exitcode = EXIT_SUCCESS; + char * binname = "multimarkdown"; + short format = 0; + + // Initialize argtable structs + void *argtable[] = { + a_help = arg_lit0(NULL, "help", "display this help and exit"), + a_version = arg_lit0(NULL, "version", "display version info and exit"), + + a_rem1 = arg_rem("", ""), + + a_format = arg_str0("t", "to", "FORMAT", "convert to FORMAT"), + a_o = arg_file0("o", "output", "FILE", "send output to FILE"), + + a_batch = arg_lit0("b", "batch", "process each file separately"), + a_compatibility = arg_lit0("c", "compatibility", "Markdown compatibility mode"), + + a_rem2 = arg_rem("", ""), + + a_accept = arg_lit0("a", "accept", "accept all CriticMarkup changes"), + a_reject = arg_lit0("r", "reject", "reject all CriticMarkup changes"), + + a_rem3 = arg_rem("", ""), + + a_nolabels = arg_lit0(NULL, "nolabels", "Disable id attributes for headers"), + + a_file = arg_filen(NULL, NULL, "", 0, argc+2, "read input from 
file(s)"), + a_end = arg_end(20), + }; + + // Set default options + a_o->filename[0] = "-"; // Default to stdout if no option specified + + int nerrors = arg_parse(argc, argv, argtable); + + // '--help' takes precedence + if (a_help->count > 0) { + printf("\n%s v%s\n\n", MULTIMARKDOWN_6_NAME, MULTIMARKDOWN_6_VERSION); + printf("\tUsage: %s", binname); + arg_print_syntax(stdout, argtable, "\n\n"); + printf("Options:\n"); + arg_print_glossary(stdout, argtable, "\t%-25s %s\n"); + printf("\n"); + goto exit; + } + + if (nerrors > 0) { + // Report errors + arg_print_errors(stdout, a_end, MULTIMARKDOWN_6_NAME); + printf("Try '%s --help' for more information.\n", binname); + exitcode = 1; + goto exit; + } + + // '--version' also takes precedence + if (a_version->count > 0) { + printf("\nMultiMarkdown 6 v%s\n", MULTIMARKDOWN_6_VERSION); + printf("%s\n\n", MULTIMARKDOWN_6_COPYRIGHT); + printf("%s\n", MULTIMARKDOWN_6_LICENSE); + printf("\n"); + goto exit; + } + + + // Parse options + unsigned long extensions = EXT_SMART | EXT_NOTES | EXT_CRITIC; + + if (a_compatibility->count > 0) { + // Compatibility mode disables certain features + // Reset extensions + extensions = EXT_COMPATIBILITY | EXT_NO_LABELS | EXT_OBFUSCATE; + } + + if (a_nolabels->count > 0) { + // Disable header id attributes + extensions |= EXT_NO_LABELS; + } + + if (a_accept->count > 0) { + // Accept CriticMarkup changes + extensions |= EXT_CRITIC_ACCEPT | EXT_CRITIC; + } + + if (a_reject->count > 0) { + // Reject CriticMarkup changes + extensions |= EXT_CRITIC_REJECT | EXT_CRITIC; + } + + if (a_reject->count && a_accept->count) { + // Old options that don't apply now, so change them + extensions &= ~(EXT_CRITIC_REJECT | EXT_CRITIC_ACCEPT); + } + + if (a_format->count > 0) { + if (strcmp(a_format->sval[0], "html") == 0) + format = FORMAT_HTML; + else { + // No valid format found + fprintf(stderr, "%s: Unknown output format '%s'\n", binname, a_format->sval[0]); + exitcode = 1; + goto exit; + } + } + + // 
Determine input + if (a_file->count == 0) { + // Read from stdin + } else { + // Read from files + } + + DString * buffer = NULL; + char * result; + FILE * output_stream; + char * output_filename; + + // Prepare token pool +#ifdef kUseObjectPool + token_pool_init(); +#endif + + // Determine processing mode -- batch/stdin/files?? + + if ((a_batch->count) && (a_file->count)) { + // Batch process 1 or more files + for (int i = 0; i < a_file->count; ++i) + { + buffer = scan_file(a_file->filename[i]); + + if (buffer == NULL) { + fprintf(stderr, "Error reading file '%s'\n", a_file->filename[i]); + exitcode = 1; + goto exit; + } + + // Append output file extension + switch (format) { + case FORMAT_HTML: + output_filename = filename_with_extension(a_file->filename[i], ".html"); + break; + } + + result = mmd_process(buffer, extensions, format); + + if (!(output_stream = fopen(output_filename, "w"))) { + // Failed to open file + perror(output_filename); + } else { + fputs(result, output_stream); + fputc('\n', output_stream); + fclose(output_stream); + } + + d_string_free(buffer, true); + free(result); + free(output_filename); + } + } else { + if (a_file->count) { + // We have files to process + buffer = d_string_new(""); + DString * file_buffer; + + // Concatenate all input files + for (int i = 0; i < a_file->count; ++i) + { + file_buffer = scan_file(a_file->filename[i]); + + if (file_buffer == NULL) { + fprintf(stderr, "Error reading file '%s'\n", a_file->filename[i]); + exitcode = 1; + goto exit; + } + + d_string_append_c_array(buffer, file_buffer->str, file_buffer->currentStringLength); + d_string_free(file_buffer, true); + } + } else { + // Obtain input from stdin + buffer = stdin_buffer(); + } + + result = mmd_process(buffer, extensions, format); + + // Where does output go? 
+ if (strcmp(a_o->filename[0], "-") == 0) { + // direct to stdout + output_stream = stdout; + } else if (!(output_stream = fopen(a_o->filename[0], "w"))) { + perror(a_o->filename[0]); + free(result); + d_string_free(buffer, true); + + exitcode = 1; + goto exit; + } + + fputs(result, output_stream); + fputc('\n', output_stream); + + if (output_stream != stdout) + fclose(output_stream); + + d_string_free(buffer, true); + + free(result); + } + + +exit: + + // Clean up token pool +#ifdef kUseObjectPool + token_pool_free(); +#endif + + // Clean up after argtable + arg_freetable(argtable, sizeof(argtable) / sizeof(argtable[0])); + return exitcode; } + diff --git a/src/mmd.c b/src/mmd.c new file mode 100644 index 0000000..d94fe08 --- /dev/null +++ b/src/mmd.c @@ -0,0 +1,1399 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file mmd.c + + @brief Create MMD parsing engine + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + +#include +#include +#include + +#include "char.h" +#include "d_string.h" +#include "html.h" /// \todo: Remove this for production +#include "lexer.h" +#include "libMultiMarkdown.h" +#include "mmd.h" +#include "object_pool.h" +#include "parser.h" +#include "scanners.h" +#include "stack.h" +#include "token.h" +#include "token_pairs.h" + + +// Basic parser function declarations +void * ParseAlloc(); +void Parse(); +void ParseFree(); + +void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s); + + + +/// Build MMD Engine +mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) { + mmd_engine * e = malloc(sizeof(mmd_engine)); + + if (e) { + e->dstr = d; + + e->root = NULL; + + e->extensions = extensions; + + e->citation_stack = stack_new(0); + e->definition_stack = stack_new(0); + e->footnote_stack = stack_new(0); + e->header_stack = stack_new(0); + e->link_stack = stack_new(0); + + e->pairings1 = token_pair_engine_new(); + e->pairings2 = token_pair_engine_new(); + e->pairings3 = token_pair_engine_new(); + + // CriticMarkup + if (extensions & EXT_CRITIC) { + token_pair_engine_add_pairing(e->pairings1, CRITIC_ADD_OPEN, CRITIC_ADD_CLOSE, PAIR_CRITIC_ADD, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings1, CRITIC_DEL_OPEN, CRITIC_DEL_CLOSE, PAIR_CRITIC_DEL, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings1, CRITIC_COM_OPEN, CRITIC_COM_CLOSE, PAIR_CRITIC_COM, PAIRING_ALLOW_EMPTY | 
PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings1, CRITIC_SUB_OPEN, CRITIC_SUB_DIV_A, PAIR_CRITIC_SUB_DEL, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings1, CRITIC_SUB_DIV_B, CRITIC_SUB_CLOSE, PAIR_CRITIC_SUB_ADD, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings1, CRITIC_HI_OPEN, CRITIC_HI_CLOSE, PAIR_CRITIC_HI, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + } + + // Brackets, Parentheses, Angles + token_pair_engine_add_pairing(e->pairings2, BRACKET_LEFT, BRACKET_RIGHT, PAIR_BRACKET, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_CITATION_LEFT, BRACKET_RIGHT, PAIR_BRACKET_CITATION, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_FOOTNOTE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_FOOTNOTE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_IMAGE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_IMAGE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACKET_VARIABLE_LEFT, BRACKET_RIGHT, PAIR_BRACKET_VARIABLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, PAREN_LEFT, PAREN_RIGHT, PAIR_PAREN, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, ANGLE_LEFT, ANGLE_RIGHT, PAIR_ANGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, BRACE_DOUBLE_LEFT, BRACE_DOUBLE_RIGHT, PAIR_BRACES, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + + // Strong/Emph + token_pair_engine_add_pairing(e->pairings3, STAR, STAR, PAIR_STAR, 0); + token_pair_engine_add_pairing(e->pairings3, UL, UL, PAIR_UL, 0); + + // Quotes and Backticks + token_pair_engine_add_pairing(e->pairings2, BACKTICK, BACKTICK, PAIR_BACKTICK, PAIRING_PRUNE_MATCH | PAIRING_MATCH_LENGTH); + + token_pair_engine_add_pairing(e->pairings3, BACKTICK, 
QUOTE_RIGHT_ALT, PAIR_QUOTE_ALT, PAIRING_ALLOW_EMPTY | PAIRING_MATCH_LENGTH); + token_pair_engine_add_pairing(e->pairings3, QUOTE_SINGLE, QUOTE_SINGLE, PAIR_QUOTE_SINGLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings3, QUOTE_DOUBLE, QUOTE_DOUBLE, PAIR_QUOTE_DOUBLE, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + + // Math + if (!(extensions & EXT_COMPATIBILITY)) { + token_pair_engine_add_pairing(e->pairings2, MATH_PAREN_OPEN, MATH_PAREN_CLOSE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, MATH_BRACKET_OPEN, MATH_BRACKET_CLOSE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, MATH_DOLLAR_SINGLE, MATH_DOLLAR_SINGLE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + token_pair_engine_add_pairing(e->pairings2, MATH_DOLLAR_DOUBLE, MATH_DOLLAR_DOUBLE, PAIR_MATH, PAIRING_ALLOW_EMPTY | PAIRING_PRUNE_MATCH); + } + + // Superscript/Subscript + if (!(extensions & EXT_COMPATIBILITY)) { + token_pair_engine_add_pairing(e->pairings3, SUPERSCRIPT, SUPERSCRIPT, PAIR_SUPERSCRIPT, 0); + token_pair_engine_add_pairing(e->pairings3, SUBSCRIPT, SUBSCRIPT, PAIR_SUPERSCRIPT, 0); + } + + } + + return e; +} + +/// Create MMD Engine using an existing DString (A new copy is *not* made) +mmd_engine * mmd_engine_create_with_dstring(DString * d, unsigned long extensions) { + return mmd_engine_create(d, extensions); +} + + +/// Create MMD Engine using a C string (A private copy of the string will be +/// made. 
The one passed here can be freed by the calling function) +mmd_engine * mmd_engine_create_with_string(const char * str, unsigned long extensions) { + DString * d = d_string_new(str); + + return mmd_engine_create(d, extensions); +} + + +/// Free an existing MMD Engine +void mmd_engine_free(mmd_engine * e, bool freeDString) { + if (e == NULL) + return; + + if (freeDString) + d_string_free(e->dstr, true); + + if (e->extensions & EXT_CRITIC) + token_pair_engine_free(e->pairings1); + + token_pair_engine_free(e->pairings2); + token_pair_engine_free(e->pairings3); + + token_tree_free(e->root); + + // Pointers to blocks that are freed elsewhere + stack_free(e->definition_stack); + stack_free(e->header_stack); + + // Links need to be freed + while (e->link_stack->size) { + link_free(stack_pop(e->link_stack)); + } + stack_free(e->link_stack); + + // Footnotes need to be freed + while (e->footnote_stack->size) { + footnote_free(stack_pop(e->footnote_stack)); + } + stack_free(e->footnote_stack); + + free(e); +} + + +bool line_is_empty(token * t) { + while (t) { + switch (t->type) { + case NON_INDENT_SPACE: + case INDENT_TAB: + case INDENT_SPACE: + t = t->next; + break; + case TEXT_LINEBREAK: + case TEXT_NL: + return true; + default: + return false; + } + } + + return true; +} + + +/// Determine what sort of line this is +void mmd_assign_line_type(mmd_engine * e, token * line) { + if (!line) + return; + + if (!line->child) { + line->type = LINE_EMPTY; + return; + } + + const char * source = e->dstr->str; + + token * t = NULL; + short temp_short; + size_t scan_len; + + // Skip non-indenting space + if (line->child->type == NON_INDENT_SPACE) { + token_remove_first_child(line); + } else if (line->child->type == TEXT_PLAIN && line->child->len == 1) { + if (source[line->child->start] == ' ') + token_remove_first_child(line); + } + + if (line->child == NULL) { + line->type = LINE_EMPTY; + return; + } + + switch (line->child->type) { + case INDENT_TAB: + if 
(line_is_empty(line->child)) + line->type = LINE_EMPTY; + else + line->type = LINE_INDENTED_TAB; + break; + case INDENT_SPACE: + if (line_is_empty(line->child)) + line->type = LINE_EMPTY; + else + line->type = LINE_INDENTED_SPACE; + break; + case ANGLE_LEFT: + if (scan_html_block(&source[line->start])) + line->type = LINE_HTML; + else + line->type = LINE_PLAIN; + break; + case ANGLE_RIGHT: + line->type = LINE_BLOCKQUOTE; + line->child->type = MARKER_BLOCKQUOTE; + break; + case BACKTICK: + if (e->extensions & EXT_COMPATIBILITY) { + line->type = LINE_PLAIN; + break; + } + scan_len = scan_fence_end(&source[line->child->start]); + if (scan_len) { + line->type = LINE_FENCE_BACKTICK; + break; + } else { + scan_len = scan_fence_start(&source[line->child->start]); + if (scan_len) { + line->type = LINE_FENCE_BACKTICK_START; + break; + } + } + line->type = LINE_PLAIN; + break; + case HASH1: + case HASH2: + case HASH3: + case HASH4: + case HASH5: + case HASH6: + line->type = (line->child->type - HASH1) + LINE_ATX_1; + line->child->type = (line->type - LINE_ATX_1) + MARKER_H1; + + // Strip trailing whitespace from '#' sequence + line->child->len = line->child->type - MARKER_H1 + 1; + + // Strip trailing '#' sequence if present + if (line->child->tail->type == TEXT_NL) { + if ((line->child->tail->prev->type >= HASH1) && + (line->child->tail->prev->type <= HASH6)) + line->child->tail->prev->type = TEXT_EMPTY; + } else { + token_describe(line->child->tail, NULL); + if ((line->child->tail->type >= HASH1) && + (line->child->tail->type <= HASH6)) + line->child->tail->type = TEXT_EMPTY; + } + break; + case TEXT_NUMBER_POSS_LIST: + switch(source[line->child->next->start]) { + case '.': + switch(source[line->child->next->start + 1]) { + case ' ': + case '\t': + line->type = LINE_LIST_ENUMERATED; + line->child->type = MARKER_LIST_ENUMERATOR; + + // Strip period + line->child->next->type = TEXT_EMPTY; + + switch (line->child->next->next->type) { + case TEXT_PLAIN: + // Strip whitespace 
between bullet and text + while (char_is_whitespace(source[line->child->next->next->start])) { + line->child->next->next->start++; + line->child->next->next->len--; + } + break; + case INDENT_SPACE: + case INDENT_TAB: + case NON_INDENT_SPACE: + t = line->child->next; + while(t->next && ((t->next->type == INDENT_SPACE) || + (t->next->type == INDENT_TAB) || + (t->next->type == NON_INDENT_SPACE))) { + tokens_prune(t->next, t->next); + } + break; + } + break; + default: + line->type = LINE_PLAIN; + line->child->type = TEXT_PLAIN; + break; + } + break; + default: + line->type = LINE_PLAIN; + line->child->type = TEXT_PLAIN; + break; + } + break; + case DASH_N: + case DASH_M: + case STAR: + case UL: + // Could this be a horizontal rule? + t = line->child->next; + temp_short = line->child->len; + while (t) { + switch (t->type) { + case DASH_N: + case DASH_M: + if (t->type == line->child->type) { + t = t->next; + temp_short += t->len; + } else { + temp_short = 0; + t = NULL; + } + break; + case STAR: + case UL: + if (t->type == line->child->type) { + t = t->next; + temp_short++; + } else { + temp_short = 0; + t = NULL; + } + break; + case NON_INDENT_SPACE: + case INDENT_TAB: + case INDENT_SPACE: + t = t->next; + break; + case TEXT_PLAIN: + if ((t->len == 1) && (source[t->start] == ' ')) { + t = t->next; + break; + } + temp_short = 0; + t = NULL; + break; + case TEXT_NL: + case TEXT_LINEBREAK: + t = NULL; + break; + default: + temp_short = 0; + t = NULL; + break; + } + } + if (temp_short > 2) { + // This is a horizontal rule, not a list item + line->type = LINE_HR; + break; + } + if (line->child->type == UL) { + // Revert to plain for this type + line->type = LINE_PLAIN; + break; + } + // If longer than 1 character, then it can't be a list marker, so it's a + // plain line + if (line->child->len > 1) { + line->type = LINE_PLAIN; + break; + } + case PLUS: + if (!line->child->next) { + // TODO: Should this be an empty list item instead?? 
+ line->type = LINE_PLAIN; + } else { + switch(source[line->child->next->start]) { + case ' ': + case '\t': + line->type = LINE_LIST_BULLETED; + line->child->type = MARKER_LIST_BULLET; + + switch (line->child->next->type) { + case TEXT_PLAIN: + // Strip whitespace between bullet and text + while (char_is_whitespace(source[line->child->next->start])) { + line->child->next->start++; + line->child->next->len--; + } + break; + case INDENT_SPACE: + case INDENT_TAB: + case NON_INDENT_SPACE: + t = line->child; + while(t->next && ((t->next->type == INDENT_SPACE) || + (t->next->type == INDENT_TAB) || + (t->next->type == NON_INDENT_SPACE))) { + tokens_prune(t->next, t->next); + } + break; + } + break; + default: + line->type = LINE_PLAIN; + break; + } + } + break; + case TEXT_LINEBREAK: + case TEXT_NL: + line->type = LINE_EMPTY; + break; + case BRACKET_LEFT: + if (e->extensions & EXT_COMPATIBILITY) { + scan_len = scan_ref_link_no_attributes(&source[line->start]); + line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN; + } else { + scan_len = scan_ref_link(&source[line->start]); + line->type = (scan_len) ? LINE_DEF_LINK : LINE_PLAIN; + } + break; + case BRACKET_CITATION_LEFT: + if (e->extensions & EXT_NOTES) { + scan_len = scan_ref_citation(&source[line->start]); + line->type = (scan_len) ? LINE_DEF_CITATION : LINE_PLAIN; + } else { + line->type = LINE_PLAIN; + } + break; + case BRACKET_FOOTNOTE_LEFT: + if (e->extensions & EXT_NOTES) { + scan_len = scan_ref_foot(&source[line->start]); + line->type = (scan_len) ? 
LINE_DEF_FOOTNOTE : LINE_PLAIN; + } else { + line->type = LINE_PLAIN; + } + break; + case TEXT_PLAIN: + default: + line->type = LINE_PLAIN; + break; + } + + if (line->type == LINE_PLAIN) { + token * walker = line->child; + + while (walker != NULL) { + if (walker->type == PIPE) { + line->type = LINE_TABLE; + + return; + } + + walker = walker->next; + } + } +} + + +/// Strip leading indenting space from line (if present) +void deindent_line(token * line) { + if (!line || !line->child) + return; + + token * t; + + switch (line->child->type) { + case INDENT_TAB: + case INDENT_SPACE: + t = line->child; + line->child = t->next; + t->next = NULL; + if (line->child) { + line->child->prev = NULL; + line->child->tail = t->tail; + } + token_free(t); + break; + } +} + + +/// Strip leading indenting space from block +/// (for recursively parsing nested lists) +void deindent_block(mmd_engine * e, token * block) { + if (!block || !block->child) + return; + + token * t = block->child; + + while (t != NULL) { + deindent_line(t); + mmd_assign_line_type(e, t); + + t = t->next; + } +} + + +/// Strip leading blockquote marker from line +void strip_quote_markers_from_line(token * line, const char * source) { + if (!line || !line->child) + return; + + token * t; + + switch (line->child->type) { + case MARKER_BLOCKQUOTE: + case NON_INDENT_SPACE: + t = line->child; + line->child = t->next; + t->next = NULL; + if (line->child) { + line->child->prev = NULL; + line->child->tail = t->tail; + } + token_free(t); + break; + } + + if (line->child && (line->child->type == TEXT_PLAIN)) { + // Strip leading whitespace from first text token + t = line->child; + + while (t->len && char_is_whitespace(source[t->start])) { + t->start++; + t->len--; + } + + if (t->len == 0) { + line->child = t->next; + t->next = NULL; + if (line->child) { + line->child->prev = NULL; + line->child->tail = t->tail; + } + + token_free(t); + } + } +} + + +/// Strip leading blockquote markers and non-indent space +/// (for 
recursively parsing blockquotes) +void strip_quote_markers_from_block(mmd_engine * e, token * block) { + if (!block || !block->child) + return; + + token * t = block->child; + + while (t != NULL) { + strip_quote_markers_from_line(t, e->dstr->str); + mmd_assign_line_type(e, t); + + t = t->next; + } +} + + +/// Create a token chain from source string +token * mmd_tokenize_string(mmd_engine * e, const char * str, size_t len) { + // Create a scanner (for re2c) + Scanner s; + s.start = str; + s.cur = s.start; + + // Strip trailing whitespace +// while (len && char_is_whitespace_or_line_ending(str[len - 1])) +// len--; + + // Where do we stop parsing? + const char * stop = str + len; + + int type; // TOKEN type + token * t; // Create tokens for incorporation + + token * root = token_new(0,0,0); // Store the final parse tree here + token * line = token_new(0,0,0); // Store current line here + + const char * last_stop = str; // Remember where last token ended + + do { + // Scan for next token (type of 0 means there is nothing left); + type = scan(&s, stop); + + //if (type && s.start != last_stop) { + if (s.start != last_stop) { + // We skipped characters between tokens + + if (type) { + // Create a default token type for the skipped characters + t = token_new(TEXT_PLAIN, (size_t)(last_stop - str), (size_t)(s.start - last_stop)); + + token_append_child(line, t); + } else { + if (stop > last_stop) { + // Source text ends without newline + t = token_new(TEXT_PLAIN, (size_t)(last_stop - str), (size_t)(stop - last_stop)); + + token_append_child(line, t); + } + } + } + + switch (type) { + case 0: + // 0 means we finished with input + // Add current line to root + + // What sort of line is this? + mmd_assign_line_type(e, line); + + token_append_child(root, line); + break; + case TEXT_LINEBREAK: + case TEXT_NL: + // We hit the end of a line + t = token_new(type, (size_t)(s.start - str), (size_t)(s.cur - s.start)); + token_append_child(line, t); + + // What sort of line is this? 
+ mmd_assign_line_type(e, line); + + token_append_child(root, line); + line = token_new(0,s.cur - str,0); + break; + default: + t = token_new(type, (size_t)(s.start - str), (size_t)(s.cur - s.start)); + token_append_child(line, t); + break; + } + + // Remember where token ends to detect skipped characters + last_stop = s.cur; + } while (type != 0); + + + return root; +} + + +/// Parse token tree +void mmd_parse_token_chain(mmd_engine * e, token * chain) { + + void* pParser = ParseAlloc (malloc); // Create a parser (for lemon) + token * walker = chain->child; // Walk the existing tree + token * remainder; // Hold unparsed tail of chain + + // Remove existing token tree + e->root = NULL; + + while (walker != NULL) { + remainder = walker->next; + + // Snip token from remainder + walker->next = NULL; + walker->tail = walker; + + if (remainder) + remainder->prev = NULL; + + Parse(pParser, walker->type, walker, e); + + walker = remainder; + } + + // Signal finish to parser + Parse(pParser, 0, NULL, e); + + // Disconnect of (now empty) root + chain->child = NULL; + token_append_child(chain, e->root); + e->root = NULL; + + ParseFree(pParser, free); +} + + +void mmd_pair_tokens_in_chain(token * head, token_pair_engine * e, stack * s) { + + while (head != NULL) { + mmd_pair_tokens_in_block(head, e, s); + + head = head->next; + } +} + + +/// Match token pairs inside block +void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s) { + if (block == NULL || e == NULL) + return; + + // Pair tokens (if appropriate) + // \todo: Check for leaf node. 
Also, might need to put this somewhere else + + switch (block->type) { + case BLOCK_BLOCKQUOTE: + case BLOCK_DEF_CITATION: + case BLOCK_DEF_FOOTNOTE: + case BLOCK_DEF_LINK: + case BLOCK_H1: + case BLOCK_H2: + case BLOCK_H3: + case BLOCK_H4: + case BLOCK_H5: + case BLOCK_H6: + case BLOCK_PARA: + token_pairs_match_pairs_inside_token(block, e, s); + break; + case DOC_START_TOKEN: + case BLOCK_LIST_BULLETED: + case BLOCK_LIST_BULLETED_LOOSE: + case BLOCK_LIST_ENUMERATED: + case BLOCK_LIST_ENUMERATED_LOOSE: + mmd_pair_tokens_in_chain(block->child, e, s); + break; + case BLOCK_LIST_ITEM: + case BLOCK_LIST_ITEM_TIGHT: + token_pairs_match_pairs_inside_token(block, e, s); + mmd_pair_tokens_in_chain(block->child, e, s); + break; + case BLOCK_EMPTY: + case BLOCK_CODE_INDENTED: + case BLOCK_CODE_FENCED: + // No need to pair tokens in these blocks + break; + case LINE_TABLE: + case BLOCK_TABLE: // \TODO: Need to handle tables differently and isolate by cell? + token_pairs_match_pairs_inside_token(block, e, s); + mmd_pair_tokens_in_chain(block->child, e, s); + break; + default: + // Nothing to do here + //fprintf(stderr, "What to do for %d\n", block->type); + return; + } +} + + +/// Ambidextrous tokens can open OR close a pair. This routine gives the opportunity +/// to change this behavior on case-by-case basis. For example, in `foo **bar** foo`, the +/// first set of asterisks can open, but not close a pair. The second set can close, but not +/// open a pair. This allows for complex behavior without having to bog down the tokenizer +/// with figuring out which type of asterisk we have. Default behavior is that open and close +/// are enabled, so we just have to figure out when to turn it off. 
+void mmd_assign_ambidextrous_tokens_in_block(mmd_engine * e, token * block, const char * str, size_t start_offset) { + if (block == NULL || block->child == NULL) + return; + + size_t offset; // Temp variable for use below + size_t lead_count, lag_count, pre_count, post_count; + + token * t = block->child; + + while (t != NULL) { + switch (t->type) { + case DOC_START_TOKEN: + case BLOCK_BLOCKQUOTE: + case BLOCK_H1: + case BLOCK_H2: + case BLOCK_H3: + case BLOCK_H4: + case BLOCK_H5: + case BLOCK_H6: + case BLOCK_LIST_BULLETED: + case BLOCK_LIST_BULLETED_LOOSE: + case BLOCK_LIST_ENUMERATED: + case BLOCK_LIST_ENUMERATED_LOOSE: + case BLOCK_LIST_ITEM: + case BLOCK_LIST_ITEM_TIGHT: + case BLOCK_PARA: + case BLOCK_TABLE: + // Assign child tokens of blocks + mmd_assign_ambidextrous_tokens_in_block(e, t, str, start_offset); + break; + case CRITIC_SUB_DIV: + // Divide this into two tokens + t->child = token_new(CRITIC_SUB_DIV_B, t->start + 1, 1); + t->child->next = t->next; + t->next = t->child; + t->child = NULL; + t->len = 1; + t->type = CRITIC_SUB_DIV_A; + break; + case STAR: + // Look left and skip over neighboring '*' characters + offset = t->start; + + while ((offset != 0) && ((str[offset] == '*') || (str[offset] == '_'))) { + offset--; + } + + // We can only close if there is something to left besides whitespace + if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset]))) { + // Whitespace or punctuation to left, so can't close + t->can_close = 0; + } + + // Look right and skip over neighboring '*' characters + offset = t->start + 1; + + while ((str[offset] == '*') || (str[offset] == '_')) + offset++; + + // We can only open if there is something to right besides whitespace/punctuation + if (char_is_whitespace_or_line_ending(str[offset])) { + // Whitespace to right, so can't open + t->can_open = 0; + } + + // If we're in the middle of a word, then we need to be more precise + if (t->can_open && t->can_close) { + lead_count = 0; + lag_count = 0; + 
pre_count = 0; + post_count = 0; + + offset = t->start - 1; + + // How many '*' in this run before current token? + while (offset && (str[offset] == '*')) { + lead_count++; + offset--; + } + + while (offset && (!char_is_whitespace_or_line_ending_or_punctuation(str[offset]))) { + offset--; + } + + // Are there '*' at the beginning of this word? + while ((offset != -1) && (str[offset] == '*')) { + pre_count++; + offset--; + } + + offset = t->start + 1; + + // How many '*' in this run after current token? + while (str[offset] == '*') { + lag_count++; + offset++; + } + + while (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) { + offset++; + } + + // Are there '*' at the end of this word? + while (offset && (str[offset] == '*')) { + post_count++; + offset++; + } + + if (pre_count + post_count > 0) { + if (pre_count + post_count == lead_count + lag_count + 1) { + if (pre_count == post_count) { + t->can_open = 0; + t->can_close = 0; + } else if (pre_count == 0) { + t->can_close = 0; + } else if (post_count == 0) { + t->can_open = 0; + } + } else if (pre_count == lead_count + lag_count + 1 + post_count) { + t->can_open = 0; + } else if (post_count == pre_count + lead_count + lag_count + 1) { + t->can_close = 0; + } else { + if (pre_count != lead_count + lag_count + 1) { + t->can_close = 0; + } + + if (post_count != lead_count + lag_count + 1) { + t->can_open = 0; + } + } + } + } + break; + case UL: + // Look left and skip over neighboring '_' characters + offset = t->start; + + while ((offset != 0) && ((str[offset] == '_') || (str[offset] == '*'))) { + offset--; + } + + if ((offset == 0) || (char_is_whitespace_or_line_ending_or_punctuation(str[offset]))) { + // Whitespace or punctuation to left, so can't close + t->can_close = 0; + } + + // We don't allow intraword underscores (e.g. 
`foo_bar_foo`) + if ((offset > 0) && (char_is_alphanumeric(str[offset]))) { + // Letters to left, so can't open + t->can_open = 0; + } + + // Look right and skip over neighboring '_' characters + offset = t->start + 1; + + while ((str[offset] == '*') || (str[offset] == '_')) + offset++; + + if (char_is_whitespace_or_line_ending_or_punctuation(str[offset])) { + // Whitespace to right, so can't open + t->can_open = 0; + } + + if (char_is_alphanumeric(str[offset])) { + // Letters to right, so can't close + t->can_close = 0; + } + + break; + case BACKTICK: + // Backticks are used for code spans, but also for ``foo'' double quote syntax. + // We care only about the quote syntax. + offset = t->start; + + // TODO: This does potentially prevent ``foo `` from closing due to space before closer? + // Bug or feature?? + if (t->len != 2) + break; + + if ((offset == 0) || (str[offset] != '`' && char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))) { + // Whitespace or punctuation to left, so can't close + t->can_close = 0; + } + break; + case QUOTE_SINGLE: + if (!(e->extensions & EXT_SMART)) + break; + // Some of these are actually APOSTROPHE's and should not be paired + offset = t->start; + + if (!((offset == 0) || (char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1])) || + (char_is_whitespace_or_line_ending_or_punctuation(str[offset + 1])))) { + t->type = APOSTROPHE; + break; + } + + if (offset && (char_is_punctuation(str[offset - 1])) && + (char_is_alphanumeric(str[offset + 1]))) { + t->type = APOSTROPHE; + break; + } + case QUOTE_DOUBLE: + if (!(e->extensions & EXT_SMART)) + break; + offset = t->start; + + if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset - 1]))) { + t->can_close = 0; + } + + if (char_is_whitespace_or_line_ending(str[offset + 1])) { + t->can_open = 0; + } + + break; + case DASH_N: + if (!(e->extensions & EXT_SMART)) + break; + // We want `1-2` to trigger a DASH_N, but regular hyphen otherwise (`a-b`) + // This 
doesn't apply to `--` or `---` + offset = t->start; + if (t->len == 1) { + // Check whether we have '1-2' + if ((offset == 0) || (!char_is_digit(str[offset - 1])) || + (!char_is_digit(str[offset + 1]))) { + t->type = TEXT_PLAIN; + } + } + break; + case MATH_DOLLAR_SINGLE: + case MATH_DOLLAR_DOUBLE: + if (e->extensions & EXT_COMPATIBILITY) + break; + + offset = t->start; + + // Look left + if ((offset == 0) || (char_is_whitespace_or_line_ending(str[offset - 1]))) { + // Whitespace to left, so can't close + t->can_close = 0; + } else if ((offset != 0) && (!char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))){ + // No whitespace or punctuation to left, can't open + t->can_open = 0; + } + + // Look right + offset = t->start + t->len; + + if (char_is_whitespace_or_line_ending(str[offset])) { + // Whitespace to right, so can't open + t->can_open = 0; + } else if (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) { + // No whitespace or punctuation to right, can't close + t->can_close = 0; + } + break; + case SUPERSCRIPT: + case SUBSCRIPT: + if (e->extensions & EXT_COMPATIBILITY) + break; + + offset = t->start; + + // Look left -- no whitespace to left + if ((offset == 0) || (char_is_whitespace_or_line_ending_or_punctuation(str[offset - 1]))) { + t->can_open = 0; + } + + if ((offset != 0) && (char_is_whitespace_or_line_ending(str[offset - 1]))) { + t->can_close = 0; + } + + offset = t->start + t->len; + + if (char_is_whitespace_or_line_ending_or_punctuation(str[offset])) { + t->can_open = 0; + } + + // We need to be contiguous in order to match + if (t->can_open) { + offset = t->start + t->len; + t->can_open = 0; + + while (!(char_is_whitespace_or_line_ending(str[offset]))) { + if (str[offset] == str[t->start]) + t->can_open = 1; + offset++; + } + + // Are we a standalone, e.g x^2 + if (!t->can_open) { + offset = t->start + t->len; + while (!char_is_whitespace_or_line_ending_or_punctuation(str[offset])) + offset++; + + t->len = 
offset-t->start; + t->can_close = 0; + + // Shift next token right and move those characters as child node + if ((t->next != NULL) && ((t->next->type == TEXT_PLAIN) || (t->next->type == TEXT_NUMBER_POSS_LIST))) { + t->next->start += t->len - 1; + t->next->len -= t->len - 1; + + t->child = token_new(TEXT_PLAIN, t->start + 1, t->len - 1); + } + } + } + + // We need to be contiguous in order to match + if (t->can_close) { + offset = t->start; + t->can_close = 0; + + while ((offset > 0) && !(char_is_whitespace_or_line_ending(str[offset - 1]))) { + if (str[offset - 1] == str[t->start]) + t->can_close = 1; + offset--; + } + } + break; + } + + t = t->next; + } + +} + + +/// Strong/emph parsing is done using single `*` and `_` characters, which are +/// then combined in a separate routine here to determine when +/// consecutive characters should be interpreted as STRONG instead of EMPH +/// \todo: Perhaps combining this with the routine when they are paired +/// would improve performance? +void pair_emphasis_tokens(token * t) { + token * closer; + + while (t != NULL) { + if (t->mate != NULL) { + switch (t->type) { + case STAR: + case UL: + closer = t->mate; + if ((t->next->mate == closer->prev) && + (t->type == t->next->type) && + (t->next->mate != t) && + (t->start+t->len == t->next->start) && + (closer->start == closer->prev->start + closer->prev->len)) { + + // We have a strong pair + t->type = STRONG_START; + t->len = 2; + closer->type = STRONG_STOP; + closer->len = 2; + closer->start--; + + tokens_prune(t->next, t->next); + tokens_prune(closer->prev, closer->prev); + } else { + t->type = EMPH_START; + closer->type = EMPH_STOP; + } + break; + + default: + break; + } + + } + + if (t->child != NULL) + pair_emphasis_tokens(t->child); + + t = t->next; + } +} + + +void recursive_parse_list_item(mmd_engine * e, token * block) { + // Strip list marker from first line + token_remove_first_child(block->child); + + // Remove all leading space from first line of list item +// 
strip_all_leading_space(block->child) + + // Remove one indent level from all lines to allow recursive parsing + deindent_block(e, block); + + mmd_parse_token_chain(e, block); +} + + +void is_list_loose(token * list) { + bool loose = false; + + token * walker = list->child; + + while (walker->next != NULL) { + if (walker->type == BLOCK_LIST_ITEM) { + if (walker->child->type == BLOCK_PARA) { + loose = true; + } else { + walker->type = BLOCK_LIST_ITEM_TIGHT; + } + } + + walker = walker->next; + } + + if (loose) { + switch (list->type) { + case BLOCK_LIST_BULLETED: + list->type = BLOCK_LIST_BULLETED_LOOSE; + break; + case BLOCK_LIST_ENUMERATED: + list->type = BLOCK_LIST_ENUMERATED_LOOSE; + break; + } + } +} + + +/// Is this actually an HTML block? +void is_para_html(mmd_engine * e, token * block) { + if (block->child->type != LINE_PLAIN) + return; + token * t = block->child->child; + + if (t->type != ANGLE_LEFT) + return; + + if (scan_html_block(&(e->dstr->str[t->start]))) { + block->type = BLOCK_HTML; + return; + } + + if (scan_html_line(&(e->dstr->str[t->start]))) { + block->type = BLOCK_HTML; + return; + } +} + +void recursive_parse_blockquote(mmd_engine * e, token * block) { + // Strip blockquote markers (if present) + strip_quote_markers_from_block(e, block); + + mmd_parse_token_chain(e, block); +} + + +void strip_line_tokens_from_block(token * block) { + if ((block == NULL) || (block->child == NULL)) + return; + +#ifndef NDEBUG + fprintf(stderr, "Strip line tokens from %d (%lu:%lu) (child %d)\n", block->type, block->start, block->len, block->child->type); + token_tree_describe(block, NULL); +#endif + + token * l = block->child; + + // Strip trailing empty lines from indented code blocks + if (block->type == BLOCK_CODE_INDENTED) { + while (l->tail->type == LINE_EMPTY) + token_remove_last_child(block); + } + + token * children = NULL; + block->child = NULL; + + token * temp; + + + // Move contents of line directly into the parent block + while (l != NULL) { + 
switch (l->type) { + case LINE_ATX_1: + case LINE_ATX_2: + case LINE_ATX_3: + case LINE_ATX_4: + case LINE_ATX_5: + case LINE_ATX_6: + case LINE_BLOCKQUOTE: + case LINE_CONTINUATION: + case LINE_DEF_CITATION: + case LINE_DEF_FOOTNOTE: + case LINE_DEF_LINK: + case LINE_EMPTY: + case LINE_LIST_BULLETED: + case LINE_LIST_ENUMERATED: + case LINE_PLAIN: + // Remove leading non-indent space from line + if (l->child && l->child->type == NON_INDENT_SPACE) + token_remove_first_child(l); + + case LINE_INDENTED_TAB: + case LINE_INDENTED_SPACE: + // Strip leading indent (Only the first one) + if (l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) + token_remove_first_child(l); + + // If we're not a code block, strip additional indents + if ((block->type != BLOCK_CODE_INDENTED) && + (block->type != BLOCK_CODE_FENCED)) { + while (l->child && ((l->child->type == INDENT_SPACE) || (l->child->type == INDENT_TAB))) + token_remove_first_child(l); + } + // Add contents of line to parent block + token_append_child(block, l->child); + + // Disconnect line from it's contents + l->child = NULL; + + // Need to remember first line we strip + if (children == NULL) + children = l; + + // Advance to next line + l = l->next; + break; + case LINE_TABLE: + l->type = ROW_TABLE; + break; + default: + // This is a block, need to remove it from chain and + // Add to parent + temp = l->next; + + token_pop_link_from_chain(l); + token_append_child(block, l); + + // Advance to next line + l = temp; + break; + } + } + + // Free token chain of line types + token_tree_free(children); +} + + +/// Parse part of the string into a token tree +token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byte_len) { +#ifdef kUseObjectPool + // Ensure token pool is available and ready + token_pool_init(); +#endif + + // Reset definition stack + e->definition_stack->size = 0; + + // Tokenize the string + token * doc = mmd_tokenize_string(e, &e->dstr->str[byte_start], 
byte_len); + + // Parse tokens into blocks + mmd_parse_token_chain(e, doc); + + if (doc) { + // Parse blocks for pairs + mmd_assign_ambidextrous_tokens_in_block(e, doc, &e->dstr->str[byte_start], 0); + + // Prepare stack to be used for token pairing + // This avoids allocating/freeing one for each iteration. + stack * pair_stack = stack_new(0); + + mmd_pair_tokens_in_block(doc, e->pairings1, pair_stack); + mmd_pair_tokens_in_block(doc, e->pairings2, pair_stack); + mmd_pair_tokens_in_block(doc, e->pairings3, pair_stack); + + // Free stack + stack_free(pair_stack); + + pair_emphasis_tokens(doc); + +#ifndef NDEBUG + token_tree_describe(doc, &e->dstr->str[byte_start]); +#endif + } + + return doc; +} + + +/// Parse the entire string into a token tree +void mmd_engine_parse_string(mmd_engine * e) { + // Free existing parse tree + if (e->root) + token_tree_free(e->root); + + // New parse tree + e->root = mmd_engine_parse_substring(e, 0, e->dstr->currentStringLength); +} + diff --git a/src/mmd.h b/src/mmd.h new file mode 100644 index 0000000..f44ee82 --- /dev/null +++ b/src/mmd.h @@ -0,0 +1,91 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file mmd.h + + @brief Create MMD parsing engine + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + + +#ifndef MMD_MULTIMARKDOWN_H +#define MMD_MULTIMARKDOWN_H + +#include "d_string.h" +#include "libMultiMarkdown.h" +#include "stack.h" +#include "token.h" +#include "token_pairs.h" + +struct mmd_engine { + DString * dstr; + token * root; + unsigned long extensions; + + token_pair_engine * pairings1; + token_pair_engine * pairings2; + token_pair_engine * pairings3; + + stack * citation_stack; + stack * definition_stack; + stack * header_stack; + stack * footnote_stack; + stack * link_stack; +}; + + +/// Expose routines to lemon parser +void recursive_parse_list_item(mmd_engine * e, token * block); +void recursive_parse_blockquote(mmd_engine * e, token * block); +void strip_line_tokens_from_block(token * block); +void is_para_html(mmd_engine * e, token * block); + + +void is_list_loose(token * list); + +#endif diff --git a/src/object_pool.c b/src/object_pool.c new file mode 100644 index 0000000..9579c8d --- /dev/null +++ b/src/object_pool.c @@ -0,0 +1,140 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file object_pool.c + + @brief Allocate memory for "objects" in large slabs, rather than one at a time. Improves + performance when generating large numbers of small chunks of memory, as the expense of + allocating memory in larger units. Could cause difficulty in extreme low memory situations. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + +#include + +#include "object_pool.h" + +#define kNumberOfObjects 1024 + + +void pool_add_slab(pool * p) { + void * slab = malloc(p->object_size * kNumberOfObjects); + + if (slab) { + stack_push(p->allocated, slab); + + // Next object will come from beginning of this slab + p->next = slab; + + // Set warning to trigger need for next slab + p->last = slab + (p->object_size * (kNumberOfObjects)); + } +} + + +/// Allocate a new object pool +pool * pool_new(short size) { + pool * p = malloc(sizeof(pool)); + + if (p) { + p->object_size = size; + + p->allocated = stack_new(1024); + + pool_add_slab(p); + } + + return p; +} + + +/// Free object pool +void pool_free(pool * p) { + if (p) { + pool_drain(p); + + stack_free(p->allocated); + + free(p); + } +} + + +/// Drain pool -- free slabs previously allocated +void pool_drain(pool * p) { + if (p == NULL) + return; + + void * slab; + + while (p->allocated->size > 0) { + slab = stack_pop(p->allocated); + free(slab); + } + + p->next = NULL; + p->last = NULL; +} + + +/// Request memory for a new object from the pool +void * pool_allocate_object(pool * p) { + void * a = NULL; + + if (p->next == p->last) { + pool_add_slab(p); + } + + if (p->next < p->last) { + a = p->next; + + p->next += (p->object_size); + } + + return a; +} + diff --git a/src/object_pool.h b/src/object_pool.h new file mode 100644 index 0000000..a311c4a --- /dev/null +++ b/src/object_pool.h @@ -0,0 +1,101 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file object_pool.h + + @brief Allocate memory for "objects" in large slabs, rather than one at a time. Improves + performance when generating large numbers of small chunks of memory, as the expense of + allocating memory in larger units. Could cause difficulty in extreme low memory situations. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. 
+ + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + + +#ifndef OBJECT_POOL_MULTIMARKDOWN_H +#define OBJECT_POOL_MULTIMARKDOWN_H + +#include "stack.h" + + +/// Structure for an object allocator pool +struct pool { + stack * allocated; //!< Stack of pointers to slabs that have been allocated + void * next; //!< Pointer to next available memory for allocation + void * last; //!< Pointer to end of available memory + short object_size; //!< Size of individual objects to be allocated + + char _PADDING[6]; //!< pad struct for alignment +}; + +typedef struct pool pool; + + +/// Allocate a new object pool +pool * pool_new( + short size //!< How big are the objects to be allocated +); + + +/// Free object pool +void pool_free( + pool * p //!< Pool to be freed +); + + +/// Drain pool -- free slabs previously allocated +void pool_drain( + pool * p //!< Pool to be drained +); + + +/// Request memory for a new object from the pool +void * pool_allocate_object( + pool * p //!< Pool to be used for allocation +); + + +#endif diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..f325eb0 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,1431 @@ +/* +** 2000-05-29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Driver template for the LEMON parser generator. +** +** The "lemon" program processes an LALR(1) input grammar file, then uses +** this template to construct a parser. The "lemon" program inserts text +** at each "%%" line. Also, any "P-a-r-s-e" identifer prefix (without the +** interstitial "-" characters) contained in this template is changed into +** the value of the %name directive from the grammar. 
Otherwise, the content +** of this template is copied straight through into the generate parser +** source file. +** +** The following is the concatenation of all %include directives from the +** input grammar file: +*/ +#include +/************ Begin %include sections from the grammar ************************/ + + #include + #include + #include + + #include "libMultiMarkdown.h" + #include "mmd.h" + #include "parser.h" + #include "token.h" +/**************** End of %include directives **********************************/ +/* These constants specify the various numeric values for terminal symbols +** in a format understandable to "makeheaders". This section is blank unless +** "lemon" is run with the "-m" command-line option. +***************** Begin makeheaders token definitions *************************/ +/**************** End makeheaders token definitions ***************************/ + +/* The next sections is a series of control #defines. +** various aspects of the generated parser. +** YYCODETYPE is the data type used to store the integer codes +** that represent terminal and non-terminal symbols. +** "unsigned char" is used if there are fewer than +** 256 symbols. Larger types otherwise. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYACTIONTYPE is the data type used for "action codes" - numbers +** that indicate what to do in response to the next +** token. +** ParseTOKENTYPE is the data type used for minor type for terminal +** symbols. Background: A "minor type" is a semantic +** value associated with a terminal or non-terminal +** symbols. 
For example, for an "ID" terminal symbol, +** the minor type might be the name of the identifier. +** Each non-terminal can have a different minor type. +** Terminal symbols all have the same minor type, though. +** This macros defines the minor type for terminal +** symbols. +** YYMINORTYPE is the data type used for all minor types. +** This is typically a union of many types, one of +** which is ParseTOKENTYPE. The entry in the union +** for terminal symbols is called "yy0". +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** ParseARG_SDECL A static variable declaration for the %extra_argument +** ParseARG_PDECL A parameter declaration for the %extra_argument +** ParseARG_STORE Code to store %extra_argument into yypParser +** ParseARG_FETCH Code to extract %extra_argument from yypParser +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states. 
+** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Maximum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +*/ +#ifndef INTERFACE +# define INTERFACE 1 +#endif +/************* Begin control #defines *****************************************/ +#define YYCODETYPE unsigned char +#define YYNOCODE 54 +#define YYACTIONTYPE unsigned short int +#define ParseTOKENTYPE token * +typedef union { + int yyinit; + ParseTOKENTYPE yy0; +} YYMINORTYPE; +#ifndef YYSTACKDEPTH +#define YYSTACKDEPTH 100 +#endif +#define ParseARG_SDECL mmd_engine * engine ; +#define ParseARG_PDECL , mmd_engine * engine +#define ParseARG_FETCH mmd_engine * engine = yypParser->engine +#define ParseARG_STORE yypParser->engine = engine +#define YYFALLBACK 1 +#define YYNSTATE 38 +#define YYNRULE 85 +#define YY_MAX_SHIFT 37 +#define YY_MIN_SHIFTREDUCE 88 +#define YY_MAX_SHIFTREDUCE 172 +#define YY_MIN_REDUCE 173 +#define YY_MAX_REDUCE 257 +#define YY_ERROR_ACTION 258 +#define YY_ACCEPT_ACTION 259 +#define YY_NO_ACTION 260 +/************* End control #defines *******************************************/ + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +#ifndef yytestcase +# define yytestcase(X) +#endif + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. 
These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if: +** (1) The yy_shift_ofst[S]+X value is out of range, or +** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or +** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. +** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that +** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. +** Hence only tests (1) and (2) need to be evaluated.) +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of +** YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. 
+** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. +** +*********** Begin parsing tables **********************************************/ +#define YY_ACTTAB_COUNT (162) +static const YYACTIONTYPE yy_action[] = { + /* 0 */ 173, 153, 20, 147, 148, 93, 94, 95, 96, 97, + /* 10 */ 98, 104, 153, 14, 5, 4, 162, 3, 2, 15, + /* 20 */ 13, 12, 11, 20, 147, 148, 93, 94, 95, 96, + /* 30 */ 97, 98, 104, 153, 14, 5, 4, 162, 3, 2, + /* 40 */ 15, 13, 12, 11, 259, 1, 90, 91, 10, 37, + /* 50 */ 35, 33, 102, 36, 105, 106, 107, 108, 109, 112, + /* 60 */ 112, 26, 26, 27, 27, 35, 168, 33, 89, 91, + /* 70 */ 10, 37, 35, 33, 102, 36, 105, 106, 107, 108, + /* 80 */ 109, 28, 146, 146, 28, 169, 28, 35, 143, 33, + /* 90 */ 25, 6, 6, 153, 7, 7, 138, 28, 16, 111, + /* 100 */ 16, 18, 28, 18, 171, 126, 28, 8, 8, 28, + /* 110 */ 29, 29, 9, 9, 21, 172, 21, 111, 171, 23, + /* 120 */ 111, 23, 28, 17, 141, 17, 19, 28, 19, 172, + /* 130 */ 115, 151, 152, 111, 171, 157, 4, 121, 111, 22, + /* 140 */ 150, 22, 151, 152, 24, 172, 24, 156, 30, 30, + /* 150 */ 5, 113, 31, 31, 139, 32, 32, 34, 34, 116, + /* 160 */ 129, 113, +}; +static const YYCODETYPE yy_lookahead[] = { + /* 0 */ 0, 12, 2, 3, 4, 5, 6, 7, 8, 9, + /* 10 */ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + /* 20 */ 20, 21, 22, 2, 3, 4, 5, 6, 7, 8, + /* 30 */ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + /* 40 */ 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, + /* 50 */ 30, 31, 32, 33, 34, 35, 36, 37, 38, 41, + /* 60 */ 42, 51, 52, 51, 52, 45, 1, 47, 26, 27, + /* 70 */ 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + /* 80 */ 38, 29, 1, 1, 29, 20, 29, 45, 52, 47, + /* 90 */ 42, 39, 40, 12, 39, 40, 50, 29, 46, 40, + /* 100 */ 48, 46, 29, 48, 1, 48, 29, 39, 40, 29, + /* 110 */ 49, 50, 39, 40, 46, 12, 48, 40, 1, 46, + /* 120 */ 40, 48, 29, 46, 21, 48, 46, 29, 48, 12, + /* 130 */ 44, 
3, 4, 40, 1, 1, 15, 47, 40, 46, + /* 140 */ 12, 48, 3, 4, 46, 12, 48, 13, 43, 44, + /* 150 */ 14, 12, 39, 40, 21, 39, 40, 39, 40, 45, + /* 160 */ 16, 12, +}; +#define YY_SHIFT_USE_DFLT (162) +#define YY_SHIFT_COUNT (37) +#define YY_SHIFT_MIN (-11) +#define YY_SHIFT_MAX (149) +static const short yy_shift_ofst[] = { + /* 0 */ 21, 0, 81, 81, 81, 81, 81, 81, 81, 81, + /* 10 */ 128, 117, 117, 65, 134, 82, -11, -11, -11, -11, + /* 20 */ 82, -11, -11, -11, -11, 82, 103, 133, 139, 65, + /* 30 */ 134, 82, 82, 121, 82, 136, 144, 149, +}; +#define YY_REDUCE_USE_DFLT (-1) +#define YY_REDUCE_COUNT (35) +#define YY_REDUCE_MIN (0) +#define YY_REDUCE_MAX (118) +static const signed char yy_reduce_ofst[] = { + /* 0 */ 20, 42, 52, 55, 68, 73, 77, 80, 93, 98, + /* 10 */ 18, 10, 12, 61, 105, 113, 57, 57, 57, 57, + /* 20 */ 116, 57, 57, 57, 57, 118, 36, 36, 48, 46, + /* 30 */ 86, 59, 59, 90, 59, 114, +}; +static const YYACTIONTYPE yy_default[] = { + /* 0 */ 258, 258, 249, 248, 210, 205, 219, 216, 208, 203, + /* 10 */ 177, 258, 258, 251, 239, 250, 220, 218, 217, 215, + /* 20 */ 229, 209, 207, 204, 202, 213, 227, 225, 246, 222, + /* 30 */ 199, 221, 195, 186, 212, 185, 188, 184, +}; +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if they keyword does not apply in the context where +** it appears. 
+*/ +#ifdef YYFALLBACK +static const YYCODETYPE yyFallback[] = { + 0, /* $ => nothing */ + 0, /* LINE_CONTINUATION => nothing */ + 1, /* LINE_PLAIN => LINE_CONTINUATION */ + 1, /* LINE_INDENTED_TAB => LINE_CONTINUATION */ + 1, /* LINE_INDENTED_SPACE => LINE_CONTINUATION */ +}; +#endif /* YYFALLBACK */ + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. +*/ +struct yyStackEntry { + YYACTIONTYPE stateno; /* The state-number, or reduce action in SHIFTREDUCE */ + YYCODETYPE major; /* The major token value. This is the code + ** number for the token at this stack level */ + YYMINORTYPE minor; /* The user-supplied minor token value. 
This + ** is the value of the token */ +}; +typedef struct yyStackEntry yyStackEntry; + +/* The state of the parser is completely contained in an instance of +** the following structure */ +struct yyParser { + yyStackEntry *yytos; /* Pointer to top element of the stack */ +#ifdef YYTRACKMAXSTACKDEPTH + int yyhwm; /* High-water mark of the stack */ +#endif +#ifndef YYNOERRORRECOVERY + int yyerrcnt; /* Shifts left before out of the error */ +#endif + ParseARG_SDECL /* A place to hold %extra_argument */ +#if YYSTACKDEPTH<=0 + int yystksz; /* Current side of the stack */ + yyStackEntry *yystack; /* The parser's stack */ + yyStackEntry yystk0; /* First stack entry */ +#else + yyStackEntry yystack[YYSTACKDEPTH]; /* The parser's stack */ +#endif +}; +typedef struct yyParser yyParser; + +#ifndef NDEBUG +#include +static FILE *yyTraceFILE = 0; +static char *yyTracePrompt = 0; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
      +**
    • A FILE* to which trace output should be written. +** If NULL, then tracing is turned off. +**
    • A prefix string written at the beginning of every +** line of trace output. If NULL, then tracing is +** turned off. +**
    +** +** Outputs: +** None. +*/ +void ParseTrace(FILE *TraceFILE, char *zTracePrompt){ + yyTraceFILE = TraceFILE; + yyTracePrompt = zTracePrompt; + if( yyTraceFILE==0 ) yyTracePrompt = 0; + else if( yyTracePrompt==0 ) yyTraceFILE = 0; +} +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +static const char *const yyTokenName[] = { + "$", "LINE_CONTINUATION", "LINE_PLAIN", "LINE_INDENTED_TAB", + "LINE_INDENTED_SPACE", "LINE_ATX_1", "LINE_ATX_2", "LINE_ATX_3", + "LINE_ATX_4", "LINE_ATX_5", "LINE_ATX_6", "LINE_HR", + "LINE_EMPTY", "LINE_BLOCKQUOTE", "LINE_LIST_BULLETED", "LINE_LIST_ENUMERATED", + "LINE_TABLE", "LINE_DEF_CITATION", "LINE_DEF_FOOTNOTE", "LINE_DEF_LINK", + "LINE_HTML", "LINE_FENCE_BACKTICK", "LINE_FENCE_BACKTICK_START", "error", + "doc", "blocks", "block", "para", + "indented_code", "empty", "list_bulleted", "list_enumerated", + "blockquote", "table", "def_citation", "def_footnote", + "def_link", "html_block", "fenced_block", "para_lines", + "para_line", "code_line", "indented_line", "quote_lines", + "quote_line", "item_bulleted", "cont_blocks", "item_enumerated", + "cont_block", "html_block_lines", "html_block_line", "fenced_lines", + "fenced_line", +}; +#endif /* NDEBUG */ + +#ifndef NDEBUG +/* For tracing reduce actions, the names of all rules are required. 
+*/ +static const char *const yyRuleName[] = { + /* 0 */ "doc ::= blocks", + /* 1 */ "blocks ::= blocks block", + /* 2 */ "blocks ::= block", + /* 3 */ "block ::= para", + /* 4 */ "block ::= indented_code", + /* 5 */ "block ::= LINE_ATX_1", + /* 6 */ "block ::= LINE_ATX_2", + /* 7 */ "block ::= LINE_ATX_3", + /* 8 */ "block ::= LINE_ATX_4", + /* 9 */ "block ::= LINE_ATX_5", + /* 10 */ "block ::= LINE_ATX_6", + /* 11 */ "block ::= empty", + /* 12 */ "block ::= list_bulleted", + /* 13 */ "block ::= list_enumerated", + /* 14 */ "block ::= blockquote", + /* 15 */ "block ::= table", + /* 16 */ "block ::= LINE_HR", + /* 17 */ "block ::= def_citation", + /* 18 */ "block ::= def_footnote", + /* 19 */ "block ::= def_link", + /* 20 */ "block ::= html_block", + /* 21 */ "block ::= fenced_block", + /* 22 */ "para ::= LINE_PLAIN para_lines", + /* 23 */ "para_lines ::= para_lines para_line", + /* 24 */ "indented_code ::= indented_code code_line", + /* 25 */ "empty ::= empty LINE_EMPTY", + /* 26 */ "blockquote ::= LINE_BLOCKQUOTE quote_lines", + /* 27 */ "quote_lines ::= quote_lines quote_line", + /* 28 */ "list_bulleted ::= list_bulleted item_bulleted", + /* 29 */ "item_bulleted ::= LINE_LIST_BULLETED para_lines cont_blocks", + /* 30 */ "item_bulleted ::= LINE_LIST_BULLETED para_lines", + /* 31 */ "item_bulleted ::= LINE_LIST_BULLETED cont_blocks", + /* 32 */ "item_bulleted ::= LINE_LIST_BULLETED", + /* 33 */ "list_enumerated ::= list_enumerated item_enumerated", + /* 34 */ "item_enumerated ::= LINE_LIST_ENUMERATED para_lines cont_blocks", + /* 35 */ "item_enumerated ::= LINE_LIST_ENUMERATED para_lines", + /* 36 */ "item_enumerated ::= LINE_LIST_ENUMERATED cont_blocks", + /* 37 */ "item_enumerated ::= LINE_LIST_ENUMERATED", + /* 38 */ "cont_blocks ::= cont_blocks cont_block", + /* 39 */ "cont_block ::= empty indented_line para_lines", + /* 40 */ "cont_block ::= empty indented_line", + /* 41 */ "table ::= table LINE_TABLE", + /* 42 */ "def_citation ::= LINE_DEF_CITATION 
para_lines cont_blocks", + /* 43 */ "def_citation ::= LINE_DEF_CITATION para_lines", + /* 44 */ "def_citation ::= LINE_DEF_CITATION cont_blocks", + /* 45 */ "def_footnote ::= LINE_DEF_FOOTNOTE para_lines cont_blocks", + /* 46 */ "def_footnote ::= LINE_DEF_FOOTNOTE para_lines", + /* 47 */ "def_footnote ::= LINE_DEF_FOOTNOTE cont_blocks", + /* 48 */ "def_link ::= LINE_DEF_LINK para_lines", + /* 49 */ "html_block ::= LINE_HTML html_block_lines", + /* 50 */ "html_block_lines ::= html_block_lines html_block_line", + /* 51 */ "fenced_block ::= LINE_FENCE_BACKTICK fenced_lines LINE_FENCE_BACKTICK", + /* 52 */ "fenced_block ::= LINE_FENCE_BACKTICK fenced_lines", + /* 53 */ "fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines LINE_FENCE_BACKTICK", + /* 54 */ "fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines", + /* 55 */ "fenced_lines ::= fenced_lines fenced_line", + /* 56 */ "para ::= LINE_PLAIN", + /* 57 */ "para_lines ::= para_line", + /* 58 */ "para_line ::= LINE_CONTINUATION", + /* 59 */ "indented_code ::= LINE_INDENTED_TAB", + /* 60 */ "indented_code ::= LINE_INDENTED_SPACE", + /* 61 */ "code_line ::= indented_line", + /* 62 */ "code_line ::= LINE_EMPTY", + /* 63 */ "indented_line ::= LINE_INDENTED_TAB", + /* 64 */ "indented_line ::= LINE_INDENTED_SPACE", + /* 65 */ "empty ::= LINE_EMPTY", + /* 66 */ "blockquote ::= LINE_BLOCKQUOTE", + /* 67 */ "quote_lines ::= quote_line", + /* 68 */ "quote_line ::= LINE_BLOCKQUOTE", + /* 69 */ "quote_line ::= LINE_CONTINUATION", + /* 70 */ "list_bulleted ::= item_bulleted", + /* 71 */ "list_enumerated ::= item_enumerated", + /* 72 */ "cont_blocks ::= cont_block", + /* 73 */ "cont_block ::= empty", + /* 74 */ "table ::= LINE_TABLE", + /* 75 */ "def_citation ::= LINE_DEF_CITATION", + /* 76 */ "def_footnote ::= LINE_DEF_FOOTNOTE", + /* 77 */ "def_link ::= LINE_DEF_LINK", + /* 78 */ "html_block ::= LINE_HTML", + /* 79 */ "html_block_lines ::= html_block_line", + /* 80 */ "html_block_line ::= LINE_CONTINUATION", + /* 81 */ 
"html_block_line ::= LINE_HTML", + /* 82 */ "fenced_lines ::= fenced_line", + /* 83 */ "fenced_line ::= LINE_CONTINUATION", + /* 84 */ "fenced_line ::= LINE_EMPTY", +}; +#endif /* NDEBUG */ + + +#if YYSTACKDEPTH<=0 +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +static int yyGrowStack(yyParser *p){ + int newSize; + int idx; + yyStackEntry *pNew; + + newSize = p->yystksz*2 + 100; + idx = p->yytos ? (int)(p->yytos - p->yystack) : 0; + if( p->yystack==&p->yystk0 ){ + pNew = malloc(newSize*sizeof(pNew[0])); + if( pNew ) pNew[0] = p->yystk0; + }else{ + pNew = realloc(p->yystack, newSize*sizeof(pNew[0])); + } + if( pNew ){ + p->yystack = pNew; + p->yytos = &p->yystack[idx]; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack grows from %d to %d entries.\n", + yyTracePrompt, p->yystksz, newSize); + } +#endif + p->yystksz = newSize; + } + return pNew==0; +} +#endif + +/* Datatype of the argument to the memory allocated passed as the +** second argument to ParseAlloc() below. This can be changed by +** putting an appropriate #define in the %include section of the input +** grammar. +*/ +#ifndef YYMALLOCARGTYPE +# define YYMALLOCARGTYPE size_t +#endif + +/* +** This function allocates a new parser. +** The only argument is a pointer to a function which works like +** malloc. +** +** Inputs: +** A pointer to the function used to allocate memory. +** +** Outputs: +** A pointer to a parser. This pointer is used in subsequent calls +** to Parse and ParseFree. 
+*/ +void *ParseAlloc(void *(*mallocProc)(YYMALLOCARGTYPE)){ + yyParser *pParser; + pParser = (yyParser*)(*mallocProc)( (YYMALLOCARGTYPE)sizeof(yyParser) ); + if( pParser ){ +#ifdef YYTRACKMAXSTACKDEPTH + pParser->yyhwm = 0; +#endif +#if YYSTACKDEPTH<=0 + pParser->yytos = NULL; + pParser->yystack = NULL; + pParser->yystksz = 0; + if( yyGrowStack(pParser) ){ + pParser->yystack = &pParser->yystk0; + pParser->yystksz = 1; + } +#endif +#ifndef YYNOERRORRECOVERY + pParser->yyerrcnt = -1; +#endif + pParser->yytos = pParser->yystack; + pParser->yystack[0].stateno = 0; + pParser->yystack[0].major = 0; + } + return pParser; +} + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. +*/ +static void yy_destructor( + yyParser *yypParser, /* The parser */ + YYCODETYPE yymajor, /* Type code for object to destroy */ + YYMINORTYPE *yypminor /* The object to be destroyed */ +){ + ParseARG_FETCH; + switch( yymajor ){ + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ +/********* Begin destructor definitions ***************************************/ +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. 
+** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +static void yy_pop_parser_stack(yyParser *pParser){ + yyStackEntry *yytos; + assert( pParser->yytos!=0 ); + assert( pParser->yytos > pParser->yystack ); + yytos = pParser->yytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sPopping %s\n", + yyTracePrompt, + yyTokenName[yytos->major]); + } +#endif + yy_destructor(pParser, yytos->major, &yytos->minor); +} + +/* +** Deallocate and destroy a parser. Destructors are called for +** all stack elements before shutting the parser down. +** +** If the YYPARSEFREENEVERNULL macro exists (for example because it +** is defined in a %include section of the input grammar) then it is +** assumed that the input pointer is never NULL. +*/ +void ParseFree( + void *p, /* The parser to be deleted */ + void (*freeProc)(void*) /* Function used to reclaim memory */ +){ + yyParser *pParser = (yyParser*)p; +#ifndef YYPARSEFREENEVERNULL + if( pParser==0 ) return; +#endif + while( pParser->yytos>pParser->yystack ) yy_pop_parser_stack(pParser); +#if YYSTACKDEPTH<=0 + if( pParser->yystack!=&pParser->yystk0 ) free(pParser->yystack); +#endif + (*freeProc)((void*)pParser); +} + +/* +** Return the peak depth of the stack for a parser. +*/ +#ifdef YYTRACKMAXSTACKDEPTH +int ParseStackPeak(void *p){ + yyParser *pParser = (yyParser*)p; + return pParser->yyhwm; +} +#endif + +/* +** Find the appropriate action for a parser given the terminal +** look-ahead token iLookAhead. 
+*/ +static unsigned int yy_find_shift_action( + yyParser *pParser, /* The parser */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; + int stateno = pParser->yytos->stateno; + + if( stateno>=YY_MIN_REDUCE ) return stateno; + assert( stateno <= YY_SHIFT_COUNT ); + do{ + i = yy_shift_ofst[stateno]; + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ +#ifdef YYFALLBACK + YYCODETYPE iFallback; /* Fallback token */ + if( iLookAhead %s\n", + yyTracePrompt, yyTokenName[iLookAhead], yyTokenName[iFallback]); + } +#endif + assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } +#endif +#ifdef YYWILDCARD + { + int j = i - iLookAhead + YYWILDCARD; + if( +#if YY_SHIFT_MIN+YYWILDCARD<0 + j>=0 && +#endif +#if YY_SHIFT_MAX+YYWILDCARD>=YY_ACTTAB_COUNT + j0 + ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE, "%sWILDCARD %s => %s\n", + yyTracePrompt, yyTokenName[iLookAhead], + yyTokenName[YYWILDCARD]); + } +#endif /* NDEBUG */ + return yy_action[j]; + } + } +#endif /* YYWILDCARD */ + return yy_default[stateno]; + }else{ + return yy_action[i]; + } + }while(1); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. 
+*/ +static int yy_find_reduce_action( + int stateno, /* Current state number */ + YYCODETYPE iLookAhead /* The look-ahead token */ +){ + int i; +#ifdef YYERRORSYMBOL + if( stateno>YY_REDUCE_COUNT ){ + return yy_default[stateno]; + } +#else + assert( stateno<=YY_REDUCE_COUNT ); +#endif + i = yy_reduce_ofst[stateno]; + assert( i!=YY_REDUCE_USE_DFLT ); + assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; +#ifdef YYERRORSYMBOL + if( i<0 || i>=YY_ACTTAB_COUNT || yy_lookahead[i]!=iLookAhead ){ + return yy_default[stateno]; + } +#else + assert( i>=0 && iyytos--; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sStack Overflow!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ +/******** End %stack_overflow code ********************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument var */ +} + +/* +** Print tracing information for a SHIFT action +*/ +#ifndef NDEBUG +static void yyTraceShift(yyParser *yypParser, int yyNewState){ + if( yyTraceFILE ){ + if( yyNewStateyytos->major], + yyNewState); + }else{ + fprintf(yyTraceFILE,"%sShift '%s'\n", + yyTracePrompt,yyTokenName[yypParser->yytos->major]); + } + } +} +#else +# define yyTraceShift(X,Y) +#endif + +/* +** Perform a shift action. 
+*/ +static void yy_shift( + yyParser *yypParser, /* The parser to be shifted */ + int yyNewState, /* The new state to shift in */ + int yyMajor, /* The major token to shift in */ + ParseTOKENTYPE yyMinor /* The minor token to shift in */ +){ + yyStackEntry *yytos; + yypParser->yytos++; +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=&yypParser->yystack[YYSTACKDEPTH] ){ + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + return; + } + } +#endif + if( yyNewState > YY_MAX_SHIFT ){ + yyNewState += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yytos = yypParser->yytos; + yytos->stateno = (YYACTIONTYPE)yyNewState; + yytos->major = (YYCODETYPE)yyMajor; + yytos->minor.yy0 = yyMinor; + yyTraceShift(yypParser, yyNewState); +} + +/* The following table contains information about every rule that +** is used during the reduce. 
+*/ +static const struct { + YYCODETYPE lhs; /* Symbol on the left-hand side of the rule */ + unsigned char nrhs; /* Number of right-hand side symbols in the rule */ +} yyRuleInfo[] = { + { 24, 1 }, + { 25, 2 }, + { 25, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 26, 1 }, + { 27, 2 }, + { 39, 2 }, + { 28, 2 }, + { 29, 2 }, + { 32, 2 }, + { 43, 2 }, + { 30, 2 }, + { 45, 3 }, + { 45, 2 }, + { 45, 2 }, + { 45, 1 }, + { 31, 2 }, + { 47, 3 }, + { 47, 2 }, + { 47, 2 }, + { 47, 1 }, + { 46, 2 }, + { 48, 3 }, + { 48, 2 }, + { 33, 2 }, + { 34, 3 }, + { 34, 2 }, + { 34, 2 }, + { 35, 3 }, + { 35, 2 }, + { 35, 2 }, + { 36, 2 }, + { 37, 2 }, + { 49, 2 }, + { 38, 3 }, + { 38, 2 }, + { 38, 3 }, + { 38, 2 }, + { 51, 2 }, + { 27, 1 }, + { 39, 1 }, + { 40, 1 }, + { 28, 1 }, + { 28, 1 }, + { 41, 1 }, + { 41, 1 }, + { 42, 1 }, + { 42, 1 }, + { 29, 1 }, + { 32, 1 }, + { 43, 1 }, + { 44, 1 }, + { 44, 1 }, + { 30, 1 }, + { 31, 1 }, + { 46, 1 }, + { 48, 1 }, + { 33, 1 }, + { 34, 1 }, + { 35, 1 }, + { 36, 1 }, + { 37, 1 }, + { 49, 1 }, + { 50, 1 }, + { 50, 1 }, + { 51, 1 }, + { 52, 1 }, + { 52, 1 }, +}; + +static void yy_accept(yyParser*); /* Forward Declaration */ + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. 
+*/ +static void yy_reduce( + yyParser *yypParser, /* The parser */ + unsigned int yyruleno /* Number of the rule by which to reduce */ +){ + int yygoto; /* The next state */ + int yyact; /* The next action */ + yyStackEntry *yymsp; /* The top of the parser's stack */ + int yysize; /* Amount to pop the stack */ + ParseARG_FETCH; + yymsp = yypParser->yytos; +#ifndef NDEBUG + if( yyTraceFILE && yyruleno<(int)(sizeof(yyRuleName)/sizeof(yyRuleName[0])) ){ + yysize = yyRuleInfo[yyruleno].nrhs; + fprintf(yyTraceFILE, "%sReduce [%s], go to state %d.\n", yyTracePrompt, + yyRuleName[yyruleno], yymsp[-yysize].stateno); + } +#endif /* NDEBUG */ + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. This ensures that there is room + ** enough on the stack to push the LHS value */ + if( yyRuleInfo[yyruleno].nrhs==0 ){ +#ifdef YYTRACKMAXSTACKDEPTH + if( (int)(yypParser->yytos - yypParser->yystack)>yypParser->yyhwm ){ + yypParser->yyhwm++; + assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack)); + } +#endif +#if YYSTACKDEPTH>0 + if( yypParser->yytos>=&yypParser->yystack[YYSTACKDEPTH-1] ){ + yyStackOverflow(yypParser); + return; + } +#else + if( yypParser->yytos>=&yypParser->yystack[yypParser->yystksz-1] ){ + if( yyGrowStack(yypParser) ){ + yyStackOverflow(yypParser); + return; + } + yymsp = yypParser->yytos; + } +#endif + } + + switch( yyruleno ){ + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... 
} // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ + YYMINORTYPE yylhsminor; + case 0: /* doc ::= blocks */ +{ engine->root = yymsp[0].minor.yy0; } + break; + case 1: /* blocks ::= blocks block */ +{ + strip_line_tokens_from_block(yymsp[0].minor.yy0); + if (yymsp[-1].minor.yy0 == NULL) { yymsp[-1].minor.yy0 = yymsp[0].minor.yy0; yymsp[0].minor.yy0 = NULL;} + yylhsminor.yy0 = yymsp[-1].minor.yy0; + token_chain_append(yylhsminor.yy0, yymsp[0].minor.yy0); + #ifndef NDEBUG + fprintf(stderr, "Next block %d\n", yylhsminor.yy0->tail->type); + #endif + } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 2: /* blocks ::= block */ +{ + strip_line_tokens_from_block(yymsp[0].minor.yy0); + #ifndef NDEBUG + fprintf(stderr, "First block %d\n", yymsp[0].minor.yy0->type); + #endif + yylhsminor.yy0 = yymsp[0].minor.yy0; + } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 3: /* block ::= para */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_PARA); is_para_html(engine, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 4: /* block ::= indented_code */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_CODE_INDENTED); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 5: /* block ::= LINE_ATX_1 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H1); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 6: /* block ::= LINE_ATX_2 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H2); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 7: /* block ::= LINE_ATX_3 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H3); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, 
yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 8: /* block ::= LINE_ATX_4 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H4); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 9: /* block ::= LINE_ATX_5 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H5); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 10: /* block ::= LINE_ATX_6 */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_H6); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 11: /* block ::= empty */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_EMPTY); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 12: /* block ::= list_bulleted */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_LIST_BULLETED); is_list_loose(yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 13: /* block ::= list_enumerated */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_LIST_ENUMERATED); is_list_loose(yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 14: /* block ::= blockquote */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_BLOCKQUOTE); recursive_parse_blockquote(engine, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 15: /* block ::= table */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_TABLE); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 16: /* block ::= LINE_HR */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_HR); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 17: /* block ::= def_citation */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, 
BLOCK_DEF_CITATION); stack_push(engine->definition_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 18: /* block ::= def_footnote */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_DEF_FOOTNOTE); stack_push(engine->definition_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 19: /* block ::= def_link */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_DEF_LINK); stack_push(engine->definition_stack, yylhsminor.yy0); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 20: /* block ::= html_block */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_HTML); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 21: /* block ::= fenced_block */ +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_CODE_FENCED); yymsp[0].minor.yy0->child->type = CODE_FENCE; } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 22: /* para ::= LINE_PLAIN para_lines */ + case 23: /* para_lines ::= para_lines para_line */ yytestcase(yyruleno==23); + case 24: /* indented_code ::= indented_code code_line */ yytestcase(yyruleno==24); + case 25: /* empty ::= empty LINE_EMPTY */ yytestcase(yyruleno==25); + case 26: /* blockquote ::= LINE_BLOCKQUOTE quote_lines */ yytestcase(yyruleno==26); + case 27: /* quote_lines ::= quote_lines quote_line */ yytestcase(yyruleno==27); + case 28: /* list_bulleted ::= list_bulleted item_bulleted */ yytestcase(yyruleno==28); + case 33: /* list_enumerated ::= list_enumerated item_enumerated */ yytestcase(yyruleno==33); + case 38: /* cont_blocks ::= cont_blocks cont_block */ yytestcase(yyruleno==38); + case 41: /* table ::= table LINE_TABLE */ yytestcase(yyruleno==41); + case 43: /* def_citation ::= LINE_DEF_CITATION para_lines */ yytestcase(yyruleno==43); + case 44: /* def_citation ::= LINE_DEF_CITATION cont_blocks */ yytestcase(yyruleno==44); + case 46: /* def_footnote ::= LINE_DEF_FOOTNOTE para_lines */ yytestcase(yyruleno==46); + case 47: /* 
def_footnote ::= LINE_DEF_FOOTNOTE cont_blocks */ yytestcase(yyruleno==47); + case 48: /* def_link ::= LINE_DEF_LINK para_lines */ yytestcase(yyruleno==48); + case 49: /* html_block ::= LINE_HTML html_block_lines */ yytestcase(yyruleno==49); + case 50: /* html_block_lines ::= html_block_lines html_block_line */ yytestcase(yyruleno==50); + case 52: /* fenced_block ::= LINE_FENCE_BACKTICK fenced_lines */ yytestcase(yyruleno==52); + case 54: /* fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines */ yytestcase(yyruleno==54); + case 55: /* fenced_lines ::= fenced_lines fenced_line */ yytestcase(yyruleno==55); +{ yylhsminor.yy0 = yymsp[-1].minor.yy0; token_chain_append(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 29: /* item_bulleted ::= LINE_LIST_BULLETED para_lines cont_blocks */ + case 34: /* item_enumerated ::= LINE_LIST_ENUMERATED para_lines cont_blocks */ yytestcase(yyruleno==34); +{ token_chain_append(yymsp[-2].minor.yy0, yymsp[-1].minor.yy0); token_chain_append(yymsp[-2].minor.yy0, yymsp[0].minor.yy0); yylhsminor.yy0 = token_new_parent(yymsp[-2].minor.yy0, BLOCK_LIST_ITEM); recursive_parse_list_item(engine, yylhsminor.yy0); } + yymsp[-2].minor.yy0 = yylhsminor.yy0; + break; + case 30: /* item_bulleted ::= LINE_LIST_BULLETED para_lines */ + case 35: /* item_enumerated ::= LINE_LIST_ENUMERATED para_lines */ yytestcase(yyruleno==35); +{ token_chain_append(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); yylhsminor.yy0 = token_new_parent(yymsp[-1].minor.yy0, BLOCK_LIST_ITEM_TIGHT); recursive_parse_list_item(engine, yylhsminor.yy0); } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 31: /* item_bulleted ::= LINE_LIST_BULLETED cont_blocks */ +{ token_chain_append(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); yylhsminor.yy0 = token_new_parent(yymsp[-1].minor.yy0, BLOCK_LIST_ITEM); if (yymsp[0].minor.yy0) {recursive_parse_list_item(engine, yylhsminor.yy0);} } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 
32: /* item_bulleted ::= LINE_LIST_BULLETED */ + case 37: /* item_enumerated ::= LINE_LIST_ENUMERATED */ yytestcase(yyruleno==37); +{ yylhsminor.yy0 = token_new_parent(yymsp[0].minor.yy0, BLOCK_LIST_ITEM_TIGHT); } + yymsp[0].minor.yy0 = yylhsminor.yy0; + break; + case 36: /* item_enumerated ::= LINE_LIST_ENUMERATED cont_blocks */ +{ token_chain_append(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); yylhsminor.yy0 = token_new_parent(yymsp[-1].minor.yy0, BLOCK_LIST_ITEM); recursive_parse_list_item(engine, yylhsminor.yy0); } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 39: /* cont_block ::= empty indented_line para_lines */ +{ yylhsminor.yy0 = yymsp[-2].minor.yy0; token_chain_append(yymsp[-2].minor.yy0, yymsp[-1].minor.yy0); token_chain_append(yymsp[-2].minor.yy0, yymsp[0].minor.yy0); yymsp[-1].minor.yy0->type = LINE_CONTINUATION; } + yymsp[-2].minor.yy0 = yylhsminor.yy0; + break; + case 40: /* cont_block ::= empty indented_line */ +{ yylhsminor.yy0 = yymsp[-1].minor.yy0; token_chain_append(yymsp[-1].minor.yy0, yymsp[0].minor.yy0); yymsp[0].minor.yy0->type = LINE_CONTINUATION; } + yymsp[-1].minor.yy0 = yylhsminor.yy0; + break; + case 42: /* def_citation ::= LINE_DEF_CITATION para_lines cont_blocks */ + case 45: /* def_footnote ::= LINE_DEF_FOOTNOTE para_lines cont_blocks */ yytestcase(yyruleno==45); +{ yylhsminor.yy0 = yymsp[-2].minor.yy0; token_chain_append(yymsp[-2].minor.yy0, yymsp[-1].minor.yy0); token_chain_append(yymsp[-2].minor.yy0, yymsp[0].minor.yy0); } + yymsp[-2].minor.yy0 = yylhsminor.yy0; + break; + case 51: /* fenced_block ::= LINE_FENCE_BACKTICK fenced_lines LINE_FENCE_BACKTICK */ + case 53: /* fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines LINE_FENCE_BACKTICK */ yytestcase(yyruleno==53); +{ yylhsminor.yy0 = yymsp[-2].minor.yy0; token_chain_append(yymsp[-2].minor.yy0, yymsp[-1].minor.yy0); token_chain_append(yymsp[-2].minor.yy0, yymsp[0].minor.yy0); yymsp[0].minor.yy0->child->type = CODE_FENCE; } + yymsp[-2].minor.yy0 = yylhsminor.yy0; + 
break; + default: + /* (56) para ::= LINE_PLAIN */ yytestcase(yyruleno==56); + /* (57) para_lines ::= para_line (OPTIMIZED OUT) */ assert(yyruleno!=57); + /* (58) para_line ::= LINE_CONTINUATION */ yytestcase(yyruleno==58); + /* (59) indented_code ::= LINE_INDENTED_TAB */ yytestcase(yyruleno==59); + /* (60) indented_code ::= LINE_INDENTED_SPACE */ yytestcase(yyruleno==60); + /* (61) code_line ::= indented_line (OPTIMIZED OUT) */ assert(yyruleno!=61); + /* (62) code_line ::= LINE_EMPTY */ yytestcase(yyruleno==62); + /* (63) indented_line ::= LINE_INDENTED_TAB */ yytestcase(yyruleno==63); + /* (64) indented_line ::= LINE_INDENTED_SPACE */ yytestcase(yyruleno==64); + /* (65) empty ::= LINE_EMPTY */ yytestcase(yyruleno==65); + /* (66) blockquote ::= LINE_BLOCKQUOTE */ yytestcase(yyruleno==66); + /* (67) quote_lines ::= quote_line (OPTIMIZED OUT) */ assert(yyruleno!=67); + /* (68) quote_line ::= LINE_BLOCKQUOTE */ yytestcase(yyruleno==68); + /* (69) quote_line ::= LINE_CONTINUATION */ yytestcase(yyruleno==69); + /* (70) list_bulleted ::= item_bulleted (OPTIMIZED OUT) */ assert(yyruleno!=70); + /* (71) list_enumerated ::= item_enumerated (OPTIMIZED OUT) */ assert(yyruleno!=71); + /* (72) cont_blocks ::= cont_block (OPTIMIZED OUT) */ assert(yyruleno!=72); + /* (73) cont_block ::= empty */ yytestcase(yyruleno==73); + /* (74) table ::= LINE_TABLE */ yytestcase(yyruleno==74); + /* (75) def_citation ::= LINE_DEF_CITATION */ yytestcase(yyruleno==75); + /* (76) def_footnote ::= LINE_DEF_FOOTNOTE */ yytestcase(yyruleno==76); + /* (77) def_link ::= LINE_DEF_LINK */ yytestcase(yyruleno==77); + /* (78) html_block ::= LINE_HTML */ yytestcase(yyruleno==78); + /* (79) html_block_lines ::= html_block_line (OPTIMIZED OUT) */ assert(yyruleno!=79); + /* (80) html_block_line ::= LINE_CONTINUATION */ yytestcase(yyruleno==80); + /* (81) html_block_line ::= LINE_HTML */ yytestcase(yyruleno==81); + /* (82) fenced_lines ::= fenced_line (OPTIMIZED OUT) */ assert(yyruleno!=82); + /* (83) 
fenced_line ::= LINE_CONTINUATION */ yytestcase(yyruleno==83); + /* (84) fenced_line ::= LINE_EMPTY */ yytestcase(yyruleno==84); + break; +/********** End reduce actions ************************************************/ + }; + assert( yyrulenoYY_MAX_SHIFT ){ + yyact += YY_MIN_REDUCE - YY_MIN_SHIFTREDUCE; + } + yymsp -= yysize-1; + yypParser->yytos = yymsp; + yymsp->stateno = (YYACTIONTYPE)yyact; + yymsp->major = (YYCODETYPE)yygoto; + yyTraceShift(yypParser, yyact); + }else{ + assert( yyact == YY_ACCEPT_ACTION ); + yypParser->yytos -= yysize; + yy_accept(yypParser); + } +} + +/* +** The following code executes when the parse fails +*/ +#ifndef YYNOERRORRECOVERY +static void yy_parse_failed( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sFail!\n",yyTracePrompt); + } +#endif + while( yypParser->yytos>yypParser->yystack ) yy_pop_parser_stack(yypParser); + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ + + fprintf(stderr, "Parser failed to successfully parse.\n"); +/************ End %parse_failure code *****************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} +#endif /* YYNOERRORRECOVERY */ + +/* +** The following code executes when a syntax error first occurs. 
+*/ +static void yy_syntax_error( + yyParser *yypParser, /* The parser */ + int yymajor, /* The major type of the error token */ + ParseTOKENTYPE yyminor /* The minor type of the error token */ +){ + ParseARG_FETCH; +#define TOKEN yyminor +/************ Begin %syntax_error code ****************************************/ + +#ifndef NDEBUG + fprintf(stderr,"Parser syntax error.\n"); + int n = sizeof(yyTokenName) / sizeof(yyTokenName[0]); + for (int i = 0; i < n; ++i) { + int a = yy_find_shift_action(yypParser, (YYCODETYPE)i); + if (a < YYNSTATE + YYNRULE) { + fprintf(stderr,"expected token: %s\n", yyTokenName[i]); + } + } +#endif +/************ End %syntax_error code ******************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* +** The following is executed when the parser accepts +*/ +static void yy_accept( + yyParser *yypParser /* The parser */ +){ + ParseARG_FETCH; +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sAccept!\n",yyTracePrompt); + } +#endif +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +/*********** End %parse_accept code *******************************************/ + ParseARG_STORE; /* Suppress warning about unused %extra_argument variable */ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParseAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
<ul> +** <li> A pointer to the parser (an opaque structure.) +** <li> The major token number. +** <li> The minor token number. +** <li> An option argument of a grammar-specified type. +** </ul>
    +** +** Outputs: +** None. +*/ +void Parse( + void *yyp, /* The parser */ + int yymajor, /* The major token code number */ + ParseTOKENTYPE yyminor /* The value for the token */ + ParseARG_PDECL /* Optional %extra_argument parameter */ +){ + YYMINORTYPE yyminorunion; + unsigned int yyact; /* The parser action. */ +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + int yyendofinput; /* True if we are at the end of input */ +#endif +#ifdef YYERRORSYMBOL + int yyerrorhit = 0; /* True if yymajor has invoked an error */ +#endif + yyParser *yypParser; /* The parser */ + + yypParser = (yyParser*)yyp; + assert( yypParser->yytos!=0 ); +#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY) + yyendofinput = (yymajor==0); +#endif + ParseARG_STORE; + +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sInput '%s'\n",yyTracePrompt,yyTokenName[yymajor]); + } +#endif + + do{ + yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor); + if( yyact <= YY_MAX_SHIFTREDUCE ){ + yy_shift(yypParser,yyact,yymajor,yyminor); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt--; +#endif + yymajor = YYNOCODE; + }else if( yyact <= YY_MAX_REDUCE ){ + yy_reduce(yypParser,yyact-YY_MIN_REDUCE); + }else{ + assert( yyact == YY_ERROR_ACTION ); + yyminorunion.yy0 = yyminor; +#ifdef YYERRORSYMBOL + int yymx; +#endif +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sSyntax Error!\n",yyTracePrompt); + } +#endif +#ifdef YYERRORSYMBOL + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. 
No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if( yypParser->yyerrcnt<0 ){ + yy_syntax_error(yypParser,yymajor,yyminor); + } + yymx = yypParser->yytos->major; + if( yymx==YYERRORSYMBOL || yyerrorhit ){ +#ifndef NDEBUG + if( yyTraceFILE ){ + fprintf(yyTraceFILE,"%sDiscard input token %s\n", + yyTracePrompt,yyTokenName[yymajor]); + } +#endif + yy_destructor(yypParser, (YYCODETYPE)yymajor, &yyminorunion); + yymajor = YYNOCODE; + }else{ + while( yypParser->yytos >= yypParser->yystack + && yymx != YYERRORSYMBOL + && (yyact = yy_find_reduce_action( + yypParser->yytos->stateno, + YYERRORSYMBOL)) >= YY_MIN_REDUCE + ){ + yy_pop_parser_stack(yypParser); + } + if( yypParser->yytos < yypParser->yystack || yymajor==0 ){ + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + yymajor = YYNOCODE; + }else if( yymx!=YYERRORSYMBOL ){ + yy_shift(yypParser,yyact,YYERRORSYMBOL,yyminor); + } + } + yypParser->yyerrcnt = 3; + yyerrorhit = 1; +#elif defined(YYNOERRORRECOVERY) + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + yy_syntax_error(yypParser,yymajor, yyminor); + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + yymajor = YYNOCODE; + +#else /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. 
+ */ + if( yypParser->yyerrcnt<=0 ){ + yy_syntax_error(yypParser,yymajor, yyminor); + } + yypParser->yyerrcnt = 3; + yy_destructor(yypParser,(YYCODETYPE)yymajor,&yyminorunion); + if( yyendofinput ){ + yy_parse_failed(yypParser); +#ifndef YYNOERRORRECOVERY + yypParser->yyerrcnt = -1; +#endif + } + yymajor = YYNOCODE; +#endif + } + }while( yymajor!=YYNOCODE && yypParser->yytos>yypParser->yystack ); +#ifndef NDEBUG + if( yyTraceFILE ){ + yyStackEntry *i; + char cDiv = '['; + fprintf(yyTraceFILE,"%sReturn. Stack=",yyTracePrompt); + for(i=&yypParser->yystack[1]; i<=yypParser->yytos; i++){ + fprintf(yyTraceFILE,"%c%s", cDiv, yyTokenName[i->major]); + cDiv = ' '; + } + fprintf(yyTraceFILE,"]\n"); + } +#endif + return; +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..06471ee --- /dev/null +++ b/src/parser.h @@ -0,0 +1,22 @@ +#define LINE_CONTINUATION 1 +#define LINE_PLAIN 2 +#define LINE_INDENTED_TAB 3 +#define LINE_INDENTED_SPACE 4 +#define LINE_ATX_1 5 +#define LINE_ATX_2 6 +#define LINE_ATX_3 7 +#define LINE_ATX_4 8 +#define LINE_ATX_5 9 +#define LINE_ATX_6 10 +#define LINE_HR 11 +#define LINE_EMPTY 12 +#define LINE_BLOCKQUOTE 13 +#define LINE_LIST_BULLETED 14 +#define LINE_LIST_ENUMERATED 15 +#define LINE_TABLE 16 +#define LINE_DEF_CITATION 17 +#define LINE_DEF_FOOTNOTE 18 +#define LINE_DEF_LINK 19 +#define LINE_HTML 20 +#define LINE_FENCE_BACKTICK 21 +#define LINE_FENCE_BACKTICK_START 22 diff --git a/src/parser.out b/src/parser.out new file mode 100644 index 0000000..94a0f4d --- /dev/null +++ b/src/parser.out @@ -0,0 +1,819 @@ +State 0: + doc ::= * blocks + blocks ::= * blocks block + blocks ::= * block + block ::= * para + block ::= * indented_code + block ::= * LINE_ATX_1 + block ::= * LINE_ATX_2 + block ::= * LINE_ATX_3 + block ::= * LINE_ATX_4 + block ::= * LINE_ATX_5 + block ::= * LINE_ATX_6 + block ::= * empty + block ::= * list_bulleted + block ::= * list_enumerated + block ::= * blockquote + block ::= * table + block ::= * 
LINE_HR + block ::= * def_citation + block ::= * def_footnote + block ::= * def_link + block ::= * html_block + block ::= * fenced_block + para ::= * LINE_PLAIN para_lines + para ::= * LINE_PLAIN + indented_code ::= * indented_code code_line + indented_code ::= * LINE_INDENTED_TAB + indented_code ::= * LINE_INDENTED_SPACE + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + blockquote ::= * LINE_BLOCKQUOTE quote_lines + blockquote ::= * LINE_BLOCKQUOTE + list_bulleted ::= * list_bulleted item_bulleted + list_bulleted ::= * item_bulleted + item_bulleted ::= * LINE_LIST_BULLETED para_lines cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED para_lines + item_bulleted ::= * LINE_LIST_BULLETED cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED + list_enumerated ::= * list_enumerated item_enumerated + list_enumerated ::= * item_enumerated + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines + item_enumerated ::= * LINE_LIST_ENUMERATED cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED + table ::= * table LINE_TABLE + table ::= * LINE_TABLE + def_citation ::= * LINE_DEF_CITATION para_lines cont_blocks + def_citation ::= * LINE_DEF_CITATION para_lines + def_citation ::= * LINE_DEF_CITATION cont_blocks + def_citation ::= * LINE_DEF_CITATION + def_footnote ::= * LINE_DEF_FOOTNOTE para_lines cont_blocks + def_footnote ::= * LINE_DEF_FOOTNOTE para_lines + def_footnote ::= * LINE_DEF_FOOTNOTE cont_blocks + def_footnote ::= * LINE_DEF_FOOTNOTE + def_link ::= * LINE_DEF_LINK para_lines + def_link ::= * LINE_DEF_LINK + html_block ::= * LINE_HTML html_block_lines + html_block ::= * LINE_HTML + fenced_block ::= * LINE_FENCE_BACKTICK fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= * LINE_FENCE_BACKTICK fenced_lines + fenced_block ::= * LINE_FENCE_BACKTICK_START fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= * LINE_FENCE_BACKTICK_START fenced_lines + + LINE_PLAIN shift 20 + LINE_INDENTED_TAB 
shift-reduce 59 indented_code ::= LINE_INDENTED_TAB + LINE_INDENTED_SPACE shift-reduce 60 indented_code ::= LINE_INDENTED_SPACE + LINE_ATX_1 shift-reduce 5 block ::= LINE_ATX_1 + LINE_ATX_2 shift-reduce 6 block ::= LINE_ATX_2 + LINE_ATX_3 shift-reduce 7 block ::= LINE_ATX_3 + LINE_ATX_4 shift-reduce 8 block ::= LINE_ATX_4 + LINE_ATX_5 shift-reduce 9 block ::= LINE_ATX_5 + LINE_ATX_6 shift-reduce 10 block ::= LINE_ATX_6 + LINE_HR shift-reduce 16 block ::= LINE_HR + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_BLOCKQUOTE shift 14 + LINE_LIST_BULLETED shift 5 + LINE_LIST_ENUMERATED shift 4 + LINE_TABLE shift-reduce 74 table ::= LINE_TABLE + LINE_DEF_CITATION shift 3 + LINE_DEF_FOOTNOTE shift 2 + LINE_DEF_LINK shift 15 + LINE_HTML shift 13 + LINE_FENCE_BACKTICK shift 12 + LINE_FENCE_BACKTICK_START shift 11 + doc accept + blocks shift 1 + block shift-reduce 2 blocks ::= block + para shift-reduce 3 block ::= para + indented_code shift 10 + empty shift 37 + list_bulleted shift 35 + list_enumerated shift 33 + blockquote shift-reduce 14 block ::= blockquote + table shift 36 + def_citation shift-reduce 17 block ::= def_citation + def_footnote shift-reduce 18 block ::= def_footnote + def_link shift-reduce 19 block ::= def_link + html_block shift-reduce 20 block ::= html_block + fenced_block shift-reduce 21 block ::= fenced_block + item_bulleted shift 35 /* because item_bulleted==list_bulleted */ + item_enumerated shift 33 /* because item_enumerated==list_enumerated */ + +State 1: + (0) doc ::= blocks * + blocks ::= blocks * block + block ::= * para + block ::= * indented_code + block ::= * LINE_ATX_1 + block ::= * LINE_ATX_2 + block ::= * LINE_ATX_3 + block ::= * LINE_ATX_4 + block ::= * LINE_ATX_5 + block ::= * LINE_ATX_6 + block ::= * empty + block ::= * list_bulleted + block ::= * list_enumerated + block ::= * blockquote + block ::= * table + block ::= * LINE_HR + block ::= * def_citation + block ::= * def_footnote + block ::= * def_link + block ::= * html_block 
+ block ::= * fenced_block + para ::= * LINE_PLAIN para_lines + para ::= * LINE_PLAIN + indented_code ::= * indented_code code_line + indented_code ::= * LINE_INDENTED_TAB + indented_code ::= * LINE_INDENTED_SPACE + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + blockquote ::= * LINE_BLOCKQUOTE quote_lines + blockquote ::= * LINE_BLOCKQUOTE + list_bulleted ::= * list_bulleted item_bulleted + list_bulleted ::= * item_bulleted + item_bulleted ::= * LINE_LIST_BULLETED para_lines cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED para_lines + item_bulleted ::= * LINE_LIST_BULLETED cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED + list_enumerated ::= * list_enumerated item_enumerated + list_enumerated ::= * item_enumerated + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines + item_enumerated ::= * LINE_LIST_ENUMERATED cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED + table ::= * table LINE_TABLE + table ::= * LINE_TABLE + def_citation ::= * LINE_DEF_CITATION para_lines cont_blocks + def_citation ::= * LINE_DEF_CITATION para_lines + def_citation ::= * LINE_DEF_CITATION cont_blocks + def_citation ::= * LINE_DEF_CITATION + def_footnote ::= * LINE_DEF_FOOTNOTE para_lines cont_blocks + def_footnote ::= * LINE_DEF_FOOTNOTE para_lines + def_footnote ::= * LINE_DEF_FOOTNOTE cont_blocks + def_footnote ::= * LINE_DEF_FOOTNOTE + def_link ::= * LINE_DEF_LINK para_lines + def_link ::= * LINE_DEF_LINK + html_block ::= * LINE_HTML html_block_lines + html_block ::= * LINE_HTML + fenced_block ::= * LINE_FENCE_BACKTICK fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= * LINE_FENCE_BACKTICK fenced_lines + fenced_block ::= * LINE_FENCE_BACKTICK_START fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= * LINE_FENCE_BACKTICK_START fenced_lines + + $ reduce 0 doc ::= blocks + LINE_PLAIN shift 20 + LINE_INDENTED_TAB shift-reduce 59 indented_code ::= LINE_INDENTED_TAB + LINE_INDENTED_SPACE 
shift-reduce 60 indented_code ::= LINE_INDENTED_SPACE + LINE_ATX_1 shift-reduce 5 block ::= LINE_ATX_1 + LINE_ATX_2 shift-reduce 6 block ::= LINE_ATX_2 + LINE_ATX_3 shift-reduce 7 block ::= LINE_ATX_3 + LINE_ATX_4 shift-reduce 8 block ::= LINE_ATX_4 + LINE_ATX_5 shift-reduce 9 block ::= LINE_ATX_5 + LINE_ATX_6 shift-reduce 10 block ::= LINE_ATX_6 + LINE_HR shift-reduce 16 block ::= LINE_HR + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_BLOCKQUOTE shift 14 + LINE_LIST_BULLETED shift 5 + LINE_LIST_ENUMERATED shift 4 + LINE_TABLE shift-reduce 74 table ::= LINE_TABLE + LINE_DEF_CITATION shift 3 + LINE_DEF_FOOTNOTE shift 2 + LINE_DEF_LINK shift 15 + LINE_HTML shift 13 + LINE_FENCE_BACKTICK shift 12 + LINE_FENCE_BACKTICK_START shift 11 + block shift-reduce 1 blocks ::= blocks block + para shift-reduce 3 block ::= para + indented_code shift 10 + empty shift 37 + list_bulleted shift 35 + list_enumerated shift 33 + blockquote shift-reduce 14 block ::= blockquote + table shift 36 + def_citation shift-reduce 17 block ::= def_citation + def_footnote shift-reduce 18 block ::= def_footnote + def_link shift-reduce 19 block ::= def_link + html_block shift-reduce 20 block ::= html_block + fenced_block shift-reduce 21 block ::= fenced_block + item_bulleted shift 35 /* because item_bulleted==list_bulleted */ + item_enumerated shift 33 /* because item_enumerated==list_enumerated */ + +State 2: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + def_footnote ::= LINE_DEF_FOOTNOTE * para_lines cont_blocks + def_footnote ::= LINE_DEF_FOOTNOTE * para_lines + def_footnote ::= LINE_DEF_FOOTNOTE * cont_blocks + (76) def_footnote ::= LINE_DEF_FOOTNOTE * + + LINE_CONTINUATION shift-reduce 
58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 76 ** Parsing conflict ** + empty shift 28 + para_lines shift 6 + para_line shift 6 /* because para_line==para_lines */ + cont_blocks shift 16 + cont_block shift 16 /* because cont_block==cont_blocks */ + {default} reduce 76 def_footnote ::= LINE_DEF_FOOTNOTE + +State 3: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + def_citation ::= LINE_DEF_CITATION * para_lines cont_blocks + def_citation ::= LINE_DEF_CITATION * para_lines + def_citation ::= LINE_DEF_CITATION * cont_blocks + (75) def_citation ::= LINE_DEF_CITATION * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 75 ** Parsing conflict ** + empty shift 28 + para_lines shift 7 + para_line shift 7 /* because para_line==para_lines */ + cont_blocks shift 18 + cont_block shift 18 /* because cont_block==cont_blocks */ + {default} reduce 75 def_citation ::= LINE_DEF_CITATION + +State 4: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + item_enumerated ::= LINE_LIST_ENUMERATED * para_lines cont_blocks + item_enumerated ::= LINE_LIST_ENUMERATED * para_lines + item_enumerated ::= LINE_LIST_ENUMERATED * cont_blocks + (37) item_enumerated ::= LINE_LIST_ENUMERATED * + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + 
LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 37 ** Parsing conflict ** + empty shift 28 + para_lines shift 8 + para_line shift 8 /* because para_line==para_lines */ + cont_blocks shift 21 + cont_block shift 21 /* because cont_block==cont_blocks */ + {default} reduce 37 item_enumerated ::= LINE_LIST_ENUMERATED + +State 5: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + item_bulleted ::= LINE_LIST_BULLETED * para_lines cont_blocks + item_bulleted ::= LINE_LIST_BULLETED * para_lines + item_bulleted ::= LINE_LIST_BULLETED * cont_blocks + (32) item_bulleted ::= LINE_LIST_BULLETED * + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 32 ** Parsing conflict ** + empty shift 28 + para_lines shift 9 + para_line shift 9 /* because para_line==para_lines */ + cont_blocks shift 23 + cont_block shift 23 /* because cont_block==cont_blocks */ + {default} reduce 32 item_bulleted ::= LINE_LIST_BULLETED + +State 6: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + def_footnote ::= LINE_DEF_FOOTNOTE para_lines * cont_blocks + (46) def_footnote ::= LINE_DEF_FOOTNOTE para_lines * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 46 ** Parsing conflict ** + empty shift 28 + para_line shift-reduce 23 para_lines ::= para_lines 
para_line + cont_blocks shift 17 + cont_block shift 17 /* because cont_block==cont_blocks */ + {default} reduce 46 def_footnote ::= LINE_DEF_FOOTNOTE para_lines + +State 7: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + def_citation ::= LINE_DEF_CITATION para_lines * cont_blocks + (43) def_citation ::= LINE_DEF_CITATION para_lines * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 43 ** Parsing conflict ** + empty shift 28 + para_line shift-reduce 23 para_lines ::= para_lines para_line + cont_blocks shift 19 + cont_block shift 19 /* because cont_block==cont_blocks */ + {default} reduce 43 def_citation ::= LINE_DEF_CITATION para_lines + +State 8: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + item_enumerated ::= LINE_LIST_ENUMERATED para_lines * cont_blocks + (35) item_enumerated ::= LINE_LIST_ENUMERATED para_lines * + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 35 ** Parsing conflict ** + empty shift 28 + para_line shift-reduce 23 para_lines ::= para_lines para_line + cont_blocks shift 22 + cont_block shift 22 /* because cont_block==cont_blocks */ + {default} reduce 35 item_enumerated ::= LINE_LIST_ENUMERATED para_lines + +State 9: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + empty ::= * empty LINE_EMPTY + empty 
::= * LINE_EMPTY + item_bulleted ::= LINE_LIST_BULLETED para_lines * cont_blocks + (30) item_bulleted ::= LINE_LIST_BULLETED para_lines * + cont_blocks ::= * cont_blocks cont_block + cont_blocks ::= * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 30 ** Parsing conflict ** + empty shift 28 + para_line shift-reduce 23 para_lines ::= para_lines para_line + cont_blocks shift 24 + cont_block shift 24 /* because cont_block==cont_blocks */ + {default} reduce 30 item_bulleted ::= LINE_LIST_BULLETED para_lines + +State 10: + (4) block ::= indented_code * + indented_code ::= indented_code * code_line + code_line ::= * indented_line + code_line ::= * LINE_EMPTY + indented_line ::= * LINE_INDENTED_TAB + indented_line ::= * LINE_INDENTED_SPACE + + LINE_INDENTED_TAB shift-reduce 63 indented_line ::= LINE_INDENTED_TAB + LINE_INDENTED_TAB reduce 4 ** Parsing conflict ** + LINE_INDENTED_SPACE shift-reduce 64 indented_line ::= LINE_INDENTED_SPACE + LINE_INDENTED_SPACE reduce 4 ** Parsing conflict ** + LINE_EMPTY shift-reduce 62 code_line ::= LINE_EMPTY + LINE_EMPTY reduce 4 ** Parsing conflict ** + code_line shift-reduce 24 indented_code ::= indented_code code_line + indented_line shift-reduce 24 indented_code ::= indented_code code_line /* because indented_line==code_line */ + {default} reduce 4 block ::= indented_code + +State 11: + fenced_block ::= LINE_FENCE_BACKTICK_START * fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= LINE_FENCE_BACKTICK_START * fenced_lines + fenced_lines ::= * fenced_lines fenced_line + fenced_lines ::= * fenced_line + fenced_line ::= * LINE_CONTINUATION + fenced_line ::= * LINE_EMPTY + + LINE_CONTINUATION shift-reduce 83 fenced_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 84 fenced_line ::= LINE_EMPTY + fenced_lines shift 26 + 
fenced_line shift 26 /* because fenced_line==fenced_lines */ + +State 12: + fenced_block ::= LINE_FENCE_BACKTICK * fenced_lines LINE_FENCE_BACKTICK + fenced_block ::= LINE_FENCE_BACKTICK * fenced_lines + fenced_lines ::= * fenced_lines fenced_line + fenced_lines ::= * fenced_line + fenced_line ::= * LINE_CONTINUATION + fenced_line ::= * LINE_EMPTY + + LINE_CONTINUATION shift-reduce 83 fenced_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 84 fenced_line ::= LINE_EMPTY + fenced_lines shift 27 + fenced_line shift 27 /* because fenced_line==fenced_lines */ + +State 13: + html_block ::= LINE_HTML * html_block_lines + (78) html_block ::= LINE_HTML * + html_block_lines ::= * html_block_lines html_block_line + html_block_lines ::= * html_block_line + html_block_line ::= * LINE_CONTINUATION + html_block_line ::= * LINE_HTML + + LINE_CONTINUATION shift-reduce 80 html_block_line ::= LINE_CONTINUATION + LINE_HTML shift-reduce 81 html_block_line ::= LINE_HTML + LINE_HTML reduce 78 ** Parsing conflict ** + html_block_lines shift 29 + html_block_line shift 29 /* because html_block_line==html_block_lines */ + {default} reduce 78 html_block ::= LINE_HTML + +State 14: + blockquote ::= LINE_BLOCKQUOTE * quote_lines + (66) blockquote ::= LINE_BLOCKQUOTE * + quote_lines ::= * quote_lines quote_line + quote_lines ::= * quote_line + quote_line ::= * LINE_BLOCKQUOTE + quote_line ::= * LINE_CONTINUATION + + LINE_CONTINUATION shift-reduce 69 quote_line ::= LINE_CONTINUATION + LINE_BLOCKQUOTE shift-reduce 68 quote_line ::= LINE_BLOCKQUOTE + LINE_BLOCKQUOTE reduce 66 ** Parsing conflict ** + quote_lines shift 30 + quote_line shift 30 /* because quote_line==quote_lines */ + {default} reduce 66 blockquote ::= LINE_BLOCKQUOTE + +State 15: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + def_link ::= LINE_DEF_LINK * para_lines + (77) def_link ::= LINE_DEF_LINK * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + 
para_lines shift 31 + para_line shift 31 /* because para_line==para_lines */ + {default} reduce 77 def_link ::= LINE_DEF_LINK + +State 16: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + (47) def_footnote ::= LINE_DEF_FOOTNOTE cont_blocks * + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 47 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 47 def_footnote ::= LINE_DEF_FOOTNOTE cont_blocks + +State 17: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + (45) def_footnote ::= LINE_DEF_FOOTNOTE para_lines cont_blocks * + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 45 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 45 def_footnote ::= LINE_DEF_FOOTNOTE para_lines cont_blocks + +State 18: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + (44) def_citation ::= LINE_DEF_CITATION cont_blocks * + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 44 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 44 def_citation ::= LINE_DEF_CITATION cont_blocks + +State 19: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + (42) def_citation ::= 
LINE_DEF_CITATION para_lines cont_blocks * + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 42 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 42 def_citation ::= LINE_DEF_CITATION para_lines cont_blocks + +State 20: + para ::= LINE_PLAIN * para_lines + (56) para ::= LINE_PLAIN * + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + para_lines shift 32 + para_line shift 32 /* because para_line==para_lines */ + {default} reduce 56 para ::= LINE_PLAIN + +State 21: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + (36) item_enumerated ::= LINE_LIST_ENUMERATED cont_blocks * + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 36 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 36 item_enumerated ::= LINE_LIST_ENUMERATED cont_blocks + +State 22: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + (34) item_enumerated ::= LINE_LIST_ENUMERATED para_lines cont_blocks * + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 34 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 34 item_enumerated ::= LINE_LIST_ENUMERATED para_lines cont_blocks + +State 23: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + (31) item_bulleted ::= LINE_LIST_BULLETED cont_blocks * + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty 
indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 31 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 31 item_bulleted ::= LINE_LIST_BULLETED cont_blocks + +State 24: + empty ::= * empty LINE_EMPTY + empty ::= * LINE_EMPTY + (29) item_bulleted ::= LINE_LIST_BULLETED para_lines cont_blocks * + cont_blocks ::= cont_blocks * cont_block + cont_block ::= * empty indented_line para_lines + cont_block ::= * empty indented_line + cont_block ::= * empty + + LINE_EMPTY shift-reduce 65 empty ::= LINE_EMPTY + LINE_EMPTY reduce 29 ** Parsing conflict ** + empty shift 28 + cont_block shift-reduce 38 cont_blocks ::= cont_blocks cont_block + {default} reduce 29 item_bulleted ::= LINE_LIST_BULLETED para_lines cont_blocks + +State 25: + para_lines ::= * para_lines para_line + para_lines ::= * para_line + para_line ::= * LINE_CONTINUATION + cont_block ::= empty indented_line * para_lines + (40) cont_block ::= empty indented_line * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + para_lines shift 34 + para_line shift 34 /* because para_line==para_lines */ + {default} reduce 40 cont_block ::= empty indented_line + +State 26: + fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines * LINE_FENCE_BACKTICK + (54) fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines * + fenced_lines ::= fenced_lines * fenced_line + fenced_line ::= * LINE_CONTINUATION + fenced_line ::= * LINE_EMPTY + + LINE_CONTINUATION shift-reduce 83 fenced_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 84 fenced_line ::= LINE_EMPTY + LINE_EMPTY reduce 54 ** Parsing conflict ** + LINE_FENCE_BACKTICK shift-reduce 53 fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines LINE_FENCE_BACKTICK + LINE_FENCE_BACKTICK reduce 54 ** Parsing conflict ** + fenced_line shift-reduce 55 fenced_lines ::= fenced_lines 
fenced_line + {default} reduce 54 fenced_block ::= LINE_FENCE_BACKTICK_START fenced_lines + +State 27: + fenced_block ::= LINE_FENCE_BACKTICK fenced_lines * LINE_FENCE_BACKTICK + (52) fenced_block ::= LINE_FENCE_BACKTICK fenced_lines * + fenced_lines ::= fenced_lines * fenced_line + fenced_line ::= * LINE_CONTINUATION + fenced_line ::= * LINE_EMPTY + + LINE_CONTINUATION shift-reduce 83 fenced_line ::= LINE_CONTINUATION + LINE_EMPTY shift-reduce 84 fenced_line ::= LINE_EMPTY + LINE_EMPTY reduce 52 ** Parsing conflict ** + LINE_FENCE_BACKTICK shift-reduce 51 fenced_block ::= LINE_FENCE_BACKTICK fenced_lines LINE_FENCE_BACKTICK + LINE_FENCE_BACKTICK reduce 52 ** Parsing conflict ** + fenced_line shift-reduce 55 fenced_lines ::= fenced_lines fenced_line + {default} reduce 52 fenced_block ::= LINE_FENCE_BACKTICK fenced_lines + +State 28: + indented_line ::= * LINE_INDENTED_TAB + indented_line ::= * LINE_INDENTED_SPACE + empty ::= empty * LINE_EMPTY + cont_block ::= empty * indented_line para_lines + cont_block ::= empty * indented_line + (73) cont_block ::= empty * + + LINE_INDENTED_TAB shift-reduce 63 indented_line ::= LINE_INDENTED_TAB + LINE_INDENTED_TAB reduce 73 ** Parsing conflict ** + LINE_INDENTED_SPACE shift-reduce 64 indented_line ::= LINE_INDENTED_SPACE + LINE_INDENTED_SPACE reduce 73 ** Parsing conflict ** + LINE_EMPTY shift-reduce 25 empty ::= empty LINE_EMPTY + LINE_EMPTY reduce 73 ** Parsing conflict ** + indented_line shift 25 + {default} reduce 73 cont_block ::= empty + +State 29: + (49) html_block ::= LINE_HTML html_block_lines * + html_block_lines ::= html_block_lines * html_block_line + html_block_line ::= * LINE_CONTINUATION + html_block_line ::= * LINE_HTML + + LINE_CONTINUATION shift-reduce 80 html_block_line ::= LINE_CONTINUATION + LINE_HTML shift-reduce 81 html_block_line ::= LINE_HTML + LINE_HTML reduce 49 ** Parsing conflict ** + html_block_line shift-reduce 50 html_block_lines ::= html_block_lines html_block_line + {default} reduce 49 
html_block ::= LINE_HTML html_block_lines + +State 30: + (26) blockquote ::= LINE_BLOCKQUOTE quote_lines * + quote_lines ::= quote_lines * quote_line + quote_line ::= * LINE_BLOCKQUOTE + quote_line ::= * LINE_CONTINUATION + + LINE_CONTINUATION shift-reduce 69 quote_line ::= LINE_CONTINUATION + LINE_BLOCKQUOTE shift-reduce 68 quote_line ::= LINE_BLOCKQUOTE + LINE_BLOCKQUOTE reduce 26 ** Parsing conflict ** + quote_line shift-reduce 27 quote_lines ::= quote_lines quote_line + {default} reduce 26 blockquote ::= LINE_BLOCKQUOTE quote_lines + +State 31: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + (48) def_link ::= LINE_DEF_LINK para_lines * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + para_line shift-reduce 23 para_lines ::= para_lines para_line + {default} reduce 48 def_link ::= LINE_DEF_LINK para_lines + +State 32: + (22) para ::= LINE_PLAIN para_lines * + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + para_line shift-reduce 23 para_lines ::= para_lines para_line + {default} reduce 22 para ::= LINE_PLAIN para_lines + +State 33: + (13) block ::= list_enumerated * + list_enumerated ::= list_enumerated * item_enumerated + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED para_lines + item_enumerated ::= * LINE_LIST_ENUMERATED cont_blocks + item_enumerated ::= * LINE_LIST_ENUMERATED + + LINE_LIST_ENUMERATED shift 4 + LINE_LIST_ENUMERATED reduce 13 ** Parsing conflict ** + item_enumerated shift-reduce 33 list_enumerated ::= list_enumerated item_enumerated + {default} reduce 13 block ::= list_enumerated + +State 34: + para_lines ::= para_lines * para_line + para_line ::= * LINE_CONTINUATION + (39) cont_block ::= empty indented_line para_lines * + + LINE_CONTINUATION shift-reduce 58 para_line ::= LINE_CONTINUATION + para_line shift-reduce 23 para_lines ::= 
para_lines para_line + {default} reduce 39 cont_block ::= empty indented_line para_lines + +State 35: + (12) block ::= list_bulleted * + list_bulleted ::= list_bulleted * item_bulleted + item_bulleted ::= * LINE_LIST_BULLETED para_lines cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED para_lines + item_bulleted ::= * LINE_LIST_BULLETED cont_blocks + item_bulleted ::= * LINE_LIST_BULLETED + + LINE_LIST_BULLETED shift 5 + LINE_LIST_BULLETED reduce 12 ** Parsing conflict ** + item_bulleted shift-reduce 28 list_bulleted ::= list_bulleted item_bulleted + {default} reduce 12 block ::= list_bulleted + +State 36: + (15) block ::= table * + table ::= table * LINE_TABLE + + LINE_TABLE shift-reduce 41 table ::= table LINE_TABLE + LINE_TABLE reduce 15 ** Parsing conflict ** + {default} reduce 15 block ::= table + +State 37: + (11) block ::= empty * + empty ::= empty * LINE_EMPTY + + LINE_EMPTY shift-reduce 25 empty ::= empty LINE_EMPTY + LINE_EMPTY reduce 11 ** Parsing conflict ** + {default} reduce 11 block ::= empty + +---------------------------------------------------- +Symbols: + 0: $: + 1: LINE_CONTINUATION + 2: LINE_PLAIN + 3: LINE_INDENTED_TAB + 4: LINE_INDENTED_SPACE + 5: LINE_ATX_1 + 6: LINE_ATX_2 + 7: LINE_ATX_3 + 8: LINE_ATX_4 + 9: LINE_ATX_5 + 10: LINE_ATX_6 + 11: LINE_HR + 12: LINE_EMPTY + 13: LINE_BLOCKQUOTE + 14: LINE_LIST_BULLETED + 15: LINE_LIST_ENUMERATED + 16: LINE_TABLE + 17: LINE_DEF_CITATION + 18: LINE_DEF_FOOTNOTE + 19: LINE_DEF_LINK + 20: LINE_HTML + 21: LINE_FENCE_BACKTICK + 22: LINE_FENCE_BACKTICK_START + 23: error: + 24: doc: LINE_PLAIN LINE_INDENTED_TAB LINE_INDENTED_SPACE LINE_ATX_1 LINE_ATX_2 LINE_ATX_3 LINE_ATX_4 LINE_ATX_5 LINE_ATX_6 LINE_HR LINE_EMPTY LINE_BLOCKQUOTE LINE_LIST_BULLETED LINE_LIST_ENUMERATED LINE_TABLE LINE_DEF_CITATION LINE_DEF_FOOTNOTE LINE_DEF_LINK LINE_HTML LINE_FENCE_BACKTICK LINE_FENCE_BACKTICK_START + 25: blocks: LINE_PLAIN LINE_INDENTED_TAB LINE_INDENTED_SPACE LINE_ATX_1 LINE_ATX_2 LINE_ATX_3 LINE_ATX_4 LINE_ATX_5 
LINE_ATX_6 LINE_HR LINE_EMPTY LINE_BLOCKQUOTE LINE_LIST_BULLETED LINE_LIST_ENUMERATED LINE_TABLE LINE_DEF_CITATION LINE_DEF_FOOTNOTE LINE_DEF_LINK LINE_HTML LINE_FENCE_BACKTICK LINE_FENCE_BACKTICK_START + 26: block: LINE_PLAIN LINE_INDENTED_TAB LINE_INDENTED_SPACE LINE_ATX_1 LINE_ATX_2 LINE_ATX_3 LINE_ATX_4 LINE_ATX_5 LINE_ATX_6 LINE_HR LINE_EMPTY LINE_BLOCKQUOTE LINE_LIST_BULLETED LINE_LIST_ENUMERATED LINE_TABLE LINE_DEF_CITATION LINE_DEF_FOOTNOTE LINE_DEF_LINK LINE_HTML LINE_FENCE_BACKTICK LINE_FENCE_BACKTICK_START + 27: para: LINE_PLAIN + 28: indented_code: LINE_INDENTED_TAB LINE_INDENTED_SPACE + 29: empty: LINE_EMPTY + 30: list_bulleted: LINE_LIST_BULLETED + 31: list_enumerated: LINE_LIST_ENUMERATED + 32: blockquote: LINE_BLOCKQUOTE + 33: table: LINE_TABLE + 34: def_citation: LINE_DEF_CITATION + 35: def_footnote: LINE_DEF_FOOTNOTE + 36: def_link: LINE_DEF_LINK + 37: html_block: LINE_HTML + 38: fenced_block: LINE_FENCE_BACKTICK LINE_FENCE_BACKTICK_START + 39: para_lines: LINE_CONTINUATION + 40: para_line: LINE_CONTINUATION + 41: code_line: LINE_INDENTED_TAB LINE_INDENTED_SPACE LINE_EMPTY + 42: indented_line: LINE_INDENTED_TAB LINE_INDENTED_SPACE + 43: quote_lines: LINE_CONTINUATION LINE_BLOCKQUOTE + 44: quote_line: LINE_CONTINUATION LINE_BLOCKQUOTE + 45: item_bulleted: LINE_LIST_BULLETED + 46: cont_blocks: LINE_EMPTY + 47: item_enumerated: LINE_LIST_ENUMERATED + 48: cont_block: LINE_EMPTY + 49: html_block_lines: LINE_CONTINUATION LINE_HTML + 50: html_block_line: LINE_CONTINUATION LINE_HTML + 51: fenced_lines: LINE_CONTINUATION LINE_EMPTY + 52: fenced_line: LINE_CONTINUATION LINE_EMPTY diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..d6923a4 --- /dev/null +++ b/src/parser.y @@ -0,0 +1,237 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file parser.y + + @brief Definition of the parser grammar, processed with lemon to create a parser. 
+ + http://www.hwaci.com/sw/lemon/ + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + + +// +// Language grammar here +// +/* Data type of the value carried by every terminal symbol. */ +%token_type { token * } +/* Extra argument of the generated parser entry point; the rule actions see it as `engine`. */ +%extra_argument { mmd_engine * engine } +/* LINE_CONTINUATION is the fallback token: each of the other line types listed here is retried as LINE_CONTINUATION when it cannot be parsed as itself. */ +%fallback LINE_CONTINUATION LINE_PLAIN LINE_INDENTED_TAB LINE_INDENTED_SPACE. + +/* Start rule: the finished chain of blocks becomes the root stored on the engine. */ +doc ::= blocks(B). { engine->root = B; } + +blocks(A) ::= blocks(B) block(C). 
+ { + strip_line_tokens_from_block(C); + if (B == NULL) { B = C; C = NULL;} /* an empty chain is started by the new block */ + A = B; + token_chain_append(A, C); + #ifndef NDEBUG + if (A && A->tail) { fprintf(stderr, "Next block %d\n", A->tail->type); } /* trace only: tolerate the empty chain handled above */ + #endif + } +blocks(A) ::= block(B). + { + strip_line_tokens_from_block(B); + #ifndef NDEBUG + if (B) { fprintf(stderr, "First block %d\n", B->type); } /* trace only: same tolerance as the rule above */ + #endif + A = B; + } + +/* Each alternative wraps its line chain in a parent token of the matching BLOCK_* type. ATX headers are also pushed on engine->header_stack unless EXT_NO_LABELS is set. */ +block(A) ::= para(B). { A = token_new_parent(B, BLOCK_PARA); is_para_html(engine, A); } +block(A) ::= indented_code(B). { A = token_new_parent(B, BLOCK_CODE_INDENTED); } +block(A) ::= LINE_ATX_1(B). { A = token_new_parent(B, BLOCK_H1); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= LINE_ATX_2(B). { A = token_new_parent(B, BLOCK_H2); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= LINE_ATX_3(B). { A = token_new_parent(B, BLOCK_H3); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= LINE_ATX_4(B). { A = token_new_parent(B, BLOCK_H4); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= LINE_ATX_5(B). { A = token_new_parent(B, BLOCK_H5); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= LINE_ATX_6(B). { A = token_new_parent(B, BLOCK_H6); if (!(engine->extensions & EXT_NO_LABELS)) stack_push(engine->header_stack, A); } +block(A) ::= empty(B). { A = token_new_parent(B, BLOCK_EMPTY); } +block(A) ::= list_bulleted(B). { A = token_new_parent(B, BLOCK_LIST_BULLETED); is_list_loose(A); } +block(A) ::= list_enumerated(B). { A = token_new_parent(B, BLOCK_LIST_ENUMERATED); is_list_loose(A); } +block(A) ::= blockquote(B). { A = token_new_parent(B, BLOCK_BLOCKQUOTE); recursive_parse_blockquote(engine, A); } +block(A) ::= table(B). { A = token_new_parent(B, BLOCK_TABLE); } +block(A) ::= LINE_HR(B). { A = token_new_parent(B, BLOCK_HR); } +block(A) ::= def_citation(B). 
{ A = token_new_parent(B, BLOCK_DEF_CITATION); stack_push(engine->definition_stack, A); } +block(A) ::= def_footnote(B). { A = token_new_parent(B, BLOCK_DEF_FOOTNOTE); stack_push(engine->definition_stack, A); } +block(A) ::= def_link(B). { A = token_new_parent(B, BLOCK_DEF_LINK); stack_push(engine->definition_stack, A); } +block(A) ::= html_block(B). { A = token_new_parent(B, BLOCK_HTML); } +block(A) ::= fenced_block(B). { A = token_new_parent(B, BLOCK_CODE_FENCED); B->child->type = CODE_FENCE; } + +/* A paragraph is a LINE_PLAIN plus any following LINE_CONTINUATION lines, chained onto the first line. */ +para(A) ::= LINE_PLAIN(B) para_lines(C). { A = B; token_chain_append(B, C); } +para ::= LINE_PLAIN. + +para_lines(A) ::= para_lines(B) para_line(C). { A = B; token_chain_append(B, C); } +para_lines ::= para_line. + +para_line ::= LINE_CONTINUATION. +/* Indented code starts with an indented line and absorbs further indented or empty lines. */ +indented_code(A) ::= indented_code(B) code_line(C). { A = B; token_chain_append(B, C); } +indented_code ::= LINE_INDENTED_TAB. +indented_code ::= LINE_INDENTED_SPACE. + +code_line ::= indented_line. +code_line ::= LINE_EMPTY. + +indented_line ::= LINE_INDENTED_TAB. +indented_line ::= LINE_INDENTED_SPACE. +/* Runs of empty lines are chained together. */ +empty(A) ::= empty(B) LINE_EMPTY(C). { A = B; token_chain_append(B, C); } +empty ::= LINE_EMPTY. +/* A blockquote starts with LINE_BLOCKQUOTE and absorbs following blockquote or continuation lines. */ +blockquote(A) ::= LINE_BLOCKQUOTE(B) quote_lines(C). { A = B; token_chain_append(B, C); } +blockquote ::= LINE_BLOCKQUOTE. + +quote_lines(A) ::= quote_lines(B) quote_line(C). { A = B; token_chain_append(B, C); } +quote_lines ::= quote_line. + +quote_line ::= LINE_BLOCKQUOTE. +quote_line ::= LINE_CONTINUATION. +/* A bulleted list is a chain of bulleted items. */ +list_bulleted(A) ::= list_bulleted(B) item_bulleted(C). { A = B; token_chain_append(B, C); } +list_bulleted ::= item_bulleted. +/* Items that include continuation blocks become BLOCK_LIST_ITEM; items without them become BLOCK_LIST_ITEM_TIGHT. */ +item_bulleted(A) ::= LINE_LIST_BULLETED(B) para_lines(C) cont_blocks(D). { token_chain_append(B, C); token_chain_append(B, D); A = token_new_parent(B, BLOCK_LIST_ITEM); recursive_parse_list_item(engine, A); } +item_bulleted(A) ::= LINE_LIST_BULLETED(B) para_lines(C). 
{ token_chain_append(B, C); A = token_new_parent(B, BLOCK_LIST_ITEM_TIGHT); recursive_parse_list_item(engine, A); } +item_bulleted(A) ::= LINE_LIST_BULLETED(B) cont_blocks(C). { token_chain_append(B, C); A = token_new_parent(B, BLOCK_LIST_ITEM); if (C) {recursive_parse_list_item(engine, A);} } +item_bulleted(A) ::= LINE_LIST_BULLETED(B). { A = token_new_parent(B, BLOCK_LIST_ITEM_TIGHT); } +/* Enumerated lists mirror the bulleted rules above, including the guard on an item that has only continuation blocks. */ +list_enumerated(A) ::= list_enumerated(B) item_enumerated(C). { A = B; token_chain_append(B, C); } +list_enumerated ::= item_enumerated. + +item_enumerated(A) ::= LINE_LIST_ENUMERATED(B) para_lines(C) cont_blocks(D). { token_chain_append(B, C); token_chain_append(B, D); A = token_new_parent(B, BLOCK_LIST_ITEM); recursive_parse_list_item(engine, A); } +item_enumerated(A) ::= LINE_LIST_ENUMERATED(B) para_lines(C). { token_chain_append(B, C); A = token_new_parent(B, BLOCK_LIST_ITEM_TIGHT); recursive_parse_list_item(engine, A); } +item_enumerated(A) ::= LINE_LIST_ENUMERATED(B) cont_blocks(C). { token_chain_append(B, C); A = token_new_parent(B, BLOCK_LIST_ITEM); if (C) {recursive_parse_list_item(engine, A);} } +item_enumerated(A) ::= LINE_LIST_ENUMERATED(B). { A = token_new_parent(B, BLOCK_LIST_ITEM_TIGHT); } +/* Continuation block: empty line(s), optionally followed by an indented line (re-typed as LINE_CONTINUATION) and further paragraph lines. */ +cont_blocks(A) ::= cont_blocks(B) cont_block(C). { A = B; token_chain_append(B, C); } +cont_blocks ::= cont_block. + +cont_block(A) ::= empty(B) indented_line(C) para_lines(D).{ A = B; token_chain_append(B, C); token_chain_append(B, D); C->type = LINE_CONTINUATION; } +cont_block(A) ::= empty(B) indented_line(C). { A = B; token_chain_append(B, C); C->type = LINE_CONTINUATION; } +cont_block ::= empty. +/* A table is a run of LINE_TABLE lines. */ +table(A) ::= table(B) LINE_TABLE(C). { A = B; token_chain_append(B, C); } +table ::= LINE_TABLE. +/* Citation definition: the defining line plus optional paragraph lines and continuation blocks. */ +def_citation(A) ::= LINE_DEF_CITATION(B) para_lines(C) cont_blocks(D). { A = B; token_chain_append(B, C); token_chain_append(B, D); } +def_citation(A) ::= LINE_DEF_CITATION(B) para_lines(C). { A = B; token_chain_append(B, C); } +def_citation(A) ::= LINE_DEF_CITATION(B) cont_blocks(C). 
{ A = B; token_chain_append(B, C); } +def_citation ::= LINE_DEF_CITATION. +/* Footnote definitions take the same four shapes as citation definitions. */ +def_footnote(A) ::= LINE_DEF_FOOTNOTE(B) para_lines(C) cont_blocks(D). { A = B; token_chain_append(B, C); token_chain_append(B, D); } +def_footnote(A) ::= LINE_DEF_FOOTNOTE(B) para_lines(C). { A = B; token_chain_append(B, C); } +def_footnote(A) ::= LINE_DEF_FOOTNOTE(B) cont_blocks(C). { A = B; token_chain_append(B, C); } +def_footnote ::= LINE_DEF_FOOTNOTE. +/* Link definitions accept paragraph lines but no continuation blocks. */ +def_link(A) ::= LINE_DEF_LINK(B) para_lines(C). { A = B; token_chain_append(B, C); } +def_link ::= LINE_DEF_LINK. +/* An HTML block is a LINE_HTML followed by any mix of LINE_HTML and LINE_CONTINUATION lines. */ +html_block(A) ::= LINE_HTML(B) html_block_lines(C). { A = B; token_chain_append(B, C); } +html_block ::= LINE_HTML. + + +html_block_lines(A) ::= html_block_lines(B) html_block_line(C). { A = B; token_chain_append(B, C); } +html_block_lines ::= html_block_line. + +html_block_line ::= LINE_CONTINUATION. +html_block_line ::= LINE_HTML. +/* Fenced code: an opening fence, at least one content line, and an optional closing fence whose child token is re-typed as CODE_FENCE. */ +fenced_block(A) ::= LINE_FENCE_BACKTICK(B) fenced_lines(C) LINE_FENCE_BACKTICK(D). { A = B; token_chain_append(B, C); token_chain_append(B, D); D->child->type = CODE_FENCE; } +fenced_block(A) ::= LINE_FENCE_BACKTICK(B) fenced_lines(C). { A = B; token_chain_append(B, C); } +fenced_block(A) ::= LINE_FENCE_BACKTICK_START(B) fenced_lines(C) LINE_FENCE_BACKTICK(D). { A = B; token_chain_append(B, C); token_chain_append(B, D); D->child->type = CODE_FENCE; } +fenced_block(A) ::= LINE_FENCE_BACKTICK_START(B) fenced_lines(C). { A = B; token_chain_append(B, C); } + + +fenced_lines(A) ::= fenced_lines(B) fenced_line(C). { A = B; token_chain_append(B, C); } +fenced_lines ::= fenced_line. + +fenced_line ::= LINE_CONTINUATION. +fenced_line ::= LINE_EMPTY. 
+ +// +// Additional Configuration +// +/* Code copied into the generated parser: standard C headers first (stdio.h supplies the fprintf used by the handlers below), then the project headers. */ +%include { + #include <assert.h> + #include <stdio.h> + #include <stdlib.h> + + #include "libMultiMarkdown.h" + #include "mmd.h" + #include "parser.h" + #include "token.h" +} + + +// Improved error messages for debugging: +// http://stackoverflow.com/questions/11705737/expected-token-using-lemon-parser-generator + +%syntax_error { +#ifndef NDEBUG + fprintf(stderr,"Parser syntax error.\n"); + int n = sizeof(yyTokenName) / sizeof(yyTokenName[0]); + for (int i = 0; i < n; ++i) { + int a = yy_find_shift_action(yypParser, (YYCODETYPE)i); + if (a < YYNSTATE + YYNRULE) { + fprintf(stderr,"expected token: %s\n", yyTokenName[i]); + } + } +#endif +} +/* Unlike the syntax-error details above, this message is printed in every build. */ +%parse_failure { + fprintf(stderr, "Parser failed to successfully parse.\n"); +} + diff --git a/src/rng.c b/src/rng.c new file mode 100644 index 0000000..07a8701 --- /dev/null +++ b/src/rng.c @@ -0,0 +1,117 @@ +/* This program by D E Knuth is in the public domain and freely copyable + * AS LONG AS YOU MAKE ABSOLUTELY NO CHANGES! + * It is explained in Seminumerical Algorithms, 3rd edition, Section 3.6 + * (or in the errata to the 2nd edition --- see + * http://www-cs-faculty.stanford.edu/~knuth/taocp.html + * in the changes to Volume 2 on pages 171 and following). */ + +/* N.B. The MODIFICATIONS introduced in the 9th printing (2002) are + included here; there's no backwards compatibility with the original. */ + +/* This version also adopts Brendan McKay's suggestion to + accommodate naive users who forget to call ran_start(seed). */ + +/* If you find any bugs, please report them immediately to + * taocp@cs.stanford.edu + * (and you will be rewarded if the bug is genuine). Thanks! */ + +/************ see the book for explanations and caveats! 
*******************/ +/************ in particular, you need two's complement arithmetic **********/ + +#define KK 100 /* the long lag */ +#define LL 37 /* the short lag */ +#define MM (1L<<30) /* the modulus */ +#define mod_diff(x,y) (((x)-(y))&(MM-1)) /* subtraction mod MM */ + +long ran_x[KK]; /* the generator state */ + +#ifdef __STDC__ +void ran_array(long aa[],int n) +#else +void ran_array(aa,n) /* put n new random numbers in aa */ + long *aa; /* destination */ + int n; /* array length (must be at least KK) */ +#endif +{ + register int i,j; + for (j=0;j<KK;j++) aa[j]=ran_x[j]; + for (;j<n;j++) aa[j]=mod_diff(aa[j-KK],aa[j-LL]); + for (i=0;i<LL;i++,j++) ran_x[i]=mod_diff(aa[j-KK],aa[j-LL]); + for (;i<KK;i++,j++) ran_x[i]=mod_diff(aa[j-KK],ran_x[i-LL]); +} + +/* the following routines are from exercise 3.6--15 */ +/* after calling ran_start, get new randoms by, e.g., "x=ran_arr_next()" */ + +#define QUALITY 1009 /* recommended quality level for high-res use */ +long ran_arr_buf[QUALITY]; +long ran_arr_dummy=-1, ran_arr_started=-1; +long *ran_arr_ptr=&ran_arr_dummy; /* the next random number, or -1 */ + +#define TT 70 /* guaranteed separation between streams */ +#define is_odd(x) ((x)&1) /* units bit of x */ + +#ifdef __STDC__ +void ran_start(long seed) +#else +void ran_start(seed) /* do this before using ran_array */ + long seed; /* selector for different streams */ +#endif +{ + register int t,j; + long x[KK+KK-1]; /* the preparation buffer */ + register long ss=(seed+2)&(MM-2); + for (j=0;j<KK;j++) { + x[j]=ss; /* bootstrap the buffer */ + ss<<=1; if (ss>=MM) ss-=MM-2; /* cyclic shift 29 bits */ + } + x[1]++; /* make x[1] (and only x[1]) odd */ + for (ss=seed&(MM-1),t=TT-1; t; ) { + for (j=KK-1;j>0;j--) x[j+j]=x[j], x[j+j-1]=0; /* "square" */ + for (j=KK+KK-2;j>=KK;j--) + x[j-(KK-LL)]=mod_diff(x[j-(KK-LL)],x[j]), + x[j-KK]=mod_diff(x[j-KK],x[j]); + if (is_odd(ss)) { /* "multiply by z" */ + for (j=KK;j>0;j--) x[j]=x[j-1]; + x[0]=x[KK]; /* shift the buffer cyclically */ + x[LL]=mod_diff(x[LL],x[KK]); + } + if (ss) ss>>=1; else t--; + } + for (j=0;j<LL;j++) ran_x[j+KK-LL]=x[j]; + for (;j<KK;j++) ran_x[j-LL]=x[j]; + for (j=0;j<10;j++) ran_array(x,KK+KK-1); /* warm things up */ + ran_arr_ptr=&ran_arr_started; +} + +#define ran_arr_next() (*ran_arr_ptr>=0? *ran_arr_ptr++: ran_arr_cycle()) +long ran_arr_cycle() +{ + if (ran_arr_ptr==&ran_arr_dummy) + ran_start(314159L); /* the user forgot to initialize */ + ran_array(ran_arr_buf,QUALITY); + ran_arr_buf[KK]=-1; + ran_arr_ptr=ran_arr_buf+1; + return ran_arr_buf[0]; +} + +/* Tweaked to include as a library - Fletcher T. 
Penney */ +/*#include <stdio.h> +int main() +{ + register int m; long a[2009]; + ran_start(310952L); + for (m=0;m<=2009;m++) ran_array(a,1009); + printf("%ld\n", a[0]); *//* 995235265 */ +/* ran_start(310952L); + for (m=0;m<=1009;m++) ran_array(a,2009); + printf("%ld\n", a[0]); *//* 995235265 */ +/* printf("%ld\n",ran_arr_next()); + return 0; +} */ +/* Library entry point: returns the next value produced by ran_arr_next(). */ +long ran_num_next() +{ + return ran_arr_next(); +} + diff --git a/src/scanners.c b/src/scanners.c new file mode 100644 index 0000000..3caba93 --- /dev/null +++ b/src/scanners.c @@ -0,0 +1,8515 @@ +/* Generated by re2c 0.14.3 on Wed Jan 18 22:23:16 2017 */ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file scanners.c + + @brief After text has been tokenized, there are still some constructs that are best + interpreted using regular expressions. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + +*/ + +#include <stdlib.h> + +#include "scanners.h" + + + +/* re2c-generated matcher. Returns the number of bytes covered by optional spaces or tabs, at most one line ending (LF, CR or CRLF) and any spaces or tabs after it; returns 0 when the first byte is not a space, tab, LF or CR. */ +size_t scan_spnl(const char * c) { + const char * start = c; + + +{ + char yych; + + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy3; + case '\n': goto yy4; + case '\r': goto yy6; + default: goto yy7; + } +yy2: + { return (size_t)( c - start ); } +yy3: + yych = *++c; + goto yy11; +yy4: + ++c; + yych = *c; +yy5: + switch (yych) { + case '\t': + case ' ': goto yy4; + default: goto yy2; + } +yy6: + yych = *++c; + switch (yych) { + case '\n': goto yy4; + default: goto yy5; + } +yy7: + ++c; + { return 0; } +yy9: + yych = *++c; + switch (yych) { + case '\n': goto yy4; + default: goto yy5; + } +yy10: + ++c; + yych = *c; +yy11: + switch (yych) { + case '\t': + case ' ': goto yy10; + case '\n': goto yy4; + case '\r': goto yy9; + default: goto yy2; + } +} + +} + +/* re2c-generated matcher. Returns the length of the key at c: a colon, underscore or ASCII letter followed by any number of ASCII letters, digits, hyphens, periods, colons or underscores; returns 0 when c does not start with such a character. */ +size_t scan_key(const char * c) { + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy14; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + 
case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy15; + default: goto yy17; + } +yy14: + { return 0; } +yy15: + ++c; + yych = *c; + goto yy19; +yy16: + { return (size_t)( c - start ); } +yy17: + yych = *++c; + goto yy14; +yy18: + ++c; + yych = *c; +yy19: + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy18; + default: goto yy16; + } +} + +} + +/* re2c-generated matcher. Returns the length of the value at c: either a single- or double-quoted string closed before any NUL, LF or CR (both quote characters are counted), or an unquoted run of ASCII letters, digits and periods; returns 0 when c does not start such a value or the quote is never closed. */ +size_t scan_value(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy22; + case '"': goto yy23; + case '\'': goto yy24; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 
'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy25; + default: goto yy27; + } +yy22: + { return 0; } +yy23: + yych = *(marker = ++c); + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy22; + default: goto yy35; + } +yy24: + yych = *(marker = ++c); + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy22; + default: goto yy31; + } +yy25: + ++c; + yych = *c; + goto yy29; +yy26: + { return (size_t)( c - start ); } +yy27: + yych = *++c; + goto yy22; +yy28: + ++c; + yych = *c; +yy29: + switch (yych) { + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy28; + default: goto yy26; + } +yy30: + ++c; + yych = *c; +yy31: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy32; + case '\'': goto yy33; + default: goto yy30; + } +yy32: + c = marker; + goto yy22; +yy33: + yych = *++c; + goto yy26; +yy34: + ++c; + yych = *c; +yy35: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy32; + case '"': goto yy33; + default: goto yy34; + } +} + +} + + +size_t scan_attr(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *(marker = c); + switch (yych) { + case '\t': + case ' ': goto yy39; + case 
'\n': goto yy40; + case '\r': goto yy43; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy44; + default: goto yy45; + } +yy38: + { return 0; } +yy39: + yych = *(marker = ++c); + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy60; + default: goto yy38; + } +yy40: + ++c; + yych = *c; +yy41: + switch (yych) { + case '\t': + case ' ': goto yy40; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + 
case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy46; + default: goto yy42; + } +yy42: + c = marker; + goto yy38; +yy43: + yych = *(marker = ++c); + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy40; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy46; + default: goto yy38; + } +yy44: + yych = *(marker = ++c); + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy46; + case '=': 
goto yy48; + default: goto yy38; + } +yy45: + yych = *++c; + goto yy38; +yy46: + ++c; + yych = *c; + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy46; + case '=': goto yy48; + default: goto yy42; + } +yy48: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy48; + case '"': goto yy50; + case '\'': goto yy52; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy54; + default: goto yy42; + } +yy50: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy42; + case '"': 
goto yy57; + default: goto yy50; + } +yy52: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy42; + case '\'': goto yy57; + default: goto yy52; + } +yy54: + ++c; + yych = *c; + switch (yych) { + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy54; + default: goto yy56; + } +yy56: + { return (size_t)( c - start ); } +yy57: + yych = *++c; + goto yy56; +yy58: + yych = *++c; + switch (yych) { + case '\n': goto yy40; + default: goto yy41; + } +yy59: + ++c; + yych = *c; +yy60: + switch (yych) { + case '\t': + case ' ': goto yy59; + case '\n': goto yy40; + case '\r': goto yy58; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy46; + default: goto 
yy42; + } +} + +} + + +size_t scan_attributes(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + unsigned int yyaccept = 0; + yych = *(marker = c); + switch (yych) { + case '\t': + case ' ': goto yy64; + case '\n': goto yy65; + case '\r': goto yy68; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy69; + default: goto yy70; + } +yy63: + { return 0; } +yy64: + yyaccept = 0; + yych = *(marker = ++c); + switch (yych) { + case '\t': + case '\n': + case '\r': + case ' ': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy83; + default: goto yy63; + } +yy65: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy65; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + 
case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + default: goto yy67; + } +yy67: + c = marker; + if (yyaccept == 0) { + goto yy63; + } else { + goto yy81; + } +yy68: + yyaccept = 0; + yych = *(marker = ++c); + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy65; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + default: goto yy63; + } +yy69: + yyaccept = 0; + yych = *(marker = ++c); + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + 
case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + case '=': goto yy73; + default: goto yy63; + } +yy70: + yych = *++c; + goto yy63; +yy71: + ++c; + yych = *c; + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + case '=': goto yy73; + default: goto yy67; + } +yy73: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy73; + case '"': goto yy75; + case '\'': goto yy77; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + 
case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy79; + default: goto yy67; + } +yy75: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy67; + case '"': goto yy87; + default: goto yy75; + } +yy77: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy67; + case '\'': goto yy87; + default: goto yy77; + } +yy79: + yyaccept = 1; + marker = ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy82; + case '\n': goto yy65; + case '\r': goto yy84; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy79; + case ':': + case '_': goto yy71; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy85; + default: goto yy81; + } +yy81: + { return (size_t)( c - start ); } +yy82: + ++c; + yych = *c; +yy83: + switch (yych) { + case '\t': + case ' ': goto yy82; + case '\n': goto yy65; + case '\r': goto yy84; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': 
+ case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + default: goto yy67; + } +yy84: + ++c; + yych = *c; + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy65; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + default: goto yy67; + } +yy85: + yyaccept = 1; + marker = ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy82; + case '\n': goto yy65; + case '\r': goto yy84; + case '-': + case ':': + case '_': goto yy71; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 
'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy85; + case '=': goto yy73; + default: goto yy81; + } +yy87: + yyaccept = 1; + marker = ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy82; + case '\n': goto yy65; + case '\r': goto yy84; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy71; + default: goto yy81; + } +} + +} + + +size_t scan_email(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy90; + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + 
case 'y': + case 'z': + case '~': goto yy92; + case 'M': + case 'm': goto yy91; + default: goto yy93; + } +yy90: + { return 0; } +yy91: + yych = *(marker = ++c); + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '~': goto yy96; + case '@': goto yy94; + case 'A': + case 'a': goto yy101; + default: goto yy90; + } +yy92: + yych = *(marker = ++c); + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '@': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + 
case 'z': + case '~': goto yy97; + default: goto yy90; + } +yy93: + yych = *++c; + goto yy90; +yy94: + yych = *++c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy95; + default: goto yy98; + } +yy95: + c = marker; + goto yy90; +yy96: + ++c; + yych = *c; +yy97: + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '~': goto yy96; + case '@': goto yy94; + default: goto yy95; + } +yy98: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy100; + default: goto yy98; + } +yy100: + { return (size_t)( c - start ); } +yy101: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy102; + default: goto yy97; + } +yy102: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy103; + default: goto yy97; + } +yy103: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy104; + default: goto yy97; + } +yy104: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy105; + default: goto yy97; + } +yy105: + yych = *++c; + switch (yych) { + case ':': goto yy106; + default: goto yy97; + } +yy106: + ++c; + switch ((yych = *c)) { + case 
'@': goto yy95; + default: goto yy97; + } +} + +} + + +size_t scan_url(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy109; + case '!': + case '$': + case '%': + case '+': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + case '~': goto yy111; + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy112; + case 'M': + case 'm': goto yy110; + default: goto yy113; + } +yy109: + { return 0; } +yy110: + yych = *(marker = ++c); + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + case '~': goto yy119; + case '-': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 
'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case '@': goto yy118; + case 'A': + case 'a': goto yy129; + default: goto yy109; + } +yy111: + yych = *(marker = ++c); + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '@': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '~': goto yy120; + default: goto yy109; + } +yy112: + yych = *(marker = ++c); + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + case '~': goto yy119; + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': 
+ case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case '@': goto yy118; + default: goto yy109; + } +yy113: + yych = *++c; + goto yy109; +yy114: + yych = *++c; + switch (yych) { + case '/': goto yy124; + default: goto yy115; + } +yy115: + c = marker; + goto yy109; +yy116: + ++c; + yych = *c; + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + case '~': goto yy119; + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case '@': goto yy118; + default: goto yy115; + } +yy118: + yych = *++c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy115; + default: goto yy121; + } +yy119: + ++c; + yych = *c; +yy120: + switch (yych) { + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': 
+ case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '~': goto yy119; + case '@': goto yy118; + default: goto yy115; + } +yy121: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy123; + default: goto yy121; + } +yy123: + { return (size_t)( c - start ); } +yy124: + yych = *++c; + switch (yych) { + case '/': goto yy125; + default: goto yy115; + } +yy125: + yych = *++c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy115; + default: goto yy126; + } +yy126: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy128; + default: goto yy126; + } +yy128: + { return (size_t)( c - start ); } +yy129: + yych = *++c; + switch (yych) { + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case 'I': + case 'i': goto yy130; + default: goto yy120; + } +yy130: + yych = *++c; + switch (yych) { + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + 
case 'J': + case 'K': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case 'L': + case 'l': goto yy131; + default: goto yy120; + } +yy131: + yych = *++c; + switch (yych) { + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy114; + case 'T': + case 't': goto yy132; + default: goto yy120; + } +yy132: + yych = *++c; + switch (yych) { + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto 
yy116; + case ':': goto yy114; + case 'O': + case 'o': goto yy133; + default: goto yy120; + } +yy133: + yych = *++c; + switch (yych) { + case '-': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy116; + case ':': goto yy134; + default: goto yy120; + } +yy134: + yych = *++c; + switch (yych) { + case '/': goto yy135; + case '@': goto yy115; + default: goto yy120; + } +yy135: + yych = *++c; + switch (yych) { + case '/': goto yy136; + default: goto yy120; + } +yy136: + yych = *++c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy115; + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': 
+ case 'z': + case '~': goto yy137; + case '@': goto yy139; + default: goto yy126; + } +yy137: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy128; + case '!': + case '$': + case '%': + case '+': + case '-': + case '.': + case '/': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '~': goto yy137; + case '@': goto yy139; + default: goto yy126; + } +yy139: + yych = *++c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy128; + default: goto yy140; + } +yy140: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\t': + case '\n': + case '\r': + case ' ': + case '>': goto yy123; + default: goto yy140; + } +} + +} + + +size_t scan_ref_citation(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy144; + case ' ': goto yy145; + case '[': goto yy146; + default: goto yy147; + } +yy144: + { return 0; } +yy145: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy157; + case '[': goto yy158; + default: goto yy144; + } +yy146: + yych = *(marker = ++c); + switch (yych) { + case '#': goto yy148; + default: goto yy144; + } +yy147: + yych = *++c; + goto 
yy144; +yy148: + yych = *++c; + switch (yych) { + case ']': goto yy149; + default: goto yy151; + } +yy149: + c = marker; + goto yy144; +yy150: + ++c; + yych = *c; +yy151: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy149; + case ']': goto yy152; + default: goto yy150; + } +yy152: + yych = *++c; + switch (yych) { + case ':': goto yy153; + default: goto yy149; + } +yy153: + yych = *++c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy149; + default: goto yy154; + } +yy154: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy156; + default: goto yy154; + } +yy156: + { return (size_t)( c - start ); } +yy157: + yych = *++c; + switch (yych) { + case ' ': goto yy159; + case '[': goto yy158; + default: goto yy149; + } +yy158: + yych = *++c; + switch (yych) { + case '#': goto yy148; + default: goto yy149; + } +yy159: + ++c; + switch ((yych = *c)) { + case '[': goto yy158; + default: goto yy149; + } +} + +} + +size_t scan_ref_foot(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy162; + case ' ': goto yy163; + case '[': goto yy164; + default: goto yy165; + } +yy162: + { return 0; } +yy163: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy175; + case '[': goto yy176; + default: goto yy162; + } +yy164: + yych = *(marker = ++c); + switch (yych) { + case '^': goto yy166; + default: goto yy162; + } +yy165: + yych = *++c; + goto yy162; +yy166: + yych = *++c; + switch (yych) { + case ']': goto yy167; + default: goto yy169; + } +yy167: + c = marker; + goto yy162; +yy168: + ++c; + yych = *c; +yy169: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy167; + case ']': goto yy170; + default: goto yy168; + } +yy170: + yych = *++c; + switch (yych) { + case ':': goto yy171; + default: goto yy167; + } +yy171: + yych = *++c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy167; + default: 
goto yy172; + } +yy172: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy174; + default: goto yy172; + } +yy174: + { return (size_t)( c - start ); } +yy175: + yych = *++c; + switch (yych) { + case ' ': goto yy177; + case '[': goto yy176; + default: goto yy167; + } +yy176: + yych = *++c; + switch (yych) { + case '^': goto yy166; + default: goto yy167; + } +yy177: + ++c; + switch ((yych = *c)) { + case '[': goto yy176; + default: goto yy167; + } +} + +} + + +size_t scan_ref_link_no_attributes(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + unsigned int yyaccept = 0; + yych = *c; + switch (yych) { + case '\n': goto yy180; + case ' ': goto yy181; + case '[': goto yy182; + default: goto yy183; + } +yy180: + { return 0; } +yy181: + yyaccept = 0; + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy317; + case '[': goto yy318; + default: goto yy180; + } +yy182: + yyaccept = 0; + yych = *(marker = ++c); + switch (yych) { + case 0x00: + case '\n': + case '\r': + case ']': goto yy180; + default: goto yy184; + } +yy183: + yych = *++c; + goto yy180; +yy184: + ++c; + yych = *c; +yy185: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case ']': goto yy187; + default: goto yy184; + } +yy186: + c = marker; + if (yyaccept == 0) { + goto yy180; + } else { + goto yy200; + } +yy187: + yych = *++c; + switch (yych) { + case ':': goto yy188; + default: goto yy186; + } +yy188: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy188; + case '\n': goto yy190; + case '\r': goto yy192; + case '<': goto yy193; + default: goto yy195; + } +yy190: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '\t': + case ' ': goto yy190; + case '<': goto yy193; + default: goto yy195; + } +yy192: + yych = *++c; + switch (yych) { + case 0x00: + case '\r': goto yy186; + case '\t': + case '\n': + case ' ': goto 
yy190; + case '<': goto yy193; + default: goto yy195; + } +yy193: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy197; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy290; + case '\'': goto yy292; + case '(': goto yy294; + case '>': goto yy195; + default: goto yy193; + } +yy195: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy197; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy202; + case '\'': goto yy204; + case '(': goto yy206; + default: goto yy195; + } +yy197: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy197; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy218; + case '\'': goto yy220; + case '(': goto yy222; + default: goto yy186; + } +yy199: + yyaccept = 1; + yych = *(marker = ++c); + switch (yych) { + case '"': goto yy218; + case '\'': goto yy220; + case '(': goto yy222; + default: goto yy200; + } +yy200: + { return (size_t)( c - start ); } +yy201: + yyaccept = 1; + yych = *(marker = ++c); + switch (yych) { + case '\n': goto yy199; + case '"': goto yy218; + case '\'': goto yy220; + case '(': goto yy222; + default: goto yy200; + } +yy202: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy288; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy276; + case '\'': goto yy266; + case '(': goto yy210; + default: goto yy202; + } +yy204: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy286; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy266; + case '\'': goto yy228; + case '(': goto yy212; + default: goto yy204; + } +yy206: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy208; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy210; + case '\'': goto yy212; + case ')': goto yy214; + default: goto 
yy206; + } +yy208: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy208; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy256; + case '\'': goto yy240; + case ')': goto yy224; + default: goto yy222; + } +yy210: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy278; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy280; + case '\'': goto yy232; + case ')': goto yy276; + default: goto yy210; + } +yy212: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy230; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy232; + case '\'': goto yy234; + case ')': goto yy228; + default: goto yy212; + } +yy214: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy215; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy202; + case '\'': goto yy204; + case '(': goto yy206; + default: goto yy195; + } +yy215: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy215; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy218; + case '\'': goto yy220; + case '(': goto yy222; + default: goto yy186; + } +yy217: + yyaccept = 1; + yych = *(marker = ++c); + switch (yych) { + case '\n': goto yy199; + case '"': goto yy218; + case '\'': goto yy220; + case '(': goto yy222; + default: goto yy200; + } +yy218: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '"': goto yy224; + default: goto yy218; + } +yy220: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '\'': goto yy224; + default: goto yy220; + } +yy222: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case ')': goto yy224; + default: goto yy222; + } +yy224: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy224; + 
case '\n': goto yy226; + case '\r': goto yy227; + default: goto yy186; + } +yy226: + yych = *++c; + goto yy200; +yy227: + yych = *++c; + switch (yych) { + case '\n': goto yy226; + default: goto yy200; + } +yy228: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy274; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy266; + case '\'': goto yy228; + case '(': goto yy212; + default: goto yy204; + } +yy230: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy230; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy242; + case '\'': goto yy244; + case ')': goto yy238; + default: goto yy240; + } +yy232: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy260; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy258; + case ')': goto yy262; + default: goto yy232; + } +yy234: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy236; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy232; + case '\'': goto yy234; + case ')': goto yy228; + default: goto yy212; + } +yy236: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy236; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy242; + case '\'': goto yy244; + case ')': goto yy238; + default: goto yy240; + } +yy238: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy238; + case '\n': goto yy226; + case '\r': goto yy227; + case '\'': goto yy224; + default: goto yy220; + } +yy240: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '\'': goto yy246; + case ')': goto yy238; + default: goto yy240; + } +yy242: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '"': goto yy244; + case '\'': 
goto yy248; + case ')': goto yy250; + default: goto yy242; + } +yy244: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy244; + case '\n': goto yy226; + case '\r': goto yy227; + case '\'': goto yy246; + case ')': goto yy238; + default: goto yy240; + } +yy246: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy246; + case '\n': goto yy226; + case '\r': goto yy227; + case ')': goto yy224; + default: goto yy222; + } +yy248: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy248; + case '\n': goto yy226; + case '\r': goto yy227; + case '"': goto yy246; + case ')': goto yy252; + default: goto yy256; + } +yy250: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy250; + case '\n': goto yy226; + case '\r': goto yy227; + case '"': goto yy238; + case '\'': goto yy252; + default: goto yy254; + } +yy252: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy252; + case '\n': goto yy226; + case '\r': goto yy227; + case '"': goto yy224; + default: goto yy218; + } +yy254: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '"': goto yy238; + case '\'': goto yy252; + default: goto yy254; + } +yy256: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy186; + case '"': goto yy246; + case ')': goto yy252; + default: goto yy256; + } +yy258: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy272; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy258; + case ')': goto yy262; + default: goto yy232; + } +yy260: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy260; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy270; + case ')': goto yy250; + 
default: goto yy242; + } +yy262: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy264; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy262; + case '(': goto yy232; + default: goto yy266; + } +yy264: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy264; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy250; + case '(': goto yy242; + default: goto yy254; + } +yy266: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy268; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy262; + case '(': goto yy232; + default: goto yy266; + } +yy268: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy268; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy250; + case '(': goto yy242; + default: goto yy254; + } +yy270: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy270; + case '\n': goto yy226; + case '\r': goto yy227; + case '"': goto yy244; + case '\'': goto yy248; + case ')': goto yy250; + default: goto yy242; + } +yy272: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy272; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy270; + case ')': goto yy250; + default: goto yy242; + } +yy274: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy274; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy254; + case '\'': goto yy238; + case '(': goto yy240; + default: goto yy220; + } +yy276: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy284; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy276; + case '\'': goto yy266; + case '(': goto 
yy210; + default: goto yy202; + } +yy278: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy278; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy248; + case '\'': goto yy242; + case ')': goto yy252; + default: goto yy256; + } +yy280: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy282; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy280; + case '\'': goto yy232; + case ')': goto yy276; + default: goto yy210; + } +yy282: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy282; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy248; + case '\'': goto yy242; + case ')': goto yy252; + default: goto yy256; + } +yy284: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy284; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy252; + case '\'': goto yy254; + case '(': goto yy256; + default: goto yy218; + } +yy286: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy286; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy254; + case '\'': goto yy238; + case '(': goto yy240; + default: goto yy220; + } +yy288: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy288; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy252; + case '\'': goto yy254; + case '(': goto yy256; + default: goto yy218; + } +yy290: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy288; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy313; + case '\'': goto yy311; + case '(': goto yy296; + case '>': goto yy202; + default: goto yy290; + } +yy292: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy286; + case '\n': goto yy199; + 
case '\r': goto yy201; + case '"': goto yy311; + case '\'': goto yy301; + case '(': goto yy298; + case '>': goto yy204; + default: goto yy292; + } +yy294: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy208; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy296; + case '\'': goto yy298; + case ')': goto yy300; + case '>': goto yy206; + default: goto yy294; + } +yy296: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy278; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy315; + case '\'': goto yy303; + case ')': goto yy313; + case '>': goto yy210; + default: goto yy296; + } +yy298: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy230; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': goto yy303; + case '\'': goto yy305; + case ')': goto yy301; + case '>': goto yy212; + default: goto yy298; + } +yy300: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy215; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy290; + case '\'': goto yy292; + case '(': goto yy294; + case '>': goto yy195; + default: goto yy193; + } +yy301: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy274; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy311; + case '\'': goto yy301; + case '(': goto yy298; + case '>': goto yy204; + default: goto yy292; + } +yy303: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy260; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy307; + case ')': goto yy309; + case '>': goto yy232; + default: goto yy303; + } +yy305: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy236; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': 
goto yy303; + case '\'': goto yy305; + case ')': goto yy301; + case '>': goto yy212; + default: goto yy298; + } +yy307: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy272; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy307; + case ')': goto yy309; + case '>': goto yy232; + default: goto yy303; + } +yy309: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy264; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': + case '\'': goto yy309; + case '(': goto yy303; + case '>': goto yy266; + default: goto yy311; + } +yy311: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy268; + case '\n': goto yy199; + case '\r': goto yy201; + case '"': + case '\'': goto yy309; + case '(': goto yy303; + case '>': goto yy266; + default: goto yy311; + } +yy313: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy284; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy313; + case '\'': goto yy311; + case '(': goto yy296; + case '>': goto yy202; + default: goto yy290; + } +yy315: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy186; + case '\t': + case ' ': goto yy282; + case '\n': goto yy199; + case '\r': goto yy217; + case '"': goto yy315; + case '\'': goto yy303; + case ')': goto yy313; + case '>': goto yy210; + default: goto yy296; + } +yy317: + yych = *++c; + switch (yych) { + case ' ': goto yy319; + case '[': goto yy318; + default: goto yy186; + } +yy318: + yych = *++c; + switch (yych) { + case ']': goto yy186; + default: goto yy185; + } +yy319: + ++c; + switch ((yych = *c)) { + case '[': goto yy318; + default: goto yy186; + } +} + +} + + +size_t scan_ref_link(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy322; + case ' ': goto yy323; + case 
'[': goto yy324; + default: goto yy325; + } +yy322: + { return 0; } +yy323: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy334; + case '[': goto yy335; + default: goto yy322; + } +yy324: + yych = *(marker = ++c); + switch (yych) { + case 0x00: + case '\n': + case '\r': + case ']': goto yy322; + default: goto yy326; + } +yy325: + yych = *++c; + goto yy322; +yy326: + ++c; + yych = *c; +yy327: + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy328; + case ']': goto yy329; + default: goto yy326; + } +yy328: + c = marker; + goto yy322; +yy329: + yych = *++c; + switch (yych) { + case ':': goto yy330; + default: goto yy328; + } +yy330: + yych = *++c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy328; + default: goto yy331; + } +yy331: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy333; + default: goto yy331; + } +yy333: + { return (size_t)( c - start ); } +yy334: + yych = *++c; + switch (yych) { + case ' ': goto yy336; + case '[': goto yy335; + default: goto yy328; + } +yy335: + yych = *++c; + switch (yych) { + case ']': goto yy328; + default: goto yy327; + } +yy336: + ++c; + switch ((yych = *c)) { + case '[': goto yy335; + default: goto yy328; + } +} + +} + + +size_t scan_html(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy339; + case '<': goto yy340; + default: goto yy341; + } +yy339: + { return 0; } +yy340: + yych = *(marker = ++c); + switch (yych) { + case '!': goto yy342; + case '/': goto yy344; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + 
case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy345; + default: goto yy339; + } +yy341: + yych = *++c; + goto yy339; +yy342: + yych = *++c; + switch (yych) { + case '-': goto yy373; + default: goto yy343; + } +yy343: + c = marker; + goto yy339; +yy344: + yych = *++c; + switch (yych) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy369; + default: goto yy343; + } +yy345: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy349; + case '\n': goto yy351; + case '\r': goto yy353; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy345; + case '/': goto yy358; + case ':': + case '_': goto yy354; + case '>': goto yy356; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': 
+ case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy347; + default: goto yy343; + } +yy347: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy349; + case '\n': goto yy351; + case '\r': goto yy353; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy347; + case '.': + case ':': + case '_': goto yy354; + case '/': goto yy358; + case '=': goto yy359; + case '>': goto yy356; + default: goto yy343; + } +yy349: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy349; + case '\n': goto yy351; + case '\r': goto yy353; + case '/': goto yy358; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': 
+ case 'z': goto yy354; + case '>': goto yy356; + default: goto yy343; + } +yy351: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy351; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy354; + default: goto yy343; + } +yy353: + ++c; + yych = *c; + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy351; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy354; + default: goto yy343; + } +yy354: + ++c; + yych = *c; + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + 
case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy354; + case '=': goto yy359; + default: goto yy343; + } +yy356: + ++c; + { return (size_t)( c - start ); } +yy358: + yych = *++c; + switch (yych) { + case '>': goto yy356; + default: goto yy343; + } +yy359: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy359; + case '"': goto yy361; + case '\'': goto yy363; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy365; + default: goto yy343; + } +yy361: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy343; + case '"': goto yy349; + default: goto yy361; + } +yy363: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy343; + case '\'': goto yy349; + default: goto yy363; + } +yy365: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy349; + case '\n': goto 
yy351; + case '\r': goto yy353; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy365; + case '/': goto yy358; + case ':': + case '_': goto yy354; + case '>': goto yy356; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy367; + default: goto yy343; + } +yy367: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy349; + case '\n': goto yy351; + case '\r': goto yy353; + case '-': + case ':': + case '_': goto yy354; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy367; + case '/': goto yy358; + case '=': goto yy359; + case '>': goto yy356; + default: goto yy343; + } +yy369: + ++c; + yych = *c; 
+ switch (yych) { + case '\t': + case ' ': goto yy371; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy369; + case '>': goto yy356; + default: goto yy343; + } +yy371: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy371; + case '>': goto yy356; + default: goto yy343; + } +yy373: + yych = *++c; + switch (yych) { + case '-': goto yy374; + default: goto yy343; + } +yy374: + yych = *++c; + switch (yych) { + case '-': goto yy343; + default: goto yy376; + } +yy375: + ++c; + yych = *c; +yy376: + switch (yych) { + case 0x00: + case '>': goto yy343; + case '-': goto yy377; + default: goto yy375; + } +yy377: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '>': goto yy343; + case '-': goto yy378; + default: goto yy375; + } +yy378: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy343; + case '-': goto yy378; + case '>': goto yy356; + default: goto yy375; + } +} + +} + + +size_t scan_html_block(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy382; + case '<': goto yy383; + default: goto yy384; + } +yy382: + { return 0; } +yy383: + yych = *(marker = ++c); + switch (yych) { + case '/': goto yy385; + case 'A': + case 'a': goto yy388; + case 'B': 
+ case 'b': goto yy389; + case 'C': + case 'c': goto yy390; + case 'D': + case 'd': goto yy391; + case 'F': + case 'f': goto yy392; + case 'H': + case 'h': goto yy393; + case 'I': + case 'i': goto yy394; + case 'L': + case 'l': goto yy395; + case 'M': + case 'm': goto yy396; + case 'N': + case 'n': goto yy397; + case 'O': + case 'o': goto yy398; + case 'P': + case 'p': goto yy387; + case 'S': + case 's': goto yy399; + case 'T': + case 't': goto yy400; + case 'U': + case 'u': goto yy401; + case 'V': + case 'v': goto yy402; + default: goto yy382; + } +yy384: + yych = *++c; + goto yy382; +yy385: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy388; + case 'B': + case 'b': goto yy389; + case 'C': + case 'c': goto yy390; + case 'D': + case 'd': goto yy391; + case 'F': + case 'f': goto yy392; + case 'H': + case 'h': goto yy393; + case 'I': + case 'i': goto yy394; + case 'L': + case 'l': goto yy395; + case 'M': + case 'm': goto yy396; + case 'N': + case 'n': goto yy397; + case 'O': + case 'o': goto yy398; + case 'P': + case 'p': goto yy387; + case 'S': + case 's': goto yy399; + case 'T': + case 't': goto yy400; + case 'U': + case 'u': goto yy401; + case 'V': + case 'v': goto yy402; + default: goto yy386; + } +yy386: + c = marker; + goto yy382; +yy387: + yych = *++c; + switch (yych) { + case '/': goto yy414; + case '>': goto yy415; + case 'R': + case 'r': goto yy534; + default: goto yy408; + } +yy388: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy523; + case 'R': + case 'r': goto yy522; + case 'S': + case 's': goto yy521; + default: goto yy386; + } +yy389: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy513; + default: goto yy386; + } +yy390: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy506; + case 'E': + case 'e': goto yy505; + default: goto yy386; + } +yy391: + yych = *++c; + switch (yych) { + case 'D': + case 'L': + case 'T': + case 'd': + case 'l': + case 't': goto yy406; + case 'I': + case 'i': goto 
yy504; + default: goto yy386; + } +yy392: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy480; + case 'O': + case 'o': goto yy479; + case 'R': + case 'r': goto yy478; + default: goto yy386; + } +yy393: + yych = *++c; + switch (yych) { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case 'R': + case 'r': goto yy406; + case 'E': + case 'e': goto yy471; + case 'G': + case 'g': goto yy470; + default: goto yy386; + } +yy394: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy465; + default: goto yy386; + } +yy395: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy406; + default: goto yy386; + } +yy396: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy462; + case 'E': + case 'e': goto yy461; + default: goto yy386; + } +yy397: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy450; + case 'O': + case 'o': goto yy449; + default: goto yy386; + } +yy398: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy406; + case 'U': + case 'u': goto yy445; + default: goto yy386; + } +yy399: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy440; + default: goto yy386; + } +yy400: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy430; + case 'B': + case 'b': goto yy429; + case 'D': + case 'R': + case 'd': + case 'r': goto yy406; + case 'F': + case 'f': goto yy428; + case 'H': + case 'h': goto yy427; + default: goto yy386; + } +yy401: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy406; + default: goto yy386; + } +yy402: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy403; + default: goto yy386; + } +yy403: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy404; + default: goto yy386; + } +yy404: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy405; + default: goto yy386; + } +yy405: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy406; + default: goto yy386; + } +yy406: + ++c; + 
yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy407; + case '\n': goto yy409; + case '\r': goto yy411; + case '/': goto yy414; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy412; + case '>': goto yy415; + default: goto yy386; + } +yy407: + ++c; + yych = *c; +yy408: + switch (yych) { + case '\t': + case ' ': goto yy407; + case '\n': goto yy409; + case '\r': goto yy411; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy412; + default: goto yy386; + } +yy409: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy409; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 
'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy412; + default: goto yy386; + } +yy411: + ++c; + yych = *c; + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy409; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy412; + default: goto yy386; + } +yy412: + ++c; + yych = *c; +yy413: + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + 
case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy412; + case '=': goto yy417; + default: goto yy386; + } +yy414: + yych = *++c; + switch (yych) { + case '>': goto yy415; + default: goto yy386; + } +yy415: + ++c; + { return (size_t)( c - start ); } +yy417: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy417; + case '"': goto yy419; + case '\'': goto yy421; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy423; + default: goto yy386; + } +yy419: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy386; + case '"': goto yy406; + default: goto yy419; + } +yy421: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy386; + case '\'': goto yy406; + default: goto yy421; + } +yy423: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy407; + case '\n': goto yy409; + case '\r': goto yy411; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy423; + case '/': goto yy414; + case ':': + case '_': goto yy412; + case '>': goto yy415; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 
'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy425; + default: goto yy386; + } +yy425: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy407; + case '\n': goto yy409; + case '\r': goto yy411; + case '-': + case ':': + case '_': goto yy412; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy425; + case '/': goto yy414; + case '=': goto yy417; + case '>': goto yy415; + default: goto yy386; + } +yy427: + yych = *++c; + switch (yych) { + case '/': goto yy414; + case '>': goto yy415; + case 'E': + case 'e': goto yy437; + default: goto yy408; + } +yy428: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy435; + default: goto yy386; + } +yy429: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy433; + default: goto yy386; + } +yy430: + yych = *++c; + switch (yych) { + case 'B': + 
case 'b': goto yy431; + default: goto yy386; + } +yy431: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy432; + default: goto yy386; + } +yy432: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy406; + default: goto yy386; + } +yy433: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy434; + default: goto yy386; + } +yy434: + yych = *++c; + switch (yych) { + case 'Y': + case 'y': goto yy406; + default: goto yy386; + } +yy435: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy436; + default: goto yy386; + } +yy436: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy406; + default: goto yy386; + } +yy437: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy438; + default: goto yy413; + } +yy438: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy439; + default: goto yy413; + } +yy439: + yych = *++c; + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy412; + case '/': goto yy414; + case '=': goto yy417; + case '>': goto yy415; + default: goto yy408; + } +yy440: + yych = *++c; + switch (yych) { + case 'C': + case 'c': goto yy441; + default: goto yy386; + } +yy441: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy442; + default: goto yy386; + } +yy442: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy443; + default: goto yy386; + } +yy443: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy444; + default: goto yy386; + } +yy444: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy406; + default: goto yy386; + } +yy445: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy446; + default: goto yy386; + } +yy446: + yych = *++c; + switch (yych) { + case 'P': + case 'p': goto yy447; + default: goto yy386; + } +yy447: + yych = *++c; + switch (yych) { + case 'U': + case 'u': goto yy448; + default: goto 
yy386; + } +yy448: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy406; + default: goto yy386; + } +yy449: + yych = *++c; + switch (yych) { + case 'F': + case 'f': goto yy451; + case 'S': + case 's': goto yy452; + default: goto yy386; + } +yy450: + yych = *++c; + switch (yych) { + case 'V': + case 'v': goto yy406; + default: goto yy386; + } +yy451: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy457; + default: goto yy386; + } +yy452: + yych = *++c; + switch (yych) { + case 'C': + case 'c': goto yy453; + default: goto yy386; + } +yy453: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy454; + default: goto yy386; + } +yy454: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy455; + default: goto yy386; + } +yy455: + yych = *++c; + switch (yych) { + case 'P': + case 'p': goto yy456; + default: goto yy386; + } +yy456: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy406; + default: goto yy386; + } +yy457: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy458; + default: goto yy386; + } +yy458: + yych = *++c; + switch (yych) { + case 'M': + case 'm': goto yy459; + default: goto yy386; + } +yy459: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy460; + default: goto yy386; + } +yy460: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy406; + default: goto yy386; + } +yy461: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy464; + default: goto yy386; + } +yy462: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy463; + default: goto yy386; + } +yy463: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy406; + default: goto yy386; + } +yy464: + yych = *++c; + switch (yych) { + case 'U': + case 'u': goto yy406; + default: goto yy386; + } +yy465: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy466; + default: goto yy386; + } +yy466: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy467; + 
default: goto yy386; + } +yy467: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy468; + default: goto yy386; + } +yy468: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy469; + default: goto yy386; + } +yy469: + yych = *++c; + switch (yych) { + case 'X': + case 'x': goto yy406; + default: goto yy386; + } +yy470: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy475; + default: goto yy386; + } +yy471: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy472; + default: goto yy386; + } +yy472: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy473; + default: goto yy386; + } +yy473: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy474; + default: goto yy386; + } +yy474: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy406; + default: goto yy386; + } +yy475: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy476; + default: goto yy386; + } +yy476: + yych = *++c; + switch (yych) { + case 'U': + case 'u': goto yy477; + default: goto yy386; + } +yy477: + yych = *++c; + switch (yych) { + case 'P': + case 'p': goto yy406; + default: goto yy386; + } +yy478: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy499; + default: goto yy386; + } +yy479: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy495; + case 'R': + case 'r': goto yy496; + default: goto yy386; + } +yy480: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy481; + case 'G': + case 'g': goto yy482; + default: goto yy386; + } +yy481: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy491; + default: goto yy386; + } +yy482: + yych = *++c; + switch (yych) { + case 'C': + case 'c': goto yy484; + case 'U': + case 'u': goto yy483; + default: goto yy386; + } +yy483: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy490; + default: goto yy386; + } +yy484: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy485; + default: goto 
yy386; + } +yy485: + yych = *++c; + switch (yych) { + case 'P': + case 'p': goto yy486; + default: goto yy386; + } +yy486: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy487; + default: goto yy386; + } +yy487: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy488; + default: goto yy386; + } +yy488: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy489; + default: goto yy386; + } +yy489: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy406; + default: goto yy386; + } +yy490: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy406; + default: goto yy386; + } +yy491: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy492; + default: goto yy386; + } +yy492: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy493; + default: goto yy386; + } +yy493: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy494; + default: goto yy386; + } +yy494: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy406; + default: goto yy386; + } +yy495: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy497; + default: goto yy386; + } +yy496: + yych = *++c; + switch (yych) { + case 'M': + case 'm': goto yy406; + default: goto yy386; + } +yy497: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy498; + default: goto yy386; + } +yy498: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy406; + default: goto yy386; + } +yy499: + yych = *++c; + switch (yych) { + case 'M': + case 'm': goto yy500; + default: goto yy386; + } +yy500: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy501; + default: goto yy386; + } +yy501: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy502; + default: goto yy386; + } +yy502: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy503; + default: goto yy386; + } +yy503: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy406; + default: goto yy386; + } +yy504: + 
yych = *++c; + switch (yych) { + case 'R': + case 'V': + case 'r': + case 'v': goto yy406; + default: goto yy386; + } +yy505: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy510; + default: goto yy386; + } +yy506: + yych = *++c; + switch (yych) { + case 'N': + case 'n': goto yy507; + default: goto yy386; + } +yy507: + yych = *++c; + switch (yych) { + case 'V': + case 'v': goto yy508; + default: goto yy386; + } +yy508: + yych = *++c; + switch (yych) { + case 'A': + case 'a': goto yy509; + default: goto yy386; + } +yy509: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy406; + default: goto yy386; + } +yy510: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy511; + default: goto yy386; + } +yy511: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy512; + default: goto yy386; + } +yy512: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy406; + default: goto yy386; + } +yy513: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy514; + default: goto yy386; + } +yy514: + yych = *++c; + switch (yych) { + case 'C': + case 'c': goto yy515; + default: goto yy386; + } +yy515: + yych = *++c; + switch (yych) { + case 'K': + case 'k': goto yy516; + default: goto yy386; + } +yy516: + yych = *++c; + switch (yych) { + case 'Q': + case 'q': goto yy517; + default: goto yy386; + } +yy517: + yych = *++c; + switch (yych) { + case 'U': + case 'u': goto yy518; + default: goto yy386; + } +yy518: + yych = *++c; + switch (yych) { + case 'O': + case 'o': goto yy519; + default: goto yy386; + } +yy519: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy520; + default: goto yy386; + } +yy520: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy406; + default: goto yy386; + } +yy521: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy532; + default: goto yy386; + } +yy522: + yych = *++c; + switch (yych) { + case 'T': + case 't': goto yy528; + default: goto yy386; + } +yy523: + 
yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy524; + default: goto yy386; + } +yy524: + yych = *++c; + switch (yych) { + case 'R': + case 'r': goto yy525; + default: goto yy386; + } +yy525: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy526; + default: goto yy386; + } +yy526: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy527; + default: goto yy386; + } +yy527: + yych = *++c; + switch (yych) { + case 'S': + case 's': goto yy406; + default: goto yy386; + } +yy528: + yych = *++c; + switch (yych) { + case 'I': + case 'i': goto yy529; + default: goto yy386; + } +yy529: + yych = *++c; + switch (yych) { + case 'C': + case 'c': goto yy530; + default: goto yy386; + } +yy530: + yych = *++c; + switch (yych) { + case 'L': + case 'l': goto yy531; + default: goto yy386; + } +yy531: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy406; + default: goto yy386; + } +yy532: + yych = *++c; + switch (yych) { + case 'D': + case 'd': goto yy533; + default: goto yy386; + } +yy533: + yych = *++c; + switch (yych) { + case 'E': + case 'e': goto yy406; + default: goto yy386; + } +yy534: + ++c; + switch ((yych = *c)) { + case 'E': + case 'e': goto yy439; + default: goto yy413; + } +} + +} + + +/* scan_html_line(c): state machine (yych / yyNNN labels; looks machine-generated) that tests whether the text at c is a single HTML construct filling the rest of its line. Three shapes are accepted by the states below: (1) an opening tag with any name (a letter followed by letters, digits or '-'), optional name=value attributes, an optional '/', then '>'; (2) a closing tag "</name" with optional spaces/tabs before '>'; (3) a comment opened by "<!--" whose next character is not '-', and whose body contains no NUL and no '>' except the one in the terminating "-->". After the final '>' only spaces and tabs may follow, and a line ending (LF, CR, or CR LF) is then required; a NUL directly after the tag is a non-match. Returns the number of bytes consumed from c, including the trailing blanks and the line ending, or 0 when there is no match (including when c does not start with '<'). marker records the position just after '<' so the failure state can rewind c before returning 0. */ size_t scan_html_line(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy537; + case '<': goto yy538; + default: goto yy539; + } +yy537: + { return 0; } +yy538: + yych = *(marker = ++c); + switch (yych) { + case '!': goto yy540; + case '/': goto yy542; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 
'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy543; + default: goto yy537; + } +yy539: + yych = *++c; + goto yy537; +yy540: + yych = *++c; + switch (yych) { + case '-': goto yy574; + default: goto yy541; + } +yy541: + c = marker; + goto yy537; +yy542: + yych = *++c; + switch (yych) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy570; + default: goto yy541; + } +yy543: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy547; + case '\n': goto yy549; + case '\r': goto yy551; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy543; + case '/': goto yy556; + case ':': + case '_': goto yy552; + case '>': goto yy554; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + 
case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy545; + default: goto yy541; + } +yy545: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy547; + case '\n': goto yy549; + case '\r': goto yy551; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy545; + case '.': + case ':': + case '_': goto yy552; + case '/': goto yy556; + case '=': goto yy560; + case '>': goto yy554; + default: goto yy541; + } +yy547: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy547; + case '\n': goto yy549; + case '\r': goto yy551; + case '/': goto yy556; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + 
case 'z': goto yy552; + case '>': goto yy554; + default: goto yy541; + } +yy549: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy549; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy552; + default: goto yy541; + } +yy551: + ++c; + yych = *c; + switch (yych) { + case '\t': + case '\n': + case ' ': goto yy549; + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy552; + default: goto yy541; + } +yy552: + ++c; + yych = *c; + switch (yych) { + case '-': + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case ':': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + 
case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy552; + case '=': goto yy560; + default: goto yy541; + } +yy554: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy554; + case '\n': goto yy557; + case '\r': goto yy559; + default: goto yy541; + } +yy556: + yych = *++c; + switch (yych) { + case '>': goto yy554; + default: goto yy541; + } +yy557: + ++c; +yy558: + { return (size_t)( c - start ); } +yy559: + yych = *++c; + switch (yych) { + case '\n': goto yy557; + default: goto yy558; + } +yy560: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy560; + case '"': goto yy562; + case '\'': goto yy564; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy566; + default: goto yy541; + } +yy562: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy541; + case '"': goto yy547; + default: goto 
yy562; + } +yy564: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': goto yy541; + case '\'': goto yy547; + default: goto yy564; + } +yy566: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy547; + case '\n': goto yy549; + case '\r': goto yy551; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': goto yy566; + case '/': goto yy556; + case ':': + case '_': goto yy552; + case '>': goto yy554; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy568; + default: goto yy541; + } +yy568: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy547; + case '\n': goto yy549; + case '\r': goto yy551; + case '-': + case ':': + case '_': goto yy552; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 
'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy568; + case '/': goto yy556; + case '=': goto yy560; + case '>': goto yy554; + default: goto yy541; + } +yy570: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy572; + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': goto yy570; + case '>': goto yy554; + default: goto yy541; + } +yy572: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy572; + case '>': goto yy554; + default: goto yy541; + } +yy574: + yych = *++c; + switch (yych) { + case '-': goto yy575; + default: goto yy541; + } +yy575: + yych = *++c; + switch (yych) { + case '-': goto yy541; + default: goto yy577; + } +yy576: + ++c; + yych = *c; +yy577: + switch (yych) { + case 0x00: + case '>': goto yy541; + case '-': goto yy578; + default: goto yy576; + } +yy578: + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '>': goto yy541; + case '-': goto yy579; + default: goto yy576; + } +yy579: + ++c; + yych = *c; + switch (yych) { + case 0x00: goto yy541; + case '-': goto yy579; + case '>': goto yy554; + default: goto yy576; + } +} + +} + + +/* scan_fence_start(c): state machine (yych / yyNNN labels; looks machine-generated) that tests whether the line at c opens a fenced block. Accepted shape, as encoded by the states below: at most three leading spaces; a run of three or more fence characters, each a backtick or '~' (the two may be mixed); one further character that is not a backtick, apostrophe, CR, LF or NUL; then a line ending (LF, CR, or CR LF). Returns the number of bytes consumed from c including the line ending, or 0 when there is no match. marker records the position after the first consumed character so the failure state can rewind c before returning 0. */ size_t scan_fence_start(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + 
char yych; + yych = *c; + switch (yych) { + case '\n': goto yy583; + case ' ': goto yy584; + case '`': + case '~': goto yy585; + default: goto yy586; + } +yy583: + { return 0; } +yy584: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy597; + case '`': + case '~': goto yy598; + default: goto yy583; + } +yy585: + yych = *(marker = ++c); + switch (yych) { + case '`': + case '~': goto yy587; + default: goto yy583; + } +yy586: + yych = *++c; + goto yy583; +yy587: + yych = *++c; + switch (yych) { + case '`': + case '~': goto yy589; + default: goto yy588; + } +yy588: /* no match: rewind c to marker; yy583 then returns 0 */ + c = marker; + goto yy583; +yy589: /* reached once three fence characters have been consumed; inspects the character that follows the fence run. A line ending or NUL here is a non-match, so a fence with nothing after it is rejected unless the run ends in '~' (handled in yy592). */ + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\n': + case '\r': + case '\'': goto yy588; + case '`': goto yy589; + case '~': goto yy592; + default: goto yy591; + } +yy591: /* exactly one non-fence character has been consumed; the very next byte must start a line ending. NOTE(review): a trailing string longer than one character therefore falls through to yy588 - confirm against the scanner grammar whether a repetition was intended. */ + yych = *++c; + switch (yych) { + case '\n': goto yy594; + case '\r': goto yy596; + default: goto yy588; + } +yy592: /* previous character was '~', which can count either as part of the fence run or as the single trailing character, hence a line ending is accepted directly from here */ + ++c; + yych = *c; + switch (yych) { + case 0x00: + case '\'': goto yy588; + case '\n': goto yy594; + case '\r': goto yy596; + case '`': goto yy589; + case '~': goto yy592; + default: goto yy591; + } +yy594: + ++c; +yy595: /* accept: length includes the line ending just consumed */ + { return (size_t)( c - start ); } +yy596: + yych = *++c; + switch (yych) { + case '\n': goto yy594; + default: goto yy595; + } +yy597: + yych = *++c; + switch (yych) { + case ' ': goto yy599; + case '`': + case '~': goto yy598; + default: goto yy588; + } +yy598: + yych = *++c; + switch (yych) { + case '`': + case '~': goto yy587; + default: goto yy588; + } +yy599: + ++c; + switch ((yych = *c)) { + case '`': + case '~': goto yy598; + default: goto yy588; + } +} + +} + + +size_t scan_fence_end(const char * c) { + const char * marker = NULL; + const char * start = c; + + +{ + char yych; + yych = *c; + switch (yych) { + case '\n': goto yy602; + case ' ': goto yy603; + case '`': + case '~': goto yy604; + default: goto yy605; + } +yy602: + { return 0; } +yy603: + yych = *(marker = ++c); + switch (yych) { + case ' ': goto yy615; + case '`': + case 
'~': goto yy616; + default: goto yy602; + } +yy604: + yych = *(marker = ++c); + switch (yych) { + case '`': + case '~': goto yy606; + default: goto yy602; + } +yy605: + yych = *++c; + goto yy602; +yy606: + yych = *++c; + switch (yych) { + case '`': + case '~': goto yy608; + default: goto yy607; + } +yy607: + c = marker; + goto yy602; +yy608: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy610; + case '\n': goto yy612; + case '\r': goto yy614; + case '`': + case '~': goto yy608; + default: goto yy607; + } +yy610: + ++c; + yych = *c; + switch (yych) { + case '\t': + case ' ': goto yy610; + case '\n': goto yy612; + case '\r': goto yy614; + default: goto yy607; + } +yy612: + ++c; +yy613: + { return (size_t)( c - start ); } +yy614: + yych = *++c; + switch (yych) { + case '\n': goto yy612; + default: goto yy613; + } +yy615: + yych = *++c; + switch (yych) { + case ' ': goto yy617; + case '`': + case '~': goto yy616; + default: goto yy607; + } +yy616: + yych = *++c; + switch (yych) { + case '`': + case '~': goto yy606; + default: goto yy607; + } +yy617: + ++c; + switch ((yych = *c)) { + case '`': + case '~': goto yy616; + default: goto yy607; + } +} + +} + + +#ifdef TEST +void Test_scan_url(CuTest* tc) { + int url_len; + + url_len = (int) scan_url("mailto:foo@bar.com"); + CuAssertIntEquals(tc, 18, url_len); + url_len = (int) scan_email("mailto:foo@bar.com"); + CuAssertIntEquals(tc, 18, url_len); + + url_len = (int) scan_url("http://test.com/"); + CuAssertIntEquals(tc, 16, url_len); + url_len = (int) scan_email("mailto:foo@bar.com"); + CuAssertIntEquals(tc, 0, url_len); + + url_len = (int) scan_url("foo@bar.com "); + CuAssertIntEquals(tc, 12, url_len); + url_len = (int) scan_email("mailto:foo@bar.com"); + CuAssertIntEquals(tc, 12, url_len); +} +#endif + diff --git a/src/scanners.h b/src/scanners.h new file mode 100644 index 0000000..3be8a58 --- /dev/null +++ b/src/scanners.h @@ -0,0 +1,81 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to 
produce HTML, LaTeX, and more. + + @file scanners.h + + @brief After text has been tokenized, there are still some constructs that are best + interpreted using regular expressions. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+
+*/
+
+
+#ifndef SCANNERS_MULTIMARKDOWN_H
+#define SCANNERS_MULTIMARKDOWN_H
+
+#include <stddef.h>
+
+#ifdef TEST
+#include "CuTest.h"
+#endif
+
+/// Each scanner checks whether the text at `c` begins with the named
+/// construct.  It returns the number of bytes matched, or 0 when the
+/// text does not match.
+size_t scan_attr(const char * c);
+size_t scan_attributes(const char * c);
+size_t scan_email(const char * c);
+size_t scan_fence_start(const char * c);
+size_t scan_fence_end(const char * c);
+size_t scan_html(const char * c);
+size_t scan_html_block(const char * c);
+size_t scan_html_line(const char * c);
+size_t scan_key(const char * c);
+size_t scan_ref_citation(const char * c);
+size_t scan_ref_foot(const char * c);
+size_t scan_ref_link(const char * c);
+size_t scan_ref_link_no_attributes(const char * c);
+size_t scan_spnl(const char * c);
+size_t scan_url(const char * c);
+size_t scan_value(const char * c);
+
+#endif
diff --git a/src/scanners.re b/src/scanners.re
new file mode 100644
index 0000000..b0e7b23
--- /dev/null
+++ b/src/scanners.re
@@ -0,0 +1,330 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file scanners.c
+
+	@brief After text has been tokenized, there are still some constructs that are best
+	interpreted using regular expressions.
+
+
+	@author	Fletcher T. Penney
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License..
+
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+
+		https://github.com/fletcher/MultiMarkdown-4/
+
+	MMD 4 is released under both the MIT License and GPL.
+
+
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+
+
+	## The MIT License ##
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in
+	all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+#include <stdlib.h>
+
+#include "scanners.h"
+
+/*!re2c
+
+	re2c:define:YYCTYPE = "char";
+	re2c:define:YYCURSOR = c;
+	re2c:define:YYMARKER = marker;
+	re2c:define:YYCTXMARKER = marker;
+	re2c:yyfill:enable = 0;
+
+	nl 			= ( '\n' | '\r' '\n'?);
+	sp 			= [ \t]*;
+	spnl		= sp (nl sp)?;
+	non_indent 	= ' '{0,3};
+
+	email		= 'mailto:'? [-A-Za-z0-9+_./!%~$]+ '@' [^ \t\n\r\x00>]+;
+
+	url			= [A-Za-z\-]+ '://' [^ \t\n\r\x00>]+;
+
+	name		= [A-Za-z_:] [A-Za-z0-9_.:-]*;
+	quoted_d	= '"' [^"\n\r\x00]* '"';
+	quoted_s	= "'" [^'\n\r\x00]* "'";
+	quoted_p	= "(" [^)\n\r\x00]* ")";
+	unquoted	= [\.A-Za-z0-9]+;
+
+	value		= (quoted_d | quoted_s | unquoted);
+	attr		= spnl name '=' sp value;
+	attributes	= (attr)+;
+	title		= (quoted_d | quoted_s | quoted_p);
+
+	label		= [^\]\n\r\x00]+;
+	finish_line	= [^\n\r\x00]+;
+
+	ref_citation	= non_indent '[#' label ']' ':' finish_line;
+
+	ref_foot	= non_indent '[^' label ']' ':' finish_line;
+
+	ref_link	= non_indent '[' label ']' ':' finish_line;
+
+	destination	= ('<' [^ \t\n\r\x00>]* '>') | [^ \t\n\r\x00]+;
+
+	ref_link_no_attributes	= non_indent '[' label ']' ':' spnl destination sp (nl | (nl? (title) sp) nl);
+
+	tag_name	= [A-Za-z] [A-Za-z0-9\-]*;
+
+	tag_start	= '<' tag_name attributes? sp '>';
+
+	tag_empty	= '<' tag_name attributes? sp '/>';
+
+	tag_end		= '</' tag_name sp '>';
+
+	// We limit comments to exclude '>' character to minimize backtracking
+	comment		= [^>\-\x00] [^>\x00]*;
+
+	tag_comment	= '<!--' comment '-->';
+
+	html		= tag_start | tag_empty | tag_end | tag_comment;
+
+	html_line	= html sp nl;
+
+// http://www.cs.sfu.ca/CourseCentral/165/sbrown1/wdgxhtml10/block.html
+// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
+
+	block_tag	= 'address' | 'article' | 'aside' | 'blockquote' | 'canvas' | 'center' | 'dd' |
+				'dir' | 'div' | 'dl' | 'dt' | 'fieldset' | 'figcaption' | 'figure' |
+				'footer' | 'form' | 'frameset' | 'h1' | 'h2' | 'h3' | 'h4' | 'h5' | 'h6' |
+				'header' | 'hgroup' | 'hr' | 'isindex' | 'li' | 'main' | 'menu' | 'nav' |
+				'noframes' | 'noscript' | 'ol' | 'output' | 'p' | 'pre' | 'section' |
+				'table' | 'tbody' | 'td' | 'tfoot' | 'th' | 'thead' | 'tr' | 'ul' | 'video';
+
+	html_block	= '<' '/'? block_tag attributes? '/'? '>';
+
+	// NOTE(review): as written this accepts exactly ONE character between the
+	// fence markers and the line ending.  A repetition operator after the
+	// character class was probably intended -- confirm before changing, since
+	// the generated scanners.c must be rebuilt to match.
+	fence_start	= non_indent [`~]{3,} [^`'\n\r\x00] nl;
+
+	fence_end	= non_indent [`~]{3,} sp nl;
+*/
+
+
+/// Match optional spaces/tabs that may wrap across a single line ending
+size_t scan_spnl(const char * c) {
+	const char * start = c;
+
+/*!re2c
+	spnl 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match an attribute name
+size_t scan_key(const char * c) {
+	const char * start = c;
+
+/*!re2c
+	name 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match an attribute value (double-quoted, single-quoted, or unquoted)
+size_t scan_value(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	value 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match a single `name=value` attribute, including leading whitespace
+size_t scan_attr(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	attr 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match one or more consecutive attributes
+size_t scan_attributes(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	attributes 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match an email address, with or without a leading `mailto:`
+size_t scan_email(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	email 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match either an email address or a `scheme://...` URL
+size_t scan_url(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	email 	{ return (size_t)( c - start ); }
+	url 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match a citation definition line: `[#label]: ...`
+size_t scan_ref_citation(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	ref_citation 	{ return (size_t)( c - start ); }
+	.?				{ return 0; }
+*/
+}
+
+/// Match a footnote definition line: `[^label]: ...`
+size_t scan_ref_foot(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	ref_foot 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match a link definition consisting only of label, destination and
+/// optional title (no trailing attributes), through the line ending
+size_t scan_ref_link_no_attributes(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	ref_link_no_attributes 	{ return (size_t)( c - start ); }
+	.?						{ return 0; }
+*/
+}
+
+
+/// Match a link definition line: `[label]: ...`
+size_t scan_ref_link(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	ref_link 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match a single HTML start tag, empty tag, end tag, or comment
+size_t scan_html(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	html 	{ return (size_t)( c - start ); }
+	.?		{ return 0; }
+*/
+}
+
+
+/// Match an opening or closing tag of a block-level HTML element
+size_t scan_html_block(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	html_block 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match an HTML tag/comment followed only by whitespace and a line ending
+size_t scan_html_line(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	html_line 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match the opening line of a fenced code block
+size_t scan_fence_start(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	fence_start { return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+/// Match the closing line of a fenced code block
+size_t scan_fence_end(const char * c) {
+	const char * marker = NULL;
+	const char * start = c;
+
+/*!re2c
+	fence_end 	{ return (size_t)( c - start ); }
+	.?			{ return 0; }
+*/
+}
+
+
+#ifdef TEST
+void Test_scan_url(CuTest* tc) {
+	int url_len;
+
+	// An address with a `mailto:` prefix is accepted by both scanners
+	url_len = (int) scan_url("mailto:foo@bar.com");
+	CuAssertIntEquals(tc, 18, url_len);
+	url_len = (int) scan_email("mailto:foo@bar.com");
+	CuAssertIntEquals(tc, 18, url_len);
+
+	// A `scheme://` URL is accepted by scan_url only
+	url_len = (int) scan_url("http://test.com/");
+	CuAssertIntEquals(tc, 16, url_len);
+	url_len = (int) scan_email("http://test.com/");
+	CuAssertIntEquals(tc, 0, url_len);
+
+	// A bare address matches up to, but not including, the trailing space
+	url_len = (int) scan_url("foo@bar.com ");
+	CuAssertIntEquals(tc, 11, url_len);
+	url_len = (int) scan_email("foo@bar.com ");
+	CuAssertIntEquals(tc, 11, url_len);
+}
+#endif
+
diff --git a/src/stack.c b/src/stack.c
new file mode 100644
index 0000000..5b1864f
--- /dev/null
+++ b/src/stack.c
@@ -0,0 +1,132 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+ + @file stack.c + + @brief Create a dynamic array that stores pointers in a LIFO order. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "stack.h"
+
+#define kStackStartingSize 64		//!< Slots allocated when no capacity is requested
+
+
+/// Create a new stack with dynamic storage with an
+/// initial capacity (0 to use default capacity).
+/// Returns NULL if either allocation fails.
+stack * stack_new(int startingSize) {
+	stack * s = malloc(sizeof(stack));
+
+	if (s) {
+		if (startingSize <= 0)
+			startingSize = kStackStartingSize;
+
+		s->element = malloc(sizeof(void *) * startingSize);
+
+		if (!s->element) {
+			free(s);
+			return NULL;
+		}
+
+		s->size = 0;
+		s->capacity = startingSize;
+	}
+
+	return s;
+}
+
+
+/// Free the stack and its pointer array.  The objects the stored
+/// pointers refer to are not freed.  Passing NULL is a no-op.
+void stack_free(stack * s) {
+	if (s == NULL)
+		return;
+
+	free(s->element);
+	free(s);
+}
+
+
+/// Add a new pointer to the stack, doubling the storage when it is full.
+/// If the storage cannot be grown the stack is left unchanged and the
+/// pointer is NOT stored (the function has no way to report the failure).
+void stack_push(stack * s, void * element) {
+	if (s->size == s->capacity) {
+		size_t new_capacity = s->capacity ? s->capacity * 2 : kStackStartingSize;
+
+		// Keep the old block until realloc succeeds; assigning the result
+		// directly would leak it and leave a NULL array on failure
+		void ** grown = realloc(s->element, new_capacity * sizeof(void *));
+
+		if (grown == NULL)
+			return;
+
+		s->element = grown;
+		s->capacity = new_capacity;
+	}
+
+	s->element[s->size++] = element;
+}
+
+
+/// Pop the top item off the stack.  Returns NULL if the stack is empty.
+void * stack_pop(stack * s) {
+	void * last = stack_peek(s);
+
+	if (s->size != 0)
+		s->size--;
+
+	return last;
+}
+
+
+/// Peek at the top item on the stack without removing it.
+/// Returns NULL if the stack is empty.
+void * stack_peek(stack * s) {
+	if (s->size == 0)
+		return NULL;
+
+	return s->element[(s->size) - 1];
+}
+
+
+/// Peek at a specific index in the stack (0 is the oldest entry).
+/// Returns NULL if the index is out of range.
+void * stack_peek_index(stack * s, size_t index) {
+	if (index >= s->size)
+		return NULL;
+
+	return s->element[index];
+}
+
diff --git a/src/stack.h b/src/stack.h
new file mode 100644
index 0000000..5248a59
--- /dev/null
+++ b/src/stack.h
@@ -0,0 +1,110 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file stack.h
+
+	@brief Create a dynamic array that stores pointers in a LIFO order.
+
+
+	@author	Fletcher T. Penney
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License..
+ + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+
+*/
+
+
+#ifndef STACK_SMART_STRING_H
+#define STACK_SMART_STRING_H
+
+#include <stdlib.h>
+
+/// Structure for a stack
+struct stack {
+	size_t 		size;			//!< Number of objects currently in stack
+	size_t 		capacity;		//!< Total current capacity for stack
+	void ** 	element;		//!< Array of pointers to objects in stack
+};
+
+typedef struct stack stack;
+
+
+/// Create a new stack with dynamic storage with an
+/// initial capacity (0 to use default capacity).
+/// Returns NULL if memory cannot be allocated.
+stack * stack_new(
+	int startingSize			//!< Default capacity for stack
+);
+
+
+/// Free the stack (the objects pointed to by its entries are not freed)
+void stack_free(
+	stack * s					//!< Stack to be freed
+);
+
+
+/// Add a new pointer to the stack
+void stack_push(
+	stack * s,					//!< Stack to use
+	void * element				//!< Pointer to push onto stack
+);
+
+
+/// Pop the top pointer off the stack and return it (NULL if stack is empty)
+void * stack_pop(
+	stack * s					//!< Stack to examine
+);
+
+
+/// Peek at the top pointer on the stack (but don't remove it from stack).
+/// Returns NULL if stack is empty.
+void * stack_peek(
+	stack * s					//!< Stack to examine
+);
+
+
+/// Peek at a specific index in the stack (NULL if index is out of range)
+void * stack_peek_index(
+	stack * s,					//!< Stack to examine
+	size_t index				//!< Index to peek at (0 is first pointer on stack)
+);
+
+
+#endif
diff --git a/src/token.c b/src/token.c
new file mode 100644
index 0000000..17cd427
--- /dev/null
+++ b/src/token.c
@@ -0,0 +1,619 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file token.c
+
+	@brief Structure and functions to manage tokens representing portions of a
+	text string.
+
+
+	@author	Fletcher T. Penney
+
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License..
+
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+
+		https://github.com/fletcher/MultiMarkdown-4/
+
+	MMD 4 is released under both the MIT License and GPL.
+
+
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+
+
+	## The MIT License ##
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in
+	all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "char.h"
+#include "token.h"
+
+
+#ifdef kUseObjectPool
+//!< Use an object pool to allocate tokens more efficiently to improve
+//!< performance.
+
+#include "object_pool.h"
+
+static pool * token_pool = NULL;
+
+/// Initialize object pool for token allocation.  If a pool already
+/// exists it is drained instead of being recreated.
+void token_pool_init(void) {
+	if (token_pool == NULL) {
+		// No pool exists
+		token_pool = pool_new(sizeof(token));
+	} else {
+		// Pool exists, ensure it's drained
+		// NOTE: This invalidates any tokens currently in use.
+		token_pool_drain();
+	}
+}
+
+
+/// Drain token allocator pool to prepare for another parse
+void token_pool_drain(void) {
+	pool_drain(token_pool);
+}
+
+
+/// Free token allocator pool
+void token_pool_free(void) {
+	pool_free(token_pool);
+	token_pool = NULL;
+}
+
+#endif
+
+
+/// Get pointer to a new token covering `len` bytes starting at `start`.
+/// The token starts out unlinked (no siblings, children or mate) and is
+/// its own chain tail.  Returns NULL if allocation fails.
+token * token_new(unsigned short type, size_t start, size_t len) {
+
+
+#ifdef kUseObjectPool
+	token * t = pool_allocate_object(token_pool);
+#else
+	//token * t = calloc(1, sizeof(token));
+	token * t = malloc(sizeof(token));
+#endif
+
+	if (t) {
+		t->type = type;
+		t->start = start;
+		t->len = len;
+
+		t->next = NULL;
+		t->prev = NULL;
+		t->child = NULL;
+
+		t->tail = t;
+
+		t->can_open = true;			//!< Default to true -- we assume openers can open and closers can close
+		t->can_close = true;		//!< unless specified otherwise (e.g. for ambidextrous tokens)
+		t->unmatched = true;
+
+		t->mate = NULL;
+	}
+
+	return t;
+}
+
+
+/// Create a parent for a chain of tokens.  The parent starts where the
+/// first child starts and extends to the end of the last child in the
+/// chain.  A NULL child yields an empty parent (start 0, length 0).
+/// Returns NULL if the parent cannot be allocated.
+token * token_new_parent(token * child, unsigned short type) {
+	if (child == NULL) {
+		// Must be checked before child is dereferenced below
+		return token_new(type, 0, 0);
+	}
+
+	token * t = token_new(type, child->start, 0);
+
+	if (t == NULL)
+		return NULL;
+
+	t->child = child;
+	child->prev = NULL;
+
+	// Ensure that parent length correctly includes children
+	if (child->next == NULL) {
+		t->len = child->len;
+	} else {
+		while (child->next != NULL)
+			child = child->next;
+
+		t->len = child->start + child->len - t->start;
+	}
+
+	return t;
+}
+
+
+/// Add a new token to the end of a token chain.  The new token
+/// may or may not also be the start of a chain
+void token_chain_append(token * chain_start, token * t) {
+	if ((chain_start == NULL) ||
+		(t == NULL))
+		return;
+
+	// Append t
+	chain_start->tail->next = t;
+	t->prev = chain_start->tail;
+
+	// Adjust tail marker
+	chain_start->tail = t->tail;
+}
+
+
+/// Add a new token to the end of a parent's child
+/// token chain.  The new token may or may not be
+/// the start of a chain.
+void token_append_child(token * parent, token * t) {
+	if ((parent == NULL) || (t == NULL))
+		return;
+
+	if (parent->child == NULL) {
+		// Parent has no children
+		parent->child = t;
+	} else {
+		// Append to existing child chain
+		token_chain_append(parent->child, t);
+	}
+
+	// Set len on parent so that it reaches the end of the last child
+	parent->len = parent->child->tail->start + parent->child->tail->len - parent->start;
+}
+
+
+/// Remove the first child of a token.  The next sibling (if any)
+/// becomes the head of the child chain and takes over the tail marker.
+void token_remove_first_child(token * parent) {
+	if ((parent == NULL) || (parent->child == NULL))
+		return;
+
+	token * t = parent->child;
+	parent->child = t->next;
+
+	if (parent->child) {
+		parent->child->prev = NULL;
+		parent->child->tail = t->tail;
+	}
+
+	token_free(t);
+}
+
+
+/// Remove the last child of a token.  The parent's length is not
+/// recalculated.
+void token_remove_last_child(token * parent) {
+	if ((parent == NULL) || (parent->child == NULL))
+		return;
+
+	token * t = parent->child->tail;
+
+	if (t->prev) {
+		t->prev->next = NULL;
+		parent->child->tail = t->prev;
+	} else if (t == parent->child) {
+		// t was the only child -- detach it so that the parent does not
+		// keep pointing at a token that has been released
+		parent->child = NULL;
+	}
+
+	token_free(t);
+}
+
+
+/// Remove the last token in a chain.  A chain consisting of only
+/// the head token is left untouched.
+void token_remove_tail(token * head) {
+	if ((head == NULL) || (head->tail == head))
+		return;
+
+	token * t = head->tail;
+
+	if (t->prev) {
+		t->prev->next = NULL;
+		head->tail = t->prev;
+	}
+
+	token_free(t);
+}
+
+
+/// Pop token out of it's chain, connecting head and tail of chain back together.
+/// Token must be freed if it is no longer needed.
+/// \todo: If t is the tail token of a chain, the tail is no longer correct on the start of chain.
+void token_pop_link_from_chain(token * t) {
+	if (t == NULL)
+		return;
+
+	token * prev = t->prev;
+	token * next = t->next;
+
+	// Reset t to a stand-alone token
+	t->next = NULL;
+	t->prev = NULL;
+	t->tail = t;
+
+	if (prev) {
+		prev->next = next;
+	}
+
+	if (next) {
+		next->prev = prev;
+	}
+}
+
+
+/// Remove one or more tokens from chain.  The tokens from `first`
+/// through `last` are unlinked from their neighbors and released.
+void tokens_prune(token * first, token * last) {
+	if (first == NULL || last == NULL)
+		return;
+
+	token * prev = first->prev;
+	token * next = last->next;
+
+	if (prev != NULL)
+		prev->next = next;
+
+	if (next != NULL)
+		next->prev = prev;
+
+	first->prev = NULL;
+	last->next = NULL;
+
+	token_tree_free(first);
+}
+
+
+/// Given a start/stop point in token chain, create a new container token
+/// that takes the place of `first`..`last` in the chain and holds them
+/// as its children.
+/// Return pointer to new container token.  If either end point is NULL,
+/// or the container cannot be allocated, `first` is returned unchanged.
+token * token_prune_graft(token * first, token * last, unsigned short container_type) {
+	if (first == NULL || last == NULL)
+		return first;
+
+	token * prev = first->prev;
+	token * next = last->next;
+
+	// If we are head of chain, remember tail
+	token * tail = NULL;
+	if (prev == NULL)
+		tail = first->tail;
+
+
+	token * container = token_new(container_type, first->start, last->start + last->len - first->start);
+
+	if (container == NULL)
+		return first;
+
+	container->child = first;
+	container->next = next;
+	container->prev = prev;
+	container->can_close = 0;
+	container->can_open = 0;
+
+	// The container inherits the outer chain's tail, unless that tail is
+	// `last`, which is now a child -- then the container itself is the tail
+	// (token_new already set container->tail to container)
+	if (tail && (tail != last))
+		container->tail = tail;
+
+	if (prev)
+		prev->next = container;
+
+	// `first` now heads the child chain, which ends at `last`
+	first->prev = NULL;
+	first->tail = last;
+
+	last->next = NULL;
+
+	if (next)
+		next->prev = container;
+
+	return container;
+}
+
+
+/// Free token and its children.  When the object pool is in use this
+/// does nothing.
+void token_free(token * t) {
+#ifdef kUseObjectPool
+	return;
+#else
+	if (t == NULL)
+		return;
+
+	token_tree_free(t->child);
+
+	free(t);
+#endif
+}
+
+
+/// Free token chain (each token, its following siblings, and their
+/// children).  When the object pool is in use this does nothing.
+void token_tree_free(token * t) {
+#ifdef kUseObjectPool
+	return;
+#else
+	token * n;
+
+	while (t != NULL) {
+		n = t->next;
+		token_free(t);
+
+		t = n;
+	}
+#endif
+}
+
+
+/// Forward declaration
+void print_token_tree(token * t, unsigned short depth, const char * string);
+
+
+/// Print contents of the token based on specified string.  Output goes
+/// to stderr, indented one tab per level of `depth`; children are printed
+/// recursively.  If `string` is NULL only type, start and length are shown.
+void print_token(token * t, unsigned short depth, const char * string) {
+	if (t != NULL) {
+		for (int i = 0; i < depth; ++i)
+		{
+			fprintf(stderr, "\t");
+		}
+		// %zu with size_t arguments is portable; %lu is wrong wherever
+		// size_t is not unsigned long
+		if (string == NULL) {
+			fprintf(stderr, "* (%d) %zu:%zu\n", t->type, (size_t) t->start, (size_t) t->len);
+		} else {
+			fprintf(stderr, "* (%d) %zu:%zu\t'%.*s'\n", t->type, (size_t) t->start, (size_t) t->len, (int)t->len, &string[t->start]);
+		}
+
+		if (t->child != NULL)
+			print_token_tree(t->child, depth + 1, string);
+	}
+}
+
+
+/// Print contents of the token tree based on specified string
+void print_token_tree(token * t, unsigned short depth, const char * string) {
+	while (t != NULL) {
+		print_token(t, depth, string);
+
+		t = t->next;
+	}
+}
+
+
+/// Print a description of the token based on specified string
+void token_describe(token * t, const char * string) {
+	print_token(t, 0, string);
+}
+
+
+/// Print a description of the token tree based on specified string
+void token_tree_describe(token * t, const char * string) {
+	fprintf(stderr, "=====>\n");
+	while (t != NULL) {
+		print_token(t, 0, string);
+
+		t = t->next;
+	}
+	fprintf(stderr, "<=====\n");
+}
+
+
+/// Find the child node of a given parent that contains the specified
+/// offset position.
+token * token_child_for_offset(
+	token * parent,						//!< Pointer to parent token
+	size_t offset						//!< Search position
+) {
+	if (!parent)
+		return NULL;
+
+	// Offset must lie within (or at the very end of) the parent
+	if ((offset < parent->start) || (offset > parent->start + parent->len))
+		return NULL;
+
+	for (token * kid = parent->child; kid != NULL; kid = kid->next) {
+		// A child that begins after the offset ends the search
+		if (kid->start > offset)
+			break;
+
+		if (offset < kid->start + kid->len)
+			return kid;
+	}
+
+	return NULL;
+}
+
+
+/// Report whether two character ranges overlap.  Ranges that merely
+/// touch end-to-start do not count as overlapping.
+static bool ranges_intersect(size_t start1, size_t len1, size_t start2, size_t len2) {
+	bool first_begins_before_second_ends = start1 < start2 + len2;
+	bool second_begins_before_first_ends = start2 < start1 + len1;
+
+	return first_begins_before_second_ends && second_begins_before_first_ends;
+}
+
+/// Return the first child of parent whose range overlaps the search
+/// range, or NULL if there is none.
+token * token_first_child_in_range(
+	token * parent,						//!< Pointer to parent token
+	size_t start,						//!< Start search position
+	size_t len							//!< Search length
+) {
+	if (!parent)
+		return NULL;
+
+	// Search range must at least touch the parent
+	if ((start + len < parent->start) || (start > parent->start + parent->len))
+		return NULL;
+
+	for (token * kid = parent->child; kid != NULL; kid = kid->next) {
+		if (ranges_intersect(start, len, kid->start, kid->len))
+			return kid;
+
+		// A non-overlapping child that begins after the search start
+		// ends the search
+		if (kid->start > start)
+			break;
+	}
+
+	return NULL;
+}
+
+
+/// Find last child node of a given parent that intersects the specified
+/// offset range.
+token * token_last_child_in_range(
+	token * parent,						//!< Pointer to parent token
+	size_t start,						//!< Start search position
+	size_t len							//!< Search length
+) {
+	if (parent == NULL)
+		return NULL;
+
+	if ((parent->start > start + len) ||
+		(parent->start + parent->len < start))
+		return NULL;
+
+	token * walker = parent->child;
+	token * last = NULL;
+
+	while (walker != NULL) {
+		if (ranges_intersect(start, len, walker->start, walker->len))
+			last = walker;
+
+		// Stop once a child begins beyond the end of the search range
+		if (walker->start > start + len)
+			return last;
+
+		walker = walker->next;
+	}
+
+	return last;
+}
+
+
+/// Shrink the token so that it no longer covers leading whitespace
+void token_trim_leading_whitespace(token * t, const char * string) {
+	while (t->len && char_is_whitespace(string[t->start])) {
+		t->start++;
+		t->len--;
+	}
+}
+
+
+/// Shrink the token so that it no longer covers trailing whitespace
+void token_trim_trailing_whitespace(token * t, const char * string) {
+	while (t->len && char_is_whitespace(string[t->start + t->len - 1])) {
+		t->len--;
+	}
+}
+
+
+/// Shrink the token so that it covers neither leading nor trailing whitespace
+void token_trim_whitespace(token * t, const char * string) {
+	token_trim_leading_whitespace(t, string);
+	token_trim_trailing_whitespace(t, string);
+}
+
+
+/// Check whether first token in the chain matches the given type.
+/// If so, return and advance the chain.  Otherwise return NULL and
+/// leave the chain position unchanged.
+token * token_chain_accept(token ** t, short type) {
+	token * result = NULL;
+
+	if (t && *t && ((*t)->type == type)) {
+		result = *t;
+		*t = (*t)->next;
+	}
+
+	return result;
+}
+
+
+/// Allow checking for multiple token types.  `n` is the number of
+/// token types that follow; the first type that matches is accepted.
+token * token_chain_accept_multiple(token ** t, int n, ...) {
+	token * result = NULL;
+	va_list valist;
+
+	va_start(valist, n);
+
+	for (int i = 0; i < n; ++i)
+	{
+		result = token_chain_accept(t, va_arg(valist, int));
+		if (result)
+			break;
+	}
+
+	va_end(valist);
+
+	return result;
+}
+
+
+/// Advance the chain until a token of the given type is reached
+/// (or the chain is exhausted, leaving *t NULL)
+void token_skip_until_type(token ** t, short type) {
+	while ((*t) && ((*t)->type != type))
+		*t = (*t)->next;
+}
+
+
+/// Allow checking for multiple token types.  `n` is the number of
+/// token types that follow; the chain is advanced until a token of any
+/// of those types is reached (or the chain is exhausted).
+void token_skip_until_type_multiple(token ** t, int n, ...) {
+	if (n <= 0) {
+		// No target types, so nothing can match (this also avoids a
+		// zero-length array below)
+		while (*t)
+			*t = (*t)->next;
+
+		return;
+	}
+
+	va_list valist;
+	int type[n];
+
+	va_start(valist, n);
+
+	// Load target types
+	for (int i = 0; i < n; ++i)
+	{
+		type[i] = va_arg(valist, int);
+	}
+
+	// All arguments have been copied; finish with the list now so that
+	// the early return below cannot skip va_end
+	va_end(valist);
+
+	while (*t) {
+		for (int i = 0; i < n; ++i)
+		{
+			if ((*t)->type == type[i])
+				return;
+		}
+
+		*t = (*t)->next;
+	}
+}
+
+
+/// Split a token into several tokens of the same type at each
+/// occurrence of `c` in the text it covers.  The delimiter itself is not
+/// covered by any of the resulting tokens.  The final character of the
+/// token is never treated as a delimiter.
+/// NOTE(review): the `tail` marker on the head of the chain is not
+/// updated if `t` was the last token in its chain -- confirm whether
+/// callers depend on it.
+void token_split_on_char(token * t, const char * source, const char c) {
+	if (!t)
+		return;
+
+	size_t start = t->start;
+	size_t pos = 0;
+	size_t stop = t->len;
+	token * new = NULL;
+
+	while (pos + 1 < stop) {
+		if (source[start + pos] == c){
+			new = token_new(t->type, start + pos + 1, stop - (pos + 1));
+
+			if (new == NULL)
+				return;
+
+			// Link the new token in directly after t
+			new->next = t->next;
+			new->prev = t;
+
+			if (new->next)
+				new->next->prev = new;
+
+			t->next = new;
+
+			// t ends just before the delimiter.  pos is relative to the
+			// original token, so measure from t's own start
+			t->len = (start + pos) - t->start;
+
+			t = new;
+		}
+
+		pos++;
+	}
+}
+
diff --git a/src/token.h b/src/token.h
new file mode 100644
index 0000000..080f635
--- /dev/null
+++ b/src/token.h
@@ -0,0 +1,223 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file token.h
+
+	@brief Structure and functions to manage tokens representing portions of a
+	text string.
+
+
+	@author	Fletcher T. Penney
+
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License..
+
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+
+		https://github.com/fletcher/MultiMarkdown-4/
+
+	MMD 4 is released under both the MIT License and GPL.
+
+
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+
+
+	## The MIT License ##
+
+	Permission is hereby granted, free of charge, to any person obtaining a copy
+	of this software and associated documentation files (the "Software"), to deal
+	in the Software without restriction, including without limitation the rights
+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+	copies of the Software, and to permit persons to whom the Software is
+	furnished to do so, subject to the following conditions:
+
+	The above copyright notice and this permission notice shall be included in
+	all copies or substantial portions of the Software.
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+#ifndef TOKEN_PARSER_TEMPLATE_H
+#define TOKEN_PARSER_TEMPLATE_H
+
+#include <stddef.h>					// size_t -- keeps this header self-contained
+
+
+#define kUseObjectPool 1			//!< Use an object pool to allocate tokens to improve
+									//!< performance in memory allocation.  Frees all
+									//!< tokens at once, however, at end of parsing.
+
+
+#ifdef kUseObjectPool
+void token_pool_init(void);			//!< Initialize object pool for allocating tokens
+void token_pool_drain(void);		//!< Drain pool to free memory when parse complete
+void token_pool_free(void);			//!< Free the token object pool
+#endif
+
+
+/// Definition for token node struct.  This can be used to match an
+/// abstract syntax tree with the appropriate spans in the original
+/// source string.
+struct token {
+	unsigned short		type;			//!< Type for the token
+	short				can_open;		//!< Can token open a matched pair?
+	short				can_close;		//!< Can token close a matched pair?
+	short				unmatched;		//!< Non-zero until the token has been mated with another token
+
+	size_t				start;			//!< Starting offset in the source string
+	size_t				len;			//!< Length of the token in the source string
+
+	struct token *		next;			//!< Pointer to next token in the chain
+	struct token *		prev;			//!< Pointer to previous token in the chain
+	struct token *		child;			//!< Pointer to child chain
+
+	struct token *		tail;			//!< Pointer to last token in the chain
+
+	struct token *		mate;			//!< Pointer to other token in matched pair
+};
+
+typedef struct token token;
+
+
+/// Get pointer to a new token
+token * token_new(
+	unsigned short type,			//!< Type for new token
+	size_t start,					//!< Starting offset for token
+	size_t len						//!< Len of token
+);
+
+/// Create a parent for a chain of tokens
+token * token_new_parent(
+	token * child,					//!< Pointer to child token chain
+	unsigned short type				//!< Type for new token
+);
+
+/// Add a new token to the end of a token chain.  The new token
+/// may or may not also be the start of a chain
+void token_chain_append(
+	token * chain_start,			//!< Pointer to start of token chain
+	token * t						//!< Pointer to token to append
+);
+
+/// Add a new token to the end of a parent's child
+/// token chain.  The new token may or may not be
+/// the start of a chain.
+void token_append_child(
+	token * parent,					//!< Pointer to parent node
+	token * t						//!< Pointer to token to append
+);
+
+/// Remove the first child of a token
+void token_remove_first_child(
+	token * parent					//!< Pointer to parent node
+);
+
+/// Remove the last child of a token
+void token_remove_last_child(
+	token * parent					//!< Pointer to parent node
+);
+
+/// Remove the last token in a chain
+void token_remove_tail(token * head);
+
+/// Pop token out of its chain, connecting head and tail of chain back together.
+/// Token must be freed if it is no longer needed.
+void token_pop_link_from_chain(
+	token * t						//!< Pointer to token to remove
+);
+
+/// Remove one or more tokens from chain
+void tokens_prune(
+	token * first,					//!< Pointer to first node to be removed
+	token * last					//!< Pointer to last node to be removed
+);
+
+/// Given a start/stop point in token chain, create a new parent token.
+/// Reinsert the new parent in place of the removed segment.
+/// Return pointer to new container token.
+token * token_prune_graft(
+	token * first,					//!< Pointer to first node to be removed
+	token * last,					//!< Pointer to last node to be removed
+	unsigned short container_type	//!< Type for new parent node for removed section
+);
+
+/// Free token
+void token_free(
+	token * t						//!< Pointer to token to be freed
+);
+
+/// Free token tree
+void token_tree_free(
+	token * t						//!< Pointer to token to be freed
+);
+
+/// Print a description of the token based on specified string
+void token_describe(
+	token * t,						//!< Pointer to token to be described
+	const char * string				//!< Source string
+);
+
+/// Print a description of the token tree based on specified string
+void token_tree_describe(
+	token * t,						//!< Pointer to token to be described
+	const char * string				//!< Source string
+);
+
+/// Find the child node of a given parent that contains the specified
+/// offset position.
+token * token_child_for_offset(
+	token * parent,					//!< Pointer to parent token
+	size_t offset					//!< Search position
+);
+
+/// Find first child node of a given parent that intersects the specified
+/// offset range.
+token * token_first_child_in_range(
+	token * parent,					//!< Pointer to parent token
+	size_t start,					//!< Start search position
+	size_t len						//!< Search length
+);
+
+/// Find last child node of a given parent that intersects the specified
+/// offset range.
+token * token_last_child_in_range(
+	token * parent,					//!< Pointer to parent token
+	size_t start,					//!< Start search position
+	size_t len						//!< Search length
+);
+
+/// Advance the token's start past leading whitespace in its span of `string`.
+void token_trim_leading_whitespace(token * t, const char * string);
+
+/// Shorten the token so its span of `string` does not end in whitespace.
+void token_trim_trailing_whitespace(token * t, const char * string);
+
+/// Trim whitespace from both ends of the token's span of `string`.
+void token_trim_whitespace(token * t, const char * string);
+
+
+/// If the first token in the chain `*t` has the given type, return it and
+/// advance `*t` to the next token; otherwise return NULL and leave `*t` alone.
+token * token_chain_accept(token ** t, short type);
+
+/// Like token_chain_accept(), but tries each of the `n` token types passed
+/// as variadic `int` arguments, in order, and stops at the first match.
+token * token_chain_accept_multiple(token ** t, int n, ...);
+
+/// Advance `*t` until it points at a token of the given type or becomes NULL.
+void token_skip_until_type(token ** t, short type);
+
+/// Advance `*t` until it points at a token whose type is one of the `n`
+/// variadic `int` arguments, or becomes NULL.
+void token_skip_until_type_multiple(token ** t, int n, ...);
+
+/// Split token `t` at occurrences of character `c` within its span of
+/// `source`, inserting new tokens of the same type after `t`.  The final
+/// character of the span is not examined.
+void token_split_on_char(token * t, const char * source, const char c);
+
+#endif
+
diff --git a/src/token_pairs.c b/src/token_pairs.c
new file mode 100644
index 0000000..29dd3da
--- /dev/null
+++ b/src/token_pairs.c
@@ -0,0 +1,230 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file token_pairs.c
+
+	@brief Allow for pairing certain tokens together (e.g. '[' and ']') to create
+	more meaningful token trees.
+
+
+	@author	Fletcher T. Penney
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License.
+
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+
+		https://github.com/fletcher/MultiMarkdown-4/
+
+	MMD 4 is released under both the MIT License and GPL.
+
+
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + +#include +#include +#include +#include + +#include "stack.h" +#include "token.h" +#include "token_pairs.h" + + +/// Create a new token pair engine +token_pair_engine * token_pair_engine_new(void) { + token_pair_engine * e = malloc(sizeof(token_pair_engine)); + + if (e) { + unsigned short empty[kMaxTokenTypes] = {0}; + unsigned short empty2[kMaxTokenTypes][kMaxTokenTypes] = {{0}}; + + memcpy(e->can_open_pair, empty, sizeof(unsigned short) * kMaxTokenTypes); + memcpy(e->can_close_pair, empty, sizeof(unsigned short) * kMaxTokenTypes); + + memcpy(e->pair_type, empty2, sizeof(unsigned short) * kMaxTokenTypes * kMaxTokenTypes); + + memcpy(e->empty_allowed, empty, sizeof(unsigned short) * kMaxTokenTypes); + memcpy(e->match_len, empty, sizeof(unsigned short) * kMaxTokenTypes); + memcpy(e->should_prune, empty, sizeof(unsigned short) * kMaxTokenTypes); + } + + return e; +} + + +/// Free existing token pair engine +void token_pair_engine_free(token_pair_engine * e) { + if (e == NULL) + return; + + free(e); +} + + +/// Add a new pairing configuration to a token pair engine +void token_pair_engine_add_pairing(token_pair_engine * e, unsigned short open_type, unsigned short close_type, + unsigned short pair_type, int options) { + // \todo: This needs to be more sophisticated + e->can_open_pair[open_type] = 1; + e->can_close_pair[close_type] = 1; + (e->pair_type)[open_type][close_type] = pair_type; + + if (options & PAIRING_ALLOW_EMPTY) + e->empty_allowed[pair_type] = true; + + if (options & PAIRING_MATCH_LENGTH) + e->match_len[pair_type] = true; + + if (options & PAIRING_PRUNE_MATCH) + e->should_prune[pair_type] = true; + +} + + +/// Mate opener and closer together +void token_pair_mate(token * a, token * b) { + if (a == NULL | b == NULL) + return; + + a->mate = b; + a->unmatched = false; + + b->mate = a; + b->unmatched = false; +} + + +/// Search a token's childen for matching pairs +void token_pairs_match_pairs_inside_token(token * parent, token_pair_engine * e, stack 
* s) { +// if ((parent == NULL) || +// (parent->child == NULL) || +// (e == NULL)) { +// return; +// } + + // Walk the child chain + token * walker = parent->child; + + // Counter + size_t start_counter = s->size; + size_t i = start_counter; // We're sharing one stack, so any opener earlier than this belongs to a parent + + token * peek; + unsigned short pair_type; + + while (walker != NULL) { + + if (walker->child) { + token_pairs_match_pairs_inside_token(walker, e, s); + } + + // Is this a closer? + if (e->can_close_pair[walker->type] && walker->can_close && walker->unmatched ) { + // Do we have a valid opener in the stack? + i = s->size; + + while (i > start_counter) { + peek = stack_peek_index(s, i - 1); + + pair_type = e->pair_type[peek->type][walker->type]; + + if (pair_type) { + if (!e->empty_allowed[pair_type]) { + // Make sure they aren't consecutive tokens + if ((peek->next == walker) && + (peek->start + peek->len == walker->start)) { + // i--; + i = start_counter; // In this situation, we can't use this token as a closer + continue; + } + } + + if (e->match_len[pair_type]) { + // Lengths must match + if (peek->len != walker->len) { + i--; + continue; + } + } + + token_pair_mate(peek, walker); + + // Clear portion of stack between opener and closer as they are now unavailable for mating + s->size = i - 1; +#ifndef NDEBUG + fprintf(stderr, "stack now sized %lu\n", s->size); +#endif + // Prune matched section + + if (e->should_prune[pair_type]) { + if (peek->prev == NULL) { + walker = token_prune_graft(peek, walker, e->pair_type[peek->type][walker->type]); + parent->child = walker; + } else { + walker = token_prune_graft(peek, walker, e->pair_type[peek->type][walker->type]); + } + } + + break; + } +#ifndef NDEBUG + else { + fprintf(stderr, "token type %d failed to match stack element\n", walker->type); + } +#endif + i--; + } + } + + // Is this an opener? 
+ // \todo: Need to verify that token->type is a valid opening token for some pairing + if (e->can_open_pair[walker->type] && walker->can_open && walker->unmatched) { + stack_push(s, walker); +#ifndef NDEBUG + fprintf(stderr, "push token type %d to stack (%lu elements)\n", walker->type, s->size); +#endif + } + + walker = walker->next; + } + +#ifndef NDEBUG + fprintf(stderr, "token stack has %lu elements (of %lu)\n", s->size, s->capacity); +#endif + + // Remove unused tokens from stack and return to parent + s->size = start_counter; +} diff --git a/src/token_pairs.h b/src/token_pairs.h new file mode 100644 index 0000000..07f6b14 --- /dev/null +++ b/src/token_pairs.h @@ -0,0 +1,119 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file token_pairs.h + + @brief Allow for pairing certain tokens together (e.g. '[' and ']') to create + more meaningful token trees. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. + + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+	THE SOFTWARE.
+
+*/
+
+
+#ifndef TOKEN_PAIRS_MULTIMARKDOWN_H
+#define TOKEN_PAIRS_MULTIMARKDOWN_H
+
+#include "stack.h"
+#include "token.h"
+
+
+#ifdef TEST
+#include "CuTest.h"
+#endif
+
+#define kMaxTokenTypes	200				// Size of the per-type lookup tables below, which are indexed
+										// by token type.  This needs to be larger than the largest
+										// token type being used
+
+/// Store information about which tokens can be paired, and what actions to take when
+/// pairing them.  The first two tables are indexed by token type, `pair_type` by
+/// [opener type][closer type], and the last three by the resulting pair type.
+struct token_pair_engine {
+	unsigned short		can_open_pair[kMaxTokenTypes];				//!< Can token type open a pair?
+	unsigned short		can_close_pair[kMaxTokenTypes];				//!< Can token type close a pair?
+
+	unsigned short		pair_type[kMaxTokenTypes][kMaxTokenTypes];	//!< Which pair are we forming?  (0 means the two types do not pair)
+
+	unsigned short		empty_allowed[kMaxTokenTypes];				//!< Is this pair type allowed to be empty?
+	unsigned short		match_len[kMaxTokenTypes];					//!< Does this pair type require matched lengths of openers/closers?
+	unsigned short		should_prune[kMaxTokenTypes];				//!< Does this pair type need to be pruned to a child token chain?
+};
+
+typedef struct token_pair_engine token_pair_engine;
+
+
+/// Flags for token pair options.  These are bit flags and may be OR'ed together.
+enum pairings_options {
+	PAIRING_ALLOW_EMPTY		= 1 << 0,	//!< Allow consecutive tokens to match with each other
+	PAIRING_MATCH_LENGTH	= 1 << 1,	//!< Require that opening/closing tokens be same length
+	PAIRING_PRUNE_MATCH		= 1 << 2,	//!< Move the matched sub-chain into a child chain
+};
+
+
+/// Create a new token pair engine
+token_pair_engine * token_pair_engine_new(void);
+
+/// Free existing token pair engine
+void token_pair_engine_free(
+	token_pair_engine * e			//!< Token pair engine to be freed
+);
+
+/// Add a new pairing configuration to a token pair engine
+void token_pair_engine_add_pairing(
+	token_pair_engine * e,			//!< Token pair engine to add to
+	unsigned short open_type,		//!< Token type for opener
+	unsigned short close_type,		//!< Token type for closer
+	unsigned short pair_type,		//!< Token type for pairing
+	int options						//!< Token pair options to use (PAIRING_* flags)
+);
+
+/// Search a token's children for matching pairs
+void token_pairs_match_pairs_inside_token(
+	token * parent,					//!< Which tokens should we search for pairs
+	token_pair_engine * e,			//!< Token pair engine to be used for matching
+	stack * s						//!< Pointer to a stack to use for pairing tokens
+);
+
+
+#endif
diff --git a/src/uthash.h b/src/uthash.h
new file mode 100644
index 0000000..45d1f9f
--- /dev/null
+++ b/src/uthash.h
@@ -0,0 +1,1074 @@
+/*
+Copyright (c) 2003-2016, Troy D. Hanson     http://troydhanson.github.com/uthash/
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.0.1 + +#include /* memcmp,strlen */ +#include /* ptrdiff_t */ +#include /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. 
*/ +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#define DECLTYPE(x) +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while (0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while (0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ +#if defined(_WIN32) +#if defined(_MSC_VER) && _MSC_VER >= 1600 +#include +#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) +#include +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif +#elif defined(__GNUC__) && !defined(__VXWORKS__) +#include +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif +#ifndef uthash_memcmp +#define uthash_memcmp(a,b,n) memcmp(a,b,n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define 
HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_VALUE(keyptr,keylen,hashv) \ +do { \ + HASH_FCN(keyptr, keylen, hashv); \ +} while (0) + +#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ + } \ + } \ +} while (0) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 
1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! 
(head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ +} while (0) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ +} while (0) + +#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ +} while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ +do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ +} while (0) + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + 
(add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + struct UT_hash_handle *_hs_iter = &(head)->hh; \ + (add)->hh.tbl = (head)->hh.tbl; \ + do { \ + if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \ + break; \ + } while ((_hs_iter = _hs_iter->next)); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = _hs_iter->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add); \ + } else { \ + (head) = (add); \ + } \ + _hs_iter->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ +do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ 
+ (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv,num_bkts,bkt) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ +} while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */
+/* Behaviour summary: when "delptr" is the only element (both of its app-order
+ * links are NULL) the bucket array, the bloom filter and the table itself are
+ * freed and "head" is set to NULL. Otherwise the element is unlinked from the
+ * app-order list and from its bucket chain, and num_items is decremented.
+ * The element itself is never freed here. */
+#define HASH_DELETE(hh,head,delptr) \
+do { \
+  struct UT_hash_handle *_hd_hh_del; \
+  if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \
+    uthash_free((head)->hh.tbl->buckets, \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+    HASH_BLOOM_FREE((head)->hh.tbl); \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
+    head = NULL; \
+  } else { \
+    unsigned _hd_bkt; \
+    _hd_hh_del = &((delptr)->hh); \
+    if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \
+      (head)->hh.tbl->tail = \
+        (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+        (head)->hh.tbl->hho); \
+    } \
+    if ((delptr)->hh.prev != NULL) { \
+      ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \
+        (head)->hh.tbl->hho))->next = (delptr)->hh.next; \
+    } else { \
+      DECLTYPE_ASSIGN(head,(delptr)->hh.next); \
+    } \
+    if (_hd_hh_del->next != NULL) { \
+      ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \
+        (head)->hh.tbl->hho))->prev = \
+        _hd_hh_del->prev; \
+    } \
+    HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \
+    HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \
+    (head)->hh.tbl->num_items--; \
+  } \
+  HASH_FSCK(hh,head); \
+} while (0)
+
+
+/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL
+ * All of these hard-code "hh" as the name of the hash handle field.
+ * The _STR forms use uthash_strlen() of the string as the key length, the
+ * _INT forms use sizeof(int) and the _PTR forms use sizeof(void *). */
+#define HASH_FIND_STR(head,findstr,out) \
+    HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out)
+#define HASH_ADD_STR(head,strfield,add) \
+    HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add)
+#define HASH_REPLACE_STR(head,strfield,add,replaced) \
+    HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced)
+#define HASH_FIND_INT(head,findint,out) \
+    HASH_FIND(hh,head,findint,sizeof(int),out)
+#define HASH_ADD_INT(head,intfield,add) \
+    HASH_ADD(hh,head,intfield,sizeof(int),add)
+#define HASH_REPLACE_INT(head,intfield,add,replaced) \
+    HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced)
+#define HASH_FIND_PTR(head,findptr,out) \
+    HASH_FIND(hh,head,findptr,sizeof(void *),out)
+#define HASH_ADD_PTR(head,ptrfield,add) \
+    HASH_ADD(hh,head,ptrfield,sizeof(void *),add)
+#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \
+    HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced)
+#define HASH_DEL(head,delptr) \
+    HASH_DELETE(hh,head,delptr)
+
+/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined.
+ * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined.
+ *
+ * Two passes are made: first every bucket chain is walked, checking the
+ * hh_prev links and the per-bucket and total item counts; then the items are
+ * walked in app order, checking the prev links and the total count again.
+ * Any mismatch is reported through HASH_OOPS, which prints to stderr and
+ * calls exit(-1).
+ */
+#ifdef HASH_DEBUG
+#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0)
+#define HASH_FSCK(hh,head) \
+do { \
+  struct UT_hash_handle *_thh; \
+  if (head) { \
+    unsigned _bkt_i; \
+    unsigned _count; \
+    char *_prev; \
+    _count = 0; \
+    for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \
+      unsigned _bkt_count = 0; \
+      _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \
+      _prev = NULL; \
+      while (_thh) { \
+        if (_prev != (char*)(_thh->hh_prev)) { \
+          HASH_OOPS("invalid hh_prev %p, actual %p\n", \
+            _thh->hh_prev, _prev ); \
+        } \
+        _bkt_count++; \
+        _prev = (char*)(_thh); \
+        _thh = _thh->hh_next; \
+      } \
+      _count += _bkt_count; \
+      if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \
+        HASH_OOPS("invalid bucket count %u, actual %u\n", \
+          (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \
+      } \
+    } \
+    if (_count != (head)->hh.tbl->num_items) { \
+      HASH_OOPS("invalid hh item count %u, actual %u\n", \
+        (head)->hh.tbl->num_items, _count ); \
+    } \
+    /* traverse hh in app order; check next/prev integrity, count */ \
+    _count = 0; \
+    _prev = NULL; \
+    _thh = &(head)->hh; \
+    while (_thh) { \
+      _count++; \
+      if (_prev !=(char*)(_thh->prev)) { \
+        HASH_OOPS("invalid prev %p, actual %p\n", \
+          _thh->prev, _prev ); \
+      } \
+      _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \
+      _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \
+        (head)->hh.tbl->hho) : NULL ); \
+    } \
+    if (_count != (head)->hh.tbl->num_items) { \
+      HASH_OOPS("invalid app item count %u, actual %u\n", \
+        (head)->hh.tbl->num_items, _count ); \
+    } \
+  } \
+} while (0)
+#else
+#define HASH_FSCK(hh,head)
+#endif
+
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+ * the descriptor to which this macro is defined for tuning the hash function.
+ * Each record is the key length as an unsigned, followed by the key bytes.
+ * The app can #include <unistd.h> to get the prototype for write(2). */
+#ifdef HASH_EMIT_KEYS
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \
+do { \
+  unsigned _klen = fieldlen; \
+  write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
+  write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \
+} while (0)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#endif
+
+/* default to Jenkins' hash unless overridden e.g. -DHASH_FUNCTION=HASH_SAX */
+#ifdef HASH_FUNCTION
+#define HASH_FCN HASH_FUNCTION
+#else
+#define HASH_FCN HASH_JEN
+#endif
+
+/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33.
+ */
+#define HASH_BER(key,keylen,hashv) \
+do { \
+  unsigned _hb_keylen=(unsigned)keylen; \
+  const unsigned char *_hb_key=(const unsigned char*)(key); \
+  (hashv) = 0; \
+  while (_hb_keylen-- != 0U) { \
+    (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \
+  } \
+} while (0)
+
+
+/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
+ * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+ * Each takes a key pointer and a key length in bytes, reads the key as
+ * unsigned bytes and leaves the result in "hashv". */
+#define HASH_SAX(key,keylen,hashv) \
+do { \
+  unsigned _sx_i; \
+  const unsigned char *_hs_key=(const unsigned char*)(key); \
+  hashv = 0; \
+  for(_sx_i=0; _sx_i < keylen; _sx_i++) { \
+    hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \
+  } \
+} while (0)
+/* FNV-1a variation */
+#define HASH_FNV(key,keylen,hashv) \
+do { \
+  unsigned _fn_i; \
+  const unsigned char *_hf_key=(const unsigned char*)(key); \
+  hashv = 2166136261U; \
+  for(_fn_i=0; _fn_i < keylen; _fn_i++) { \
+    hashv = hashv ^ _hf_key[_fn_i]; \
+    hashv = hashv * 16777619U; \
+  } \
+} while (0)
+
+#define HASH_OAT(key,keylen,hashv) \
+do { \
+  unsigned _ho_i; \
+  const unsigned char *_ho_key=(const unsigned char*)(key); \
+  hashv = 0; \
+  for(_ho_i=0; _ho_i < keylen; _ho_i++) { \
+    hashv += _ho_key[_ho_i]; \
+    hashv += (hashv << 10); \
+    hashv ^= (hashv >> 6); \
+  } \
+  hashv += (hashv << 3); \
+  hashv ^= (hashv >> 11); \
+  hashv += (hashv << 15); \
+} while (0)
+
+/* Mixing step shared by HASH_JEN: scrambles the three accumulators in place. */
+#define HASH_JEN_MIX(a,b,c) \
+do { \
+  a -= b; a -= c; a ^= ( c >> 13 ); \
+  b -= c; b -= a; b ^= ( a << 8 ); \
+  c -= a; c -= b; c ^= ( b >> 13 ); \
+  a -= b; a -= c; a ^= ( c >> 12 ); \
+  b -= c; b -= a; b ^= ( a << 16 ); \
+  c -= a; c -= b; c ^= ( b >> 5 ); \
+  a -= b; a -= c; a ^= ( c >> 3 ); \
+  b -= c; b -= a; b ^= ( a << 10 ); \
+  c -= a; c -= b; c ^= ( b >> 15 ); \
+} while (0)
+
+/* The key is consumed 12 bytes at a time, assembling three 32-bit words
+ * byte by byte (lowest byte first) and mixing after each group. The switch
+ * then folds in the remaining 0..11 bytes; its cases fall through on
+ * purpose so that every leftover byte is added before the final mix. */
+#define HASH_JEN(key,keylen,hashv) \
+do { \
+  unsigned _hj_i,_hj_j,_hj_k; \
+  unsigned const char *_hj_key=(unsigned const char*)(key); \
+  hashv = 0xfeedbeefu; \
+  _hj_i = _hj_j = 0x9e3779b9u; \
+  _hj_k = (unsigned)(keylen); \
+  while (_hj_k >= 12U) { \
+    _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \
+        + ( (unsigned)_hj_key[2] << 16 ) \
+        + ( (unsigned)_hj_key[3] << 24 ) ); \
+    _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \
+        + ( (unsigned)_hj_key[6] << 16 ) \
+        + ( (unsigned)_hj_key[7] << 24 ) ); \
+    hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \
+        + ( (unsigned)_hj_key[10] << 16 ) \
+        + ( (unsigned)_hj_key[11] << 24 ) ); \
+ \
+    HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+ \
+    _hj_key += 12; \
+    _hj_k -= 12U; \
+  } \
+  hashv += (unsigned)(keylen); \
+  switch ( _hj_k ) { \
+    case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \
+    case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \
+    case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \
+    case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \
+    case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \
+    case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \
+    case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \
+    case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \
+    case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \
+    case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \
+    case 1: _hj_i += _hj_key[0]; \
+  } \
+  HASH_JEN_MIX(_hj_i, _hj_j, hashv); \
+} while (0)
+
+/* The Paul Hsieh hash function */
+/* get16bits reads two key bytes as one 16-bit value. On the compilers and
+ * targets listed below it dereferences a uint16_t pointer directly; everywhere
+ * else the value is assembled from two single-byte reads, low byte first. */
+#undef get16bits
+#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
+  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
+#define get16bits(d) (*((const uint16_t *) (d)))
+#endif
+
+#if !defined (get16bits)
+#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \
+                       +(uint32_t)(((const uint8_t *)(d))[0]) )
+#endif
+/* The main loop consumes four key bytes per iteration (two 16-bit reads);
+ * the switch handles the 1..3 bytes left over (keylen & 3). */
+#define HASH_SFH(key,keylen,hashv) \
+do { \
+  unsigned const char *_sfh_key=(unsigned const char*)(key); \
+  uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \
+ \
+  unsigned _sfh_rem = _sfh_len & 3U; \
+  _sfh_len >>= 2; \
+  hashv = 0xcafebabeu; \
+ \
+  /* Main loop */ \
+  for (;_sfh_len > 0U; _sfh_len--) { \
+    hashv += get16bits (_sfh_key); \
+    _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \
+    hashv = (hashv << 16) ^ _sfh_tmp; \
+    _sfh_key += 2U*sizeof (uint16_t); \
+    hashv += hashv >> 11; \
+  } \
+ \
+  /* Handle end cases */ \
+  switch (_sfh_rem) { \
+    case 3: hashv += get16bits (_sfh_key); \
+            hashv ^= hashv << 16; \
+            hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \
+            hashv += hashv >> 11; \
+            break; \
+    case 2: hashv += get16bits (_sfh_key); \
+            hashv ^= hashv << 11; \
+            hashv += hashv >> 17; \
+            break; \
+    case 1: hashv += *_sfh_key; \
+            hashv ^= hashv << 10; \
+            hashv += hashv >> 1; \
+  } \
+ \
+  /* Force "avalanching" of final 127 bits */ \
+  hashv ^= hashv << 3; \
+  hashv += hashv >> 5; \
+  hashv ^= hashv << 4; \
+  hashv += hashv >> 17; \
+  hashv ^= hashv << 25; \
+  hashv += hashv >> 6; \
+} while (0)
+
+#ifdef HASH_USING_NO_STRICT_ALIASING
+/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
+ * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
+ * MurmurHash uses the faster approach only on CPU's where we know it's safe.
+ *
+ * Note the preprocessor built-in defines can be emitted using:
+ *
+ *   gcc -m64 -dM -E - < /dev/null                  (on gcc)
+ *   cc -## a.c (where a.c is a simple test file)  (Sun Studio)
+ */
+#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86))
+#define MUR_GETBLOCK(p,i) p[i]
+#else /* non intel */
+/* MUR_PLUSn_ALIGNED test the low two address bits of p; WP(p) rounds p down
+ * to a 4-byte boundary. MUR_THREE_ONE / MUR_TWO_TWO / MUR_ONE_THREE splice a
+ * 32-bit value out of the two aligned words around p.
+ * NOTE(review): only the aligned branch of MUR_GETBLOCK uses the index i; the
+ * three unaligned branches are written in terms of p alone -- confirm that
+ * this is intended before relying on HASH_MUR on non-Intel targets. */
+#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL)
+#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL)
+#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL)
+#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL)
+#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL))
+#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__))
+#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8))
+#else /* assume little endian non-intel */
+#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24))
+#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16))
+#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8))
+#endif
+#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \
+                            (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \
+                             (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \
+                                                      MUR_ONE_THREE(p))))
+#endif
+#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#define MUR_FMIX(_h) \
+do { \
+  _h ^= _h >> 16; \
+  _h *= 0x85ebca6bu; \
+  _h ^= _h >> 13; \
+  _h *= 0xc2b2ae35u; \
+  _h ^= _h >> 16; \
+} while (0)
+
+/* Processes the key in 4-byte blocks (indexed backwards from the end of the
+ * last full block), then folds in the 1..3 tail bytes, the key length and a
+ * final mix. Only available when HASH_USING_NO_STRICT_ALIASING is defined. */
+#define HASH_MUR(key,keylen,hashv) \
+do { \
+  const uint8_t *_mur_data = (const uint8_t*)(key); \
+  const int _mur_nblocks = (int)(keylen) / 4; \
+  uint32_t _mur_h1 = 0xf88D5353u; \
+  uint32_t _mur_c1 = 0xcc9e2d51u; \
+  uint32_t _mur_c2 = 0x1b873593u; \
+  uint32_t _mur_k1 = 0; \
+  const uint8_t *_mur_tail; \
+  const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \
+  int _mur_i; \
+  for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \
+    _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \
+    _mur_k1 *= _mur_c1; \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+    _mur_k1 *= _mur_c2; \
+ \
+    _mur_h1 ^= _mur_k1; \
+    _mur_h1 = MUR_ROTL32(_mur_h1,13); \
+    _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \
+  } \
+  _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \
+  _mur_k1=0; \
+  switch((keylen) & 3U) { \
+    case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \
+    case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \
+    case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \
+    _mur_k1 *= _mur_c1; \
+    _mur_k1 = MUR_ROTL32(_mur_k1,15); \
+    _mur_k1 *= _mur_c2; \
+    _mur_h1 ^= _mur_k1; \
+  } \
+  _mur_h1 ^= (uint32_t)(keylen); \
+  MUR_FMIX(_mur_h1); \
+  hashv = _mur_h1; \
+} while (0)
+#endif /* HASH_USING_NO_STRICT_ALIASING */
+
+/* iterate over items in a known bucket to find desired item */
+/* "head" is the bucket itself (not a pointer to it). An item matches when its
+ * stored hash value and key length are equal and uthash_memcmp() on the key
+ * bytes returns 0. "out" is left pointing at the match, or NULL if none. */
+#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \
+do { \
+  if ((head).hh_head != NULL) { \
+    DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \
+  } else { \
+    (out) = NULL; \
+  } \
+  while ((out) != NULL) { \
+    if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \
+      if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \
+        break; \
+      } \
+    } \
+    if ((out)->hh.hh_next != NULL) { \
+      DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \
+    } else { \
+      (out) = NULL; \
+    } \
+  } \
+} while (0)
+
+/* add an item to a bucket */
+/* The handle is pushed at the front of the bucket chain. Bucket expansion is
+ * triggered when the chain count reaches (expand_mult+1) times
+ * HASH_BKT_CAPACITY_THRESH, unless the table's noexpand flag is 1. */
+#define HASH_ADD_TO_BKT(head,addhh) \
+do { \
+  head.count++; \
+  (addhh)->hh_next = head.hh_head; \
+  (addhh)->hh_prev = NULL; \
+  if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \
+  (head).hh_head=addhh; \
+  if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \
+      && ((addhh)->tbl->noexpand != 1U)) { \
+    HASH_EXPAND_BUCKETS((addhh)->tbl); \
+  } \
+} while (0)
+
+/* remove an item from a given bucket */
+/* NOTE(review): unlike the neighbouring macros this one expands to several
+ * statements that are NOT wrapped in do { } while (0), so it is only safe
+ * where a braced block already encloses it (as in HASH_DELETE). The "hh"
+ * parameter is not used by the expansion. */
+#define HASH_DEL_IN_BKT(hh,head,hh_del) \
+    (head).count--; \
+    if ((head).hh_head == hh_del) { \
+      (head).hh_head = hh_del->hh_next; \
+    } \
+    if (hh_del->hh_prev) { \
+      hh_del->hh_prev->hh_next = hh_del->hh_next; \
+    } \
+    if (hh_del->hh_next) { \
+      hh_del->hh_next->hh_prev = hh_del->hh_prev; \
+    }
+
+/* Bucket expansion has the effect of doubling the number of buckets
+ * and redistributing the items into the new buckets. Ideally the
+ * items will distribute more or less evenly into the new buckets
+ * (the extent to which this is true is a measure of the quality of
+ * the hash function as it applies to the key domain).
+ *
+ * With the items distributed into more buckets, the chain length
+ * (item count) in each bucket is reduced. Thus by expanding buckets
+ * the hash keeps a bound on the chain length. This bounded chain
+ * length is the essence of how a hash provides constant time lookup.
+ *
+ * The calculation of tbl->ideal_chain_maxlen below deserves some
+ * explanation. First, keep in mind that we're calculating the ideal
+ * maximum chain length based on the *new* (doubled) bucket count.
+ * In fractions this is just n/b (n=number of items,b=new num buckets).
+ * Since the ideal chain length is an integer, we want to calculate
+ * ceil(n/b).
+ * We don't depend on floating point arithmetic in this
+ * hash, so to calculate ceil(n/b) with integers we could write
+ *
+ *      ceil(n/b) = (n/b) + ((n%b)?1:0)
+ *
+ * and in fact a previous version of this hash did just that.
+ * But now we have improved things a bit by recognizing that b is
+ * always a power of two. We keep its base 2 log handy (call it lb),
+ * so now we can write this with a bit shift and logical AND:
+ *
+ *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
+ *
+ * While items are moved into the new bucket array, every item that lands
+ * beyond the ideal chain length is counted in tbl->nonideal_items and its
+ * bucket's expand_mult is raised. If more than half of the items are
+ * non-ideal after an expansion, ineff_expands is incremented (otherwise it
+ * is reset to 0); once it exceeds 1 the table's noexpand flag is set and
+ * uthash_noexpand_fyi() is invoked. uthash_fatal() is invoked if the new
+ * bucket array cannot be allocated.
+ */
+#define HASH_EXPAND_BUCKETS(tbl) \
+do { \
+  unsigned _he_bkt; \
+  unsigned _he_bkt_i; \
+  struct UT_hash_handle *_he_thh, *_he_hh_nxt; \
+  UT_hash_bucket *_he_new_buckets, *_he_newbkt; \
+  _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \
+    2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+  if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \
+  memset(_he_new_buckets, 0, \
+    2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \
+  tbl->ideal_chain_maxlen = \
+    (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \
+    (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \
+  tbl->nonideal_items = 0; \
+  for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \
+  { \
+    _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \
+    while (_he_thh != NULL) { \
+      _he_hh_nxt = _he_thh->hh_next; \
+      HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \
+      _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \
+      if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \
+        tbl->nonideal_items++; \
+        _he_newbkt->expand_mult = _he_newbkt->count / \
+          tbl->ideal_chain_maxlen; \
+      } \
+      _he_thh->hh_prev = NULL; \
+      _he_thh->hh_next = _he_newbkt->hh_head; \
+      if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \
+        _he_thh; } \
+      _he_newbkt->hh_head = _he_thh; \
+      _he_thh = _he_hh_nxt; \
+    } \
+  } \
+  uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \
+  tbl->num_buckets *= 2U; \
+  tbl->log2_num_buckets++; \
+  tbl->buckets = _he_new_buckets; \
+  tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \
+    (tbl->ineff_expands+1U) : 0U; \
+  if (tbl->ineff_expands > 1U) { \
+    tbl->noexpand=1; \
+    uthash_noexpand_fyi(tbl); \
+  } \
+  uthash_expand_fyi(tbl); \
+} while (0)
+
+
+/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
+/* Note that HASH_SORT assumes the hash handle name to be hh.
+ * HASH_SRT was added to allow the hash handle name to be passed in.
+ */
+/* cmpfcn is called with two element pointers (cast to the type of head); a
+ * result <= 0 takes the element from the first run, so equal elements keep
+ * their relative order. Only the app-order links, the table's tail pointer
+ * and "head" are rewritten; bucket chains are not touched. */
+#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
+#define HASH_SRT(hh,head,cmpfcn) \
+do { \
+  unsigned _hs_i; \
+  unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \
+  struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \
+  if (head != NULL) { \
+    _hs_insize = 1; \
+    _hs_looping = 1; \
+    _hs_list = &((head)->hh); \
+    while (_hs_looping != 0U) { \
+      _hs_p = _hs_list; \
+      _hs_list = NULL; \
+      _hs_tail = NULL; \
+      _hs_nmerges = 0; \
+      while (_hs_p != NULL) { \
+        _hs_nmerges++; \
+        _hs_q = _hs_p; \
+        _hs_psize = 0; \
+        for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \
+          _hs_psize++; \
+          _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
+            ((void*)((char*)(_hs_q->next) + \
+            (head)->hh.tbl->hho)) : NULL); \
+          if (! (_hs_q) ) { break; } \
+        } \
+        _hs_qsize = _hs_insize; \
+        while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\
+          if (_hs_psize == 0U) { \
+            _hs_e = _hs_q; \
+            _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
+              ((void*)((char*)(_hs_q->next) + \
+              (head)->hh.tbl->hho)) : NULL); \
+            _hs_qsize--; \
+          } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \
+            _hs_e = _hs_p; \
+            if (_hs_p != NULL){ \
+              _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
+                ((void*)((char*)(_hs_p->next) + \
+                (head)->hh.tbl->hho)) : NULL); \
+            } \
+            _hs_psize--; \
+          } else if (( \
+            cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \
+              DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \
+            ) <= 0) { \
+            _hs_e = _hs_p; \
+            if (_hs_p != NULL){ \
+              _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \
+                ((void*)((char*)(_hs_p->next) + \
+                (head)->hh.tbl->hho)) : NULL); \
+            } \
+            _hs_psize--; \
+          } else { \
+            _hs_e = _hs_q; \
+            _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \
+              ((void*)((char*)(_hs_q->next) + \
+              (head)->hh.tbl->hho)) : NULL); \
+            _hs_qsize--; \
+          } \
+          if ( _hs_tail != NULL ) { \
+            _hs_tail->next = ((_hs_e != NULL) ? \
+              ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \
+          } else { \
+            _hs_list = _hs_e; \
+          } \
+          if (_hs_e != NULL) { \
+            _hs_e->prev = ((_hs_tail != NULL) ? \
+              ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \
+          } \
+          _hs_tail = _hs_e; \
+        } \
+        _hs_p = _hs_q; \
+      } \
+      if (_hs_tail != NULL){ \
+        _hs_tail->next = NULL; \
+      } \
+      if ( _hs_nmerges <= 1U ) { \
+        _hs_looping=0; \
+        (head)->hh.tbl->tail = _hs_tail; \
+        DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \
+      } \
+      _hs_insize *= 2U; \
+    } \
+    HASH_FSCK(hh,head); \
+  } \
+} while (0)
+
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary
+ * hash handle that must be present in the structure.
+ * "cond" is invoked with each source element as a void pointer; elements
+ * for which it is non-zero are added to dst through hh_dst. */
+#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
+do { \
+  unsigned _src_bkt, _dst_bkt; \
+  void *_last_elt=NULL, *_elt; \
+  UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \
+  ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \
+  if (src != NULL) { \
+    for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \
+      for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \
+        _src_hh != NULL; \
+        _src_hh = _src_hh->hh_next) { \
+        _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \
+        if (cond(_elt)) { \
+          _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \
+          _dst_hh->key = _src_hh->key; \
+          _dst_hh->keylen = _src_hh->keylen; \
+          _dst_hh->hashv = _src_hh->hashv; \
+          _dst_hh->prev = _last_elt; \
+          _dst_hh->next = NULL; \
+          if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \
+          if (dst == NULL) { \
+            DECLTYPE_ASSIGN(dst,_elt); \
+            HASH_MAKE_TABLE(hh_dst,dst); \
+          } else { \
+            _dst_hh->tbl = (dst)->hh_dst.tbl; \
+          } \
+          HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \
+          HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \
+          (dst)->hh_dst.tbl->num_items++; \
+          _last_elt = _elt; \
+          _last_elt_hh = _dst_hh; \
+        } \
+      } \
+    } \
+  } \
+  HASH_FSCK(hh_dst,dst); \
+} while (0)
+
+/* Releases the bucket array, the bloom filter and the table, then sets head
+ * to NULL. The elements themselves are not freed by this macro. */
+#define HASH_CLEAR(hh,head) \
+do { \
+  if (head != NULL) { \
+    uthash_free((head)->hh.tbl->buckets, \
+                (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \
+    HASH_BLOOM_FREE((head)->hh.tbl); \
+    uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \
+    (head)=NULL; \
+  } \
+} while (0)
+
+/* Bookkeeping size in bytes: one handle per item, the bucket array, the
+ * table and the bloom filter; 0 for an empty (NULL) hash. */
+#define HASH_OVERHEAD(hh,head) \
+ ((head != NULL) ? ( \
+ (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \
+          ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \
+           sizeof(UT_hash_table) + \
+           (HASH_BLOOM_BYTELEN))) : 0U)
+
+/* Iterates in app order. "tmp" always holds the successor of "el", fetched
+ * before the loop body runs, so the body may delete "el" from the hash. */
+#ifdef NO_DECLTYPE
+#define HASH_ITER(hh,head,el,tmp) \
+for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL)))
+#else
+#define HASH_ITER(hh,head,el,tmp) \
+for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \
+  (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL)))
+#endif
+
+/* obtain a count of items in the hash */
+#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U)
+
+typedef struct UT_hash_bucket {
+   struct UT_hash_handle *hh_head;
+   unsigned count;
+
+   /* expand_mult is normally set to 0. In this situation, the max chain length
+    * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
+    * However, setting expand_mult to a non-zero value delays bucket expansion
+    * (that would be triggered by additions to this particular bucket)
+    * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
+    * (The multiplier is simply expand_mult+1). The whole idea of this
+    * multiplier is to reduce bucket expansions, since they are expensive, in
+    * situations where we know that a particular bucket tends to be overused.
+    * It is better to let its chain length grow to a longer yet-still-bounded
+    * value, than to do an O(n) bucket expansion too often.
+    */
+   unsigned expand_mult;
+
+} UT_hash_bucket;
+
+/* random signature used only to find hash tables in external analysis */
+#define HASH_SIGNATURE 0xa0111fe1u
+#define HASH_BLOOM_SIGNATURE 0xb12220f2u
+
+typedef struct UT_hash_table {
+   UT_hash_bucket *buckets;
+   unsigned num_buckets, log2_num_buckets;
+   unsigned num_items;
+   struct UT_hash_handle *tail; /* tail hh in app order, for fast append */
+   ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element) */
+
+   /* in an ideal situation (all buckets used equally), no bucket would have
+    * more than ceil(#items/#buckets) items. that's the ideal chain length. */
+   unsigned ideal_chain_maxlen;
+
+   /* nonideal_items is the number of items in the hash whose chain position
+    * exceeds the ideal chain maxlen. these items pay the penalty for an uneven
+    * hash distribution; reaching them in a chain traversal takes >ideal steps */
+   unsigned nonideal_items;
+
+   /* ineffective expands occur when a bucket doubling was performed, but
+    * afterward, more than half the items in the hash had nonideal chain
+    * positions. If this happens on two consecutive expansions we inhibit any
+    * further expansion, as it's not helping; this happens when the hash
+    * function isn't a good fit for the key domain. When expansion is inhibited
+    * the hash will still work, albeit no longer in constant time. */
+   unsigned ineff_expands, noexpand;
+
+   uint32_t signature; /* used only to find hash tables in external analysis */
+#ifdef HASH_BLOOM
+   uint32_t bloom_sig; /* used only to test bloom exists in external analysis */
+   uint8_t *bloom_bv;
+   uint8_t bloom_nbits;
+#endif
+
+} UT_hash_table;
+
+typedef struct UT_hash_handle {
+   struct UT_hash_table *tbl;
+   void *prev; /* prev element in app order */
+   void *next; /* next element in app order */
+   struct UT_hash_handle *hh_prev; /* previous hh in bucket order */
+   struct UT_hash_handle *hh_next; /* next hh in bucket order */
+   void *key; /* ptr to enclosing struct's key */
+   unsigned keylen; /* enclosing struct's key len */
+   unsigned hashv; /* result of hash-fcn(key) */
+} UT_hash_handle;
+
+#endif /* UTHASH_H */
diff --git a/src/writer.c b/src/writer.c
new file mode 100644
index 0000000..5c7e3de
--- /dev/null
+++ b/src/writer.c
@@ -0,0 +1,1194 @@
+/**
+
+	MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more.
+
+	@file writer.c
+
+	@brief Coordinate conversion of token tree to output formats.
+
+
+	@author	Fletcher T. Penney
+	@bug
+
+**/
+
+/*
+
+	Copyright © 2016 - 2017 Fletcher T. Penney.
+
+
+	The `MultiMarkdown 6` project is released under the MIT License.
+
+	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:
+
+		https://github.com/fletcher/MultiMarkdown-4/
+
+	MMD 4 is released under both the MIT License and GPL.
+
+
+	CuTest is released under the zlib/libpng license. See CuTest.c for the text
+	of the license.
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+
+*/
+
+// NOTE(review): the four system header names were lost when this patch was
+// extracted; they are restored here from the library calls made below
+// (tolower, malloc/free, strlen/strdup/strndup) -- confirm against upstream.
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libMultiMarkdown.h"
+
+#include "char.h"
+#include "d_string.h"
+#include "html.h"
+#include "mmd.h"
+#include "scanners.h"
+#include "token.h"
+#include "writer.h"
+
+
+void store_citation(scratch_pad * scratch, footnote * f);
+
+void store_footnote(scratch_pad * scratch, footnote * f);
+
+void store_link(scratch_pad * scratch, link * l);
+
+
+/// Temporary storage while exporting parse tree to output format.
+/// Copies the engine's links, footnotes and citations into lookup hashes
+/// and returns the new scratch pad, or NULL if it could not be allocated.
+scratch_pad * scratch_pad_new(mmd_engine * e) {
+	scratch_pad * p = malloc(sizeof(scratch_pad));
+
+	if (p) {
+		p->padded = 2;							// Prevent unnecessary leading space
+		p->list_is_tight = false;				// Tight vs Loose list
+		p->skip_token = 0;						// Skip over next n tokens
+
+		p->link_hash = NULL;					// Store defined links in a hash
+
+		link * l;
+
+		for (int i = 0; i < e->link_stack->size; ++i)
+		{
+			l = stack_peek_index(e->link_stack, i);
+
+			store_link(p, l);
+		}
+
+		p->used_footnotes = stack_new(0);				// Store footnotes as we use them
+		p->inline_footnotes_to_free = stack_new(0);	// Inline footnotes need to be freed
+		p->footnote_being_printed = 0;
+		p->footnote_para_counter = -1;
+
+		p->footnote_hash = NULL;				// Store defined footnotes in a hash
+
+		footnote * f;
+
+		for (int i = 0; i < e->footnote_stack->size; ++i)
+		{
+			f = stack_peek_index(e->footnote_stack, i);
+
+			store_footnote(p, f);
+		}
+
+		p->used_citations = stack_new(0);
+		p->inline_citations_to_free = stack_new(0);
+		p->citation_being_printed = 0;
+
+		p->citation_hash = NULL;
+
+		for (int i = 0; i < e->citation_stack->size; ++i)
+		{
+			f = stack_peek_index(e->citation_stack, i);
+
+			store_citation(p, f);
+		}
+
+
+
+		p->extensions = e->extensions;
+	}
+
+	return p;
+}
+
+
+/// Release a scratch pad and everything it owns: the hash entries (shallow
+/// link copies and footnote/citation holders), the "used" stacks, and the
+/// inline footnotes/citations queued for freeing. NULL is ignored.
+void scratch_pad_free(scratch_pad * scratch) {
+	if (scratch == NULL) {
+		return;									// Nothing to release
+	}
+
+	link * l, * l_tmp;
+
+	// Free link hash
+	HASH_ITER(hh, scratch->link_hash, l, l_tmp) {
+		HASH_DEL(scratch->link_hash, l);		// Remove item from hash
+		free(l);								// "Shallow" free -- the pointers will be freed
+												// with the original later.
+	}
+
+	fn_holder * f, * f_tmp;
+
+
+	// Free footnote hash
+	HASH_ITER(hh, scratch->footnote_hash, f, f_tmp) {
+		HASH_DEL(scratch->footnote_hash, f);	// Remove item from hash
+		free(f);								// Free the fn_holder
+	}
+
+	stack_free(scratch->used_footnotes);
+
+	while (scratch->inline_footnotes_to_free->size) {
+		footnote_free(stack_pop(scratch->inline_footnotes_to_free));
+	}
+	stack_free(scratch->inline_footnotes_to_free);
+
+
+	// Free citation hash
+	HASH_ITER(hh, scratch->citation_hash, f, f_tmp) {
+		HASH_DEL(scratch->citation_hash, f);	// Remove item from hash
+		free(f);								// Free the fn_holder
+	}
+
+	stack_free(scratch->used_citations);
+
+	while (scratch->inline_citations_to_free->size) {
+		footnote_free(stack_pop(scratch->inline_citations_to_free));
+	}
+	stack_free(scratch->inline_citations_to_free);
+
+
+	free(scratch);
+}
+
+
+/// Ensure at least num newlines at end of output buffer.
+/// `scratch->padded` tracks how many are already there.
+void pad(DString * d, short num, scratch_pad * scratch) {
+	while (num > scratch->padded) {
+		d_string_append_c(d, '\n');
+		scratch->padded++;
+	}
+}
+
+
+/// Append the source text of a single token to `out`, skipping the
+/// emphasis/strong markers and empty text tokens. NULL tokens are ignored.
+void print_token_raw(DString * out, const char * source, token * t) {
+	if (t) {
+		switch (t->type) {
+			case EMPH_START:
+			case EMPH_STOP:
+			case STRONG_START:
+			case STRONG_STOP:
+			case TEXT_EMPTY:
+				break;
+			default:
+				d_string_append_c_array(out, &source[t->start], t->len);
+				break;
+		}
+	}
+}
+
+
+/// Apply print_token_raw() to `t` and each of its following siblings.
+void print_token_tree_raw(DString * out, const char * source, token * t) {
+	while (t) {
+		print_token_raw(out, source, t);
+
+		t = t->next;
+	}
+}
+
+
+/// Return a newly allocated copy of the text between a pair's opening token
+/// (its first child) and its final character. Returns NULL when `source`,
+/// `pair` or the pair's child is missing. Caller frees the result.
+char * text_inside_pair(const char * source, token * pair) {
+	char * result = NULL;
+
+	if (source && pair && pair->child) {
+		result = strndup(&source[pair->start + pair->child->len], pair->len - (pair->child->len + 1));
+	}
+
+	return result;
+}
+
+
+/// Build a label from a string: multibyte (UTF-8) sequences are kept as-is,
+/// ASCII letters are lowercased, digits and `.`, `_`, `-`, `:` are kept, and
+/// every other character is dropped. Caller frees the result.
+char * label_from_string(const char * str) {
+	const char * next_char;
+	char * label = NULL;
+
+	DString * out = d_string_new("");
+
+	while (*str != '\0') {
+		next_char = str;
+		next_char++;
+
+		if ((*next_char & 0xC0) == 0x80) {
+			// Allow multibyte characters
+			d_string_append_c(out, *str);
+
+			while ((*next_char & 0xC0) == 0x80) {
+				str++;
+				d_string_append_c(out, *str);
+				next_char++;
+			}
+		} else if ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z')
+			|| (*str >= 'a' && *str <= 'z') || (*str == '.') || (*str== '_')
+			|| (*str== '-') || (*str== ':'))
+		{
+			// Allow 0-9, A-Z, a-z, ., _, -, :
+			d_string_append_c(out, tolower((unsigned char) *str));
+		}
+
+		str++;
+	}
+
+	label = out->str;
+	d_string_free(out, false);
+
+	return label;
+}
+
+
+/// Build a label (see label_from_string()) from the source text covered by
+/// token `t`. Caller frees the result.
+char * label_from_token(const char * source, token * t) {
+	char * label = NULL;
+
+	DString * raw = d_string_new("");
+
+	d_string_append_c_array(raw, &source[t->start], t->len);
+
+	label = label_from_string(raw->str);
+
+	d_string_free(raw, true);
+
+	return label;
+}
+
+
+/// Clean up whitespace in string for standardization: leading whitespace is
+/// dropped, each run of tabs/spaces/newlines becomes a single space, and
+/// trailing whitespace is trimmed. Optionally lowercases the text.
+/// Returns NULL for a NULL input. Caller frees the result.
+char * clean_string(const char * str, bool lowercase) {
+	if (str == NULL)
+		return NULL;
+
+	DString * out = d_string_new("");
+	char * clean = NULL;
+	bool block_whitespace = true;
+
+	while (*str != '\0') {
+		switch (*str) {
+			case '\t':
+			case ' ':
+			case '\n':
+			case '\r':
+				if (!block_whitespace) {
+					d_string_append_c(out, ' ');
+					block_whitespace = true;
+				}
+				break;
+			default:
+				if (lowercase)
+					// tolower() is only defined for unsigned char values (or
+					// EOF); bytes of multibyte characters may be negative as
+					// plain char, so convert first.
+					d_string_append_c(out, tolower((unsigned char) *str));
+				else
+					d_string_append_c(out, *str);
+
+				block_whitespace = false;
+				break;
+		}
+
+		str++;
+	}
+
+	clean = out->str;
+
+	// Trim trailing whitespace/newlines
+	while (out->currentStringLength && char_is_whitespace_or_line_ending(clean[out->currentStringLength - 1])) {
+		out->currentStringLength--;
+		clean[out->currentStringLength] = '\0';
+	}
+
+	d_string_free(out, false);
+
+	return clean;
+}
+
+
+/// Clean (see clean_string()) the source text covered by token `t`.
+/// Caller frees the result.
+char * clean_string_from_token(const char * source, token * t, bool lowercase) {
+	char * clean = NULL;
+
+	DString * raw = d_string_new("");
+
+	d_string_append_c_array(raw, &source[t->start], t->len);
+
+	clean = clean_string(raw->str, lowercase);
+
+	d_string_free(raw, true);
+
+	return clean;
+}
+
+
+/// Clean (see clean_string()) the text inside a token pair.
+/// Returns NULL when text_inside_pair() does. Caller frees the result.
+char * clean_inside_pair(const char * source, token * t, bool lowercase) {
+	char * text = text_inside_pair(source, t);
+
+	char * clean = clean_string(text, lowercase);
+
+	free(text);
+
+	return clean;
+}
+
+
+/// Create an attribute node. The node stores `key` itself (no copy) and a
+/// copy of `value` with one leading and one trailing double quote removed.
+/// A trailing quote is removed by writing a terminator into the caller's
+/// buffer. Returns NULL if the node cannot be allocated; in that case `key`
+/// has not been stored anywhere and still belongs to the caller.
+attr * attr_new(char * key, char * value) {
+	attr * a = malloc(sizeof(attr));
+	size_t len = strlen(value);
+
+	// Strip quotes if present. Both tests are guarded by the length so an
+	// empty value, or a value that is only a quote, never touches the byte
+	// before the start of the string.
+	if (len > 0 && value[0] == '"') {
+		value++;
+		len--;
+	}
+
+	if (len > 0 && value[len - 1] == '"') {
+		value[len - 1] = '\0';
+	}
+
+	if (a) {
+		a->key = key;
+		a->value = strdup(value);
+		a->next = NULL;
+	}
+
+	return a;
+}
+
+
+/// Parse a run of `key=value` attributes into a singly linked list, in the
+/// order they appear. Parsing stops at the first position scan_attr()
+/// rejects, or on an allocation failure (the attributes parsed so far are
+/// still returned). Returns NULL when no attribute was parsed.
+attr * parse_attributes(char * source) {
+	attr * attributes = NULL;
+	attr * a = NULL;
+	attr * node = NULL;
+	char * key = NULL;
+	char * value = NULL;
+	size_t scan_len;
+	size_t pos = 0;
+
+	while (scan_attr(&source[pos])) {
+		pos += scan_spnl(&source[pos]);
+
+		// Get key
+		scan_len = scan_key(&source[pos]);
+		key = strndup(&source[pos], scan_len);
+
+		// Skip '='
+		pos += scan_len + 1;
+
+		// Get value
+		scan_len = scan_value(&source[pos]);
+		value = strndup(&source[pos], scan_len);
+
+		pos += scan_len;
+
+		node = (key && value) ? attr_new(key, value) : NULL;
+
+		free(value);		// We stored a copy
+
+		if (node == NULL) {
+			// Allocation failed: the key was never adopted by a node, and
+			// continuing would detach the list built so far.
+			free(key);
+			break;
+		}
+
+		if (a) {
+			a->next = node;
+		} else {
+			attributes = node;
+		}
+
+		a = node;
+	}
+
+	return attributes;
+}
+
+
+/// Create a link from its label token and the raw url/title/attribute
+/// strings. The url is cleaned, the title is copied, and the attribute
+/// string is parsed into a list; NULL title/attributes are stored as NULL.
+/// Returns NULL if the link cannot be allocated.
+link * link_new(const char * source, token * label, char * url, char * title, char * attributes) {
+	link * l = malloc(sizeof(link));
+
+	if (l) {
+		l->label = label;
+		l->clean_text = clean_inside_pair(source, label, true);
+		l->label_text = label_from_token(source, label);
+		l->url = clean_string(url, false);
+		l->title = (title == NULL) ? NULL : strdup(title);
+		l->attributes = (attributes == NULL) ? NULL : parse_attributes(attributes);
+	}
+
+	return l;
+}
+
+
+/// Store shallow copies of links in the storage hash. The link
+/// itself is new, but references the same data as the original.
+/// This allows the copied link to simply be `free()`'d without +/// freeing the pointers. +link * link_shallow_copy(link * l) { + link * new = malloc(sizeof(link)); + + if (new) { + new->label = l->label; + new->clean_text = l->clean_text; + new->label_text = l->label_text; + new->url = l->url; + new->title = l->title; + new->attributes = l->attributes; + } + + return new; +} + + +/// Copy stored links to a hash for quick searching during export. +/// Links are stored via a clean version of their text(from +/// `clean_string()`) and a label version (`label_from_string()`). +/// The first link for each string is stored. +void store_link(scratch_pad * scratch, link * l) { + link * temp_link; + + // Add link via `clean_text`? + HASH_FIND_STR(scratch->link_hash, l->clean_text, temp_link); + + if (!temp_link) { + // Only add if another link is not found with clean_text + temp_link = link_shallow_copy(l); + HASH_ADD_KEYPTR(hh, scratch->link_hash, l->clean_text, strlen(l->clean_text), temp_link); + } + + // Add link via `label_text`? + HASH_FIND_STR(scratch->link_hash, l->label_text, temp_link); + + if (!temp_link) { + // Only add if another link is not found with label_text + temp_link = link_shallow_copy(l); + HASH_ADD_KEYPTR(hh, scratch->link_hash, l->label_text, strlen(l->label_text), temp_link); + } +} + +link * retrieve_link(scratch_pad * scratch, const char * key) { + link * l; + + HASH_FIND_STR(scratch->link_hash, key, l); + + if (l) + return l; + + char * clean = clean_string(key, true); + + HASH_FIND_STR(scratch->link_hash, clean, l); + + free(clean); + + return l; +} + + +fn_holder * fn_holder_new(footnote * f) { + fn_holder * h = malloc(sizeof(fn_holder)); + + if (h) { + h->note = f; + } + + return h; +} + + +void store_footnote(scratch_pad * scratch, footnote * f) { + fn_holder * temp_holder; + + // Store by `clean_text`? 
+ HASH_FIND_STR(scratch->footnote_hash, f->clean_text, temp_holder); + + if (!temp_holder) { + temp_holder = fn_holder_new(f); + HASH_ADD_KEYPTR(hh, scratch->footnote_hash, f->clean_text, strlen(f->clean_text), temp_holder); + } + + // Store by `label_text`? + HASH_FIND_STR(scratch->footnote_hash, f->label_text, temp_holder); + + if (!temp_holder) { + temp_holder = fn_holder_new(f); + HASH_ADD_KEYPTR(hh, scratch->footnote_hash, f->label_text, strlen(f->label_text), temp_holder); + } +} + + +void store_citation(scratch_pad * scratch, footnote * f) { + fn_holder * temp_holder; + + // Store by `clean_text`? + HASH_FIND_STR(scratch->citation_hash, f->clean_text, temp_holder); + + if (!temp_holder) { + temp_holder = fn_holder_new(f); + HASH_ADD_KEYPTR(hh, scratch->citation_hash, f->clean_text, strlen(f->clean_text), temp_holder); + } + + // Store by `label_text`? + HASH_FIND_STR(scratch->citation_hash, f->label_text, temp_holder); + + if (!temp_holder) { + temp_holder = fn_holder_new(f); + HASH_ADD_KEYPTR(hh, scratch->citation_hash, f->label_text, strlen(f->label_text), temp_holder); + } +} + + +void link_free(link * l) { + free(l->label_text); + free(l->clean_text); + free(l->url); + free(l->title); +// free(l->id); + + attr * a = l->attributes; + attr * b; + + while (a) { + b = a->next; + free(a->key); + free(a->value); + free(a); + a = b; + } + + free(l); +} + + +void whitespace_accept(token ** remainder) { + while (token_chain_accept_multiple(remainder, 3, NON_INDENT_SPACE, INDENT_SPACE, INDENT_TAB)); +} + + +/// Find link based on label +link * extract_link_from_stack(scratch_pad * scratch, const char * target) { + char * key = clean_string(target, true); + + link * temp = NULL; + + HASH_FIND_STR(scratch->link_hash, key, temp); + + free(key); + + if (temp) + return temp; + + key = label_from_string(target); + + HASH_FIND_STR(scratch->link_hash, key, temp); + + free(key); + + return temp; +} + + +bool validate_url(const char * url) { + size_t len = scan_url(url); + 
+ return (len && len == strlen(url)) ? true : false; +} + + +char * url_accept(const char * source, token ** remainder, bool validate) { + char * url = NULL; + char * clean = NULL; + token * t = NULL; + token * first = NULL; + token * last = NULL; + + switch ((*remainder)->type) { + case PAIR_PAREN: + case PAIR_ANGLE: + case PAIR_QUOTE_SINGLE: + case PAIR_QUOTE_DOUBLE: + t = token_chain_accept_multiple(remainder, 2, PAIR_ANGLE, PAIR_PAREN); + url = text_inside_pair(source, t); + break; + case TEXT_PLAIN: + first = *remainder; + + // Grab parts for URL + while (token_chain_accept_multiple(remainder, 5, AMPERSAND, COLON, TEXT_PERIOD, TEXT_PLAIN, UL)); + + last = (*remainder)->prev; + + // Is there a space in a URL concatenated with a title or attribute? + // e.g. [foo]: http://foo.bar/ class="foo" + // Since only one space between URL and class, they are joined. + + if (last->type == TEXT_PLAIN) { + // Trim leading whitespace + token_trim_leading_whitespace(last, source); + token_split_on_char(last, source, ' '); + *remainder = last->next; + } + + url = strndup(&source[first->start], last->start + last->len - first->start); + break; + } + + // Is this a valid URL? 
+ clean = clean_string(url, false); + + if (validate && !validate_url(clean)) { + free(clean); + clean = NULL; + } + + free(url); + return clean; +} + + +/// Extract url string from `(foo)` or `()` or `(foo "bar")` +void extract_from_paren(token * paren, const char * source, char ** url, char ** title, char ** attributes) { + token * t; + size_t attr_len; + + token * remainder = paren->child->next; + + if (remainder) { + // Skip whitespace + whitespace_accept(&remainder); + + // Grab URL + *url = url_accept(source, &remainder, false); + + // Skip whitespace + whitespace_accept(&remainder); + + // Grab title, if present + t = token_chain_accept_multiple(&remainder, 3, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE, PAIR_PAREN); + + if (t) { + *title = text_inside_pair(source, t); + } + + // Grab attributes, if present + if (t) { + attr_len = scan_attributes(&source[t->start + t->len]); + + if (attr_len) { + *attributes = strndup(&source[t->start + t->len], attr_len); + } + } + } +} + + +/// Create a link from an explicit link `[foo](bar)` +link * explicit_link(scratch_pad * scratch, token * bracket, token * paren, const char * source) { + char * url_char =NULL; + char * title_char = NULL; + char * attr_char = NULL; + link * l = NULL; + + extract_from_paren(paren, source, &url_char, &title_char, &attr_char); + + if (attr_char) { + if (!(scratch->extensions & EXT_COMPATIBILITY)) + l = link_new(source, bracket, url_char, title_char, attr_char); + } else { + l = link_new(source, bracket, url_char, title_char, attr_char); + } + + free(url_char); + free(title_char); + free(attr_char); + + return l; +} + + +footnote * footnote_new(const char * source, token * label, token * content) { + footnote * f = malloc(sizeof(footnote)); + + if (f) { + f->label = label; + f->clean_text = (label == NULL) ? NULL : clean_inside_pair(source, label, true); + f->label_text = (label == NULL) ? 
NULL : label_from_token(source, label); + f->free_para = false; + f->count = -1; + + if (content) { + switch (content->type) { + case BLOCK_PARA: + f->content = content; + break; + case TEXT_PLAIN: + token_trim_leading_whitespace(content, source); + default: + f->content = token_new_parent(content, BLOCK_PARA); + f->free_para = true; + break; + } + } + } + + return f; +} + + +void footnote_free(footnote * f) { + if (f) { + if (f->free_para) { + // I'm not sure why, but the following causes a memory error. + // Strangely, not freeing it does *not* seem to cause memory + // leaks?? + + //free(f->content); + } + free(f->clean_text); + free(f->label_text); + + free(f); + } +} + + +bool definition_extract(mmd_engine * e, token ** remainder) { + char * source = e->dstr->str; + token * label = NULL; + token * title = NULL; + char * url_char = NULL; + char * title_char = NULL; + char * attr_char = NULL; + token * temp = NULL; + size_t attr_len; + + link * l = NULL; + footnote * f = NULL; + + // Store label + label = *remainder; + + *remainder = (*remainder)->next; + + // Prepare for parsing + + switch (label->type) { + case PAIR_BRACKET: + // Reference Link Definition + + if (!token_chain_accept(remainder, COLON)) + return false; + + // Skip space + whitespace_accept(remainder); + + // Grab URL + url_char = url_accept(e->dstr->str, remainder, false); + + whitespace_accept(remainder); + + // Grab title, if present + temp = *remainder; + + title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE); + + if (!title) { + // See if there's a title on next line + whitespace_accept(remainder); + token_chain_accept_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK); + whitespace_accept(remainder); + + title = token_chain_accept_multiple(remainder, 2, PAIR_QUOTE_DOUBLE, PAIR_QUOTE_SINGLE); + + if (!title) + *remainder = temp; + } + + title_char = text_inside_pair(e->dstr->str, title); + + // Get attributes + if ((*remainder) && (((*remainder)->type != 
TEXT_NL) && ((*remainder)->type != TEXT_LINEBREAK))) { + if (!(e->extensions & EXT_COMPATIBILITY)) { + attr_len = scan_attributes(&source[(*remainder)->start]); + + if (attr_len) { + attr_char = strndup(&source[(*remainder)->start], attr_len); + + // Skip forward + attr_len += (*remainder)->start; + + while ((*remainder) && (*remainder)->start < attr_len) + *remainder = (*remainder)->next; + } + + l = link_new(e->dstr->str, label, url_char, title_char, attr_char); + } else { + // Not valid match + } + } else { + l = link_new(e->dstr->str, label, url_char, title_char, attr_char); + } + + // Store link for later use + if (l) + stack_push(e->link_stack, l); + + break; + case PAIR_BRACKET_CITATION: + if (!token_chain_accept(remainder, COLON)) + return false; + + title = *remainder; // Track first token of content in 'title' + f = footnote_new(e->dstr->str, label, title); + + // Store citation for later use + stack_push(e->citation_stack, f); + + break; + case PAIR_BRACKET_FOOTNOTE: + if (!token_chain_accept(remainder, COLON)) + return false; + + title = *remainder; // Track first token of content in 'title' + f = footnote_new(e->dstr->str, label, title); + + // Store footnote for later use + stack_push(e->footnote_stack, f); + + break; + case PAIR_BRACKET_VARIABLE: + fprintf(stderr, "Process variable:\n"); + token_describe(label, e->dstr->str); + break; + default: + // Rest of block is not definitions (or has already been processed) + return false; + } + + // Advance to next line + token_skip_until_type_multiple(remainder, 2, TEXT_NL, TEXT_LINEBREAK); + if (*remainder) + *remainder = (*remainder)->next; + + // Clean up + free(url_char); + free(title_char); + free(attr_char); + + return true; +} + + +void process_definition_block(mmd_engine * e, token * block) { + token * remainder = block->child; + bool def_list = false; + +// while (remainder) { + switch (remainder->type) { + case PAIR_BRACKET_FOOTNOTE: + case PAIR_BRACKET_CITATION: + case PAIR_BRACKET_VARIABLE: + if 
(!(e->extensions & EXT_NOTES)) + return; + case PAIR_BRACKET: + if (definition_extract(e, &remainder)) + def_list = true; + break; + default: + // Rest of block is not definitions (or has already been processed) + if (def_list) { + tokens_prune(block->child, remainder->prev); + block->child = remainder; + } + return; + } +// } + + // Ignore this block in the future + block->type = BLOCK_EMPTY; +} + + +void process_definition_stack(mmd_engine * e) { + for (int i = 0; i < e->definition_stack->size; ++i) + { + process_definition_block(e, stack_peek_index(e->definition_stack, i)); + } +} + + +void process_header_to_links(mmd_engine * e, token * h) { + char * label = label_from_token(e->dstr->str, h); + + DString * url = d_string_new("#"); + + d_string_append(url, label); + + link * l = link_new(e->dstr->str, h, url->str, NULL, NULL); + + // Store link for later use + stack_push(e->link_stack, l); + + d_string_free(url, true); + free(label); +} + + +void process_header_stack(mmd_engine * e) { + // NTD in compatibility mode or if disabled + if (e->extensions & EXT_NO_LABELS) + return; + + for (int i = 0; i < e->header_stack->size; ++i) + { + process_header_to_links(e, stack_peek_index(e->header_stack, i)); + } +} + +void mmd_export_token_tree(DString * out, mmd_engine * e, short format) { + + // Process potential reference definitions + process_definition_stack(e); + + // Process headers for potential cross-reference targets + process_header_stack(e); + + // Create scratch pad + scratch_pad * scratch = scratch_pad_new(e); + + switch (format) { + case FORMAT_HTML: + mmd_export_token_tree_html(out, e->dstr->str, e->root, 0, scratch); + mmd_export_footnote_list_html(out, e->dstr->str, scratch); + mmd_export_citation_list_html(out, e->dstr->str, scratch); + break; + } + + scratch_pad_free(scratch); +} + + +void parse_brackets(const char * source, scratch_pad * scratch, token * bracket, link ** final_link, short * skip_token, bool * free_link) { + link * temp_link = NULL; + 
char * temp_char = NULL; + short temp_short = 0; + + // What is next? + token * next = bracket->next; + + if (next) + temp_short = 1; + + // Do not free this link after using it + *free_link = false; + + if (next && next->type == PAIR_PAREN) { + // We have `[foo](bar)` or `![foo](bar)` + + temp_link = explicit_link(scratch, bracket, next, source); + + if (temp_link) { + // Don't output brackets + bracket->child->type = TEXT_EMPTY; + bracket->child->mate->type = TEXT_EMPTY; + + // This was an explicit link + *final_link = temp_link; + + // Skip over parentheses + *skip_token = temp_short; + + // Free this link + *free_link = true; + return; + } + } + + if (next && next->type == PAIR_BRACKET) { + // Is this a reference link? `[foo][bar]` or `![foo][bar]` + temp_char = text_inside_pair(source, next); + + if (temp_char[0] == '\0') { + // Empty label, use first bracket + free(temp_char); + temp_char = text_inside_pair(source, bracket); + } + } else { + temp_char = text_inside_pair(source, bracket); + // Don't skip tokens + temp_short = 0; + } + + temp_link = extract_link_from_stack(scratch, temp_char); + + if (temp_char) + free(temp_char); + + if (temp_link) { + // Don't output brackets + bracket->child->type = TEXT_EMPTY; + bracket->child->mate->type = TEXT_EMPTY; + + *final_link = temp_link; + + // Skip over second bracket if present + *skip_token = temp_short; + return; + } + + // No existing links, so nothing to do + *final_link = NULL; +} + + +void mark_citation_as_used(scratch_pad * scratch, footnote * c) { + if (c->count == -1) { + // Add citation to used stack + stack_push(scratch->used_citations, c); + + // Update counter + c->count = scratch->used_citations->size; + } +} + + +void mark_footnote_as_used(scratch_pad * scratch, footnote * f) { + if (f->count == -1) { + // Add footnote to used stack + stack_push(scratch->used_footnotes, f); + + // Update counter + f->count = scratch->used_footnotes->size; + } +} + + +size_t extract_citation_from_stack(scratch_pad 
* scratch, const char * target) { + char * key = clean_string(target, true); + + fn_holder * h; + + HASH_FIND_STR(scratch->citation_hash, key, h); + + free(key); + + if (h) { + mark_citation_as_used(scratch, h->note); + return h->note->count; + } + + key = label_from_string(target); + + HASH_FIND_STR(scratch->citation_hash, key, h); + + free(key); + + if (h) { + mark_citation_as_used(scratch, h->note); + return h->note->count; + } + + // None found + return -1; +} + + +size_t extract_footnote_from_stack(scratch_pad * scratch, const char * target) { + char * key = clean_string(target, true); + + fn_holder * h; + + HASH_FIND_STR(scratch->footnote_hash, key, h); + + free(key); + + if (h) { + mark_footnote_as_used(scratch, h->note); + return h->note->count; + } + + key = label_from_string(target); + + HASH_FIND_STR(scratch->footnote_hash, key, h); + + free(key); + + if (h) { + mark_footnote_as_used(scratch, h->note); + return h->note->count; + } + + // None found + return -1; +} + + +void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) { + // Get text inside bracket + char * text = text_inside_pair(source, t); + short footnote_id = extract_footnote_from_stack(scratch, text); + + free(text); + + if (footnote_id == -1) { + // No match, this is an inline footnote -- create a new one + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + + // Create footnote + footnote * temp = footnote_new(source, NULL, t->child); + + // Store as used + stack_push(scratch->used_footnotes, temp); + *num = scratch->used_footnotes->size; + temp->count = *num; + + // We need to free this one later since it doesn't exist + // in the engine's stack, on the scratch_pad stack + stack_push(scratch->inline_footnotes_to_free, temp); + } else { + // Footnote in stack + *num = footnote_id; + } +} + + +void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num) { + // Get text inside bracket + char * text = 
text_inside_pair(source, t); + short citation_id = extract_citation_from_stack(scratch, text); + + free(text); + + if (citation_id == -1) { + // No match, this is an inline footnote -- create a new one + t->child->type = TEXT_EMPTY; + t->child->mate->type = TEXT_EMPTY; + + // Create footnote + footnote * temp = footnote_new(source, NULL, t->child); + + // Store as used + stack_push(scratch->used_citations, temp); + *num = scratch->used_citations->size; + temp->count = *num; + + // We need to free this one later since it doesn't exist + // in the engine's stack, on the scratch_pad stack + stack_push(scratch->inline_citations_to_free, temp); + } else { + // Citation in stack + *num = citation_id; + } +} + diff --git a/src/writer.h b/src/writer.h new file mode 100644 index 0000000..83f4829 --- /dev/null +++ b/src/writer.h @@ -0,0 +1,169 @@ +/** + + MultiMarkdown 6 -- Lightweight markup processor to produce HTML, LaTeX, and more. + + @file writer.h + + @brief Coordinate conversion of token tree to output formats. + + + @author Fletcher T. Penney + @bug + +**/ + +/* + + Copyright © 2016 - 2017 Fletcher T. Penney. + + + The `MultiMarkdown 6` project is released under the MIT License.. + + GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: + + https://github.com/fletcher/MultiMarkdown-4/ + + MMD 4 is released under both the MIT License and GPL. + + + CuTest is released under the zlib/libpng license. See CuTest.c for the text + of the license. 
+ + + ## The MIT License ## + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ +*/ + + +#ifndef WRITER_MULTIMARKDOWN_H +#define WRITER_MULTIMARKDOWN_H + +#ifdef TEST +#include "CuTest.h" +#endif + +#include "d_string.h" +#include "mmd.h" +#include "stack.h" +#include "token.h" +#include "uthash.h" + + +typedef struct { + struct link * link_hash; + + unsigned long extensions; + short padded; //!< How many empty lines at end output buffer + short list_is_tight; + short skip_token; + + short footnote_para_counter; + stack * used_footnotes; + stack * inline_footnotes_to_free; + struct fn_holder * footnote_hash; + short footnote_being_printed; + + stack * used_citations; + stack * inline_citations_to_free; + struct fn_holder * citation_hash; + short citation_being_printed; + + char _PADDING[6]; //!< pad struct for alignment +} scratch_pad; + + +struct attr { + char * key; + char * value; + struct attr * next; +}; + +typedef struct attr attr; + +struct link { + token * label; + char * label_text; + char * clean_text; + char * url; + char * title; + attr * attributes; + UT_hash_handle hh; +}; + +typedef struct link link; + +struct footnote { + token * label; + char * label_text; + char * clean_text; + token * content; + size_t count; + bool free_para; + + char _PADDING[7]; //!< pad struct for alignment +}; + +typedef struct footnote footnote; + +struct fn_holder { + footnote * note; + UT_hash_handle hh; +}; + +typedef struct fn_holder fn_holder; + + +/// Temporary storage while exporting parse tree to output format +scratch_pad * scratch_pad_new(mmd_engine * e); + +void scratch_pad_free(scratch_pad * scratch); + + +/// Ensure at least num newlines at end of output buffer +void pad(DString * d, short num, scratch_pad * scratch); + +link * explicit_link(scratch_pad * scratch, token * label, token * url, const char * source); + +/// Find link based on label +link * extract_link_from_stack(scratch_pad * scratch, const char * target); + +char * text_inside_pair(const char * source, token * pair); + +void link_free(link * l); +void 
footnote_free(footnote * f); + +char * label_from_token(const char * source, token * t); + +void parse_brackets(const char * source, scratch_pad * scratch, token * bracket, link ** link, short * skip_token, bool * free_link); + + +void print_token_raw(DString * out, const char * source, token * t); + +void print_token_tree_raw(DString * out, const char * source, token * t); + +char * url_accept(const char * source, token ** remainder, bool validate); + +void footnote_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); +void citation_from_bracket(const char * source, scratch_pad * scratch, token * t, short * num); + + +#endif + diff --git a/templates/README.md.in b/templates/README.md.in index b2d218b..33644cb 100644 --- a/templates/README.md.in +++ b/templates/README.md.in @@ -9,236 +9,478 @@ | Version: | @My_Project_Version@ | -## Introduction ## +## An Announcement! ## -This template was created out of a desire to simplify some of the setup and -configuration that I was doing over and over each time I started a new project. -Additionally, I wanted to try to start encouraging some "better practices" -(though not necessarily "best practices"): +I would like to officially announce that MultiMarkdown version 6 is in public +alpha. It's finally at a point where it is usable, but there are quite a few +caveats. -1. [Test-driven development][tdd] -- My development of MultiMarkdown - focused on integration testing, but really had no unit testing to - speak of. Some newer projects I began working on were a bit math- - heavy, and ensuring that each piece works properly became even more - important. It was also nice to be able to actually develop code that - could do *something* (via the test suite), even though the project as - a whole was nowhere near complete.) To accomplish this, I include the - [CuTest] project to support writing tests for your code. 
+This post is a way for me to organize some of my thoughts, provide some +history for those who are interested, and to provide some tips and tricks from +my experiences for those who are working on their own products. -2. Use of the [cmake] build system. `cmake` is not perfect by any - means, but it does offer some very useful features and a means for - better integrating the compilation and packaging/installation aspects - of development. Rather than reinventing the wheel each time, this - setup incorporates basic `cmake` functionality to make it easy to - control how your project is compiled, and includes automated generation - of the test command. +But first, some background... -3. Templates -- `cmake` has a reasonable templating system, so that you - can define basic variables (e.g. author, project name, etc.) and allow - `cmake` to combine those elements to ensure consistency across source - code and README files. -4. Documentation -- some default setup to allow for [Doxygen]-generated - documentation. The generated `README.md` file is used as the main - page, and the source c/header files are included. Naturally, Doxygen - is a complex system, so you're responsible for figuring out how to - properly document your code. +### Why a New Version? ### -5. Simplify `git` a touch -- In my larger projects, I make heavy use of - git modules. One project may make use of 20-30 modules, which are - designed to be re-usable across other projects. I found that I was - spending too much time making sure that I had the latest version - of a module checked out, so I created two scripts to help me keep - my modules in line: `link_git_modules` and `update_git_modules`. - You run the `link` script once to ensure that your modules are properly - set up, and can then run the `update` script at any time to be sure - you've pulled the latest version. One advantage of this is that your - modules are set to a branch, rather than just a detached commit. 
It - may or may not work for your needs, but it saves me a bunch of time - and headache. +MultiMarkdown version 5 was released in November of 2015, but the codebase was +essentially the same as that of v4 -- and that was released in beta in April +of 2013. A few key things prompted work on a new version: +* Accuracy -- MMD v4 and v5 were the most accurate versions yet, and a lot of +effort went into finding and resolving various edge cases. However, it began +to feel like a game of whack-a-mole where new bugs would creep in every time I +fixed an old one. The PEG began to feel rather convoluted in spots, even +though it did allow for a precise (if not always accurate) specification of +the grammar. -[tdd]: https://en.wikipedia.org/wiki/Test-driven_development -[cmake]: http://www.cmake.org/ -[CuTest]: http://cutest.sourceforge.net -[Doxygen]: http://www.stack.nl/~dimitri/doxygen/ +* Performance -- "Back in the day" [peg-markdown] was one of the fastest +Markdown parsers around. MMD v3 was based on peg-markdown, and would leap- +frog with it in terms of performance. Then [CommonMark] was released, which +was a bit faster. Then a couple of years went by and CommonMark became *much* +faster -- in one of my test suites, MMD v 5.4.0 takes about 25 times longer to +process a long document than CommonMark 0.27.0. +[peg-markdown]: https://github.com/jgm/peg-markdown +[CommonMark]: http://commonmark.org/ -## How do I use it? ## +Last spring, I decided I wanted to rewrite MultiMarkdown from scratch, +building the parser myself rather than relying on a pre-rolled solution. (I +had been using [greg](https://github.com/ooc-lang/greg) to compile the PEG +into parser code. It worked well overall, but lacked some features I needed, +requiring a lot of workarounds.) -You can download the source from [github] and get to work. The file "IMPORTANT" -contains instructions on the various build commands you can use. 
+## First Attempt ## -I recommend using the following script to automatically create a new git repo, -pull in the default project template, and configure git-flow. You simply have -to rename your project directory from `new-project` to whatever you desire: +My first attempt started by hand-crafting a parser that scanned through the +document a line at a time, deciding what to do with each line as it found +them. I used regex parsers made with [re2c](http://re2c.org/index.html) to +help classify each line, and then a separate parser layer to process groups of +lines into blocks. Initially this approach worked well, and was really +efficient. But I quickly began to code my way into a dead-end -- the strategy +was not elegant enough to handle things like nested lists, etc. +One thing that did turn out well from the first attempt, however, was an +approach for handling `<emph>` and `<strong>` parsing. I've learned over the +years that this can be one of the hardest parts of coding accurately for +Markdown. There are many examples that are obvious to a person, but difficult +to properly "explain" how to parse to a computer. - #!/bin/sh +No solution is perfect, but I developed an approach that seems to accurately +handle a wide range of situations without a great deal of complexity: - git init new-project +1. Scan the documents for asterisks (`*`). Each one will be handled one at a +time. - cd new-project +2. Unlike brackets (`[` and `]`), an asterisk is "ambidextrous", in that it +may be able to open a matched pair of asterisks, close a pair, or both. For +example, in `foo *bar* foo`: - git remote add "template" https://github.com/fletcher/c-template.git + 1. The first asterisk can open a pair, but not close one. - git pull template master + 2. The second asterisk can close a pair, but not open one. - git flow init -d +3. So, once the asterisks have been identified, each has to be examined to +determine whether it can open/close/both. 
The algorithm is not that complex, +but I'll describe it in general terms. Check the code for more specifics. +This approach seems to work, but might still need some slight tweaking. In +the future, I'll codify this better in language rather than just in code. - git checkout develop + 1. If there is whitespace to the left of an asterisk, it can't close. + 2. If there is whitespace or punctuation to the right it can't open. -Using this approach, you can define your own `origin` remote if you like, but -the `template` remote can be used to update the core project files should any -improvements come about: + 3. "Runs" of asterisks, e.g. `**bar` are treated as a unit in terms of + looking left/right. - git checkout develop - git merge template master + 4. Asterisks inside a word are a bit trickier -- we look at the number of + asterisks before the word, the number in the current run, and the number + of asterisks after the word to determine which combinations, if any, are + permitted. -**NOTE**: `cmake` is a complex suite of utilities, and if you have trouble you -will need to get support elsewhere. If you find errors in this template, by -all means I want to hear about them and fix them, but this is just a basic -framework to get you started. In all likelihood, all but the most basic -projects will need some customization. +4. Once all asterisks have been tagged as able to open/close/both, we proceed +through them in order: + + 1. When we encounter a tag that can close, we look to see if there is a + previous opener that has not been paired off. If so, pair the two and + remove the opener from the list of available asterisks. + + 2. When we encounter an opener, add it to the stack of available openers. + + 3. When we encounter an asterisk that can do both, see if it can close an + existing opener. If not, then add it to the stack. + +5. After all tokens in the block have been paired, then we look for nesting +pairs of asterisks in order to create `<emph>` and `<strong>` sets. 
For +example, assume we have six asterisks wrapped around a word, three in front, +and three after. The asterisks are indicated with numbers: `123foo456`. We +proceed in the following manner: + 1. Based on the pairing algorithm above, these asterisks would be paired as + follows, with matching asterisks sharing numbers -- `123foo321`. + + 2. Moving forwards, we come to asterisk "1". It is followed by an + asterisk, so we check to see if they should be grouped as a `<strong>`. + Since the "1" asterisks are wrapped immediately outside the "2" asterisks, + they are joined together. More than two pairs can't be joined, so we now + get the following -- `112foo211`, where the "11" represents the opening + and closing of a `<strong>`, and the "2" represents a `<emph>`. + +6. When matching a pair, any unclosed openers that are on the stack are +removed, preventing pairs from "crossing" or "intersecting". Pairs can wrap +around each other, e.g. `[(foo)]`, but not intersect like `[(foo])`. In the +second case, the brackets would close, removing the `(` from the stack. + +7. This same approach is used in all tokens that are matched in pairs-- +`[foo]`, `(foo)`, `_foo_`, etc. There's slightly more to it, but once you +figure out how to assign opening/closing ability, the rest is easy. By using +a stack to track available openers, it can be performed efficiently. + +In my testing, this approach has worked quite well. It handles all the basic +scenarios I've thrown at it, and all of the "basic" and "devious" edge cases I +have thought of (some of these don't necessarily have a "right" answer -- but +v6 gives consistent answers that seem as reasonable as any others to me). +There are also three more edge cases I've come up with that can still stump it, and +ironically they are handled correctly by most implementations. They just +don't follow the rules above. I'll continue to work on this. + +In the end, I scrapped this effort, but kept the lessons learned in the token +pairing algorithm. 
-[github]: https://github.com/fletcher/c-template +## Second Attempt ## + +I tried again this past Fall. This time, I approached the problem with lots +of reading. *Lots and lots* of reading -- tons of websites, computer science +journal articles, PhD theses, etc. Learned a lot about lexers, and a lot +about parsers, including hand-crafting vs using parser generators. In brief: + +1. I learned about the [Aho–Corasick algorithm], which is a great way to +efficiently search a string for multiple target strings at once. I used this +to create a custom lexer to identify tokens in a MultiMarkdown text document +(e.g. `*`, `[ `, `{++`, etc.). I learned a lot, and had a good time working +out the implementation. This code efficiently allowed me to break a string of +text into the tokens that mattered for Markdown parsing. + +2. However, in a few instances I really needed some features of regular +expressions to simplify more complex structures. After a quick bit of testing, +using re2c to create a tokenizer was just as efficient, and allowed me to +incorporate some regex functionality that simplified later parsing. I'll keep +the Aho-Corasick stuff around, and will probably experiment more with it +later. But I didn't need it for MMD now. `lexer.re` contains the source for +the tokenizer. -## Configuration ## +[Aho–Corasick algorithm]: https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm +I looked long and hard for a way to simplify the parsing algorithm to try and +"touch" each token only once. Ideally, the program could step through each +token, and decide when to create a new block, when to pair things together, +etc. But I'm not convinced it's possible. Since Markdown's grammar varies +based on context, it seems to work best when handled in distinct phases: + +1. Tokenize the string to identify key sections of text. This includes line +breaks, allowing the text to be examined one line at a time. + +2. 
Join series of lines together into blocks, such as paragraphs, code blocks, +lists, etc. + +3. The tokens inside each block can then be paired together to create more +complex syntax such as links, strong, emphasis, etc. + +To handle the block parsing, I started off using the [Aho-Corasick] code to +handle my first attempt. I had actually implemented some basic regex +functionality, and used that to group lines together to create blocks. But +this quickly fell apart in the face of more complex structures such as +recursive lists. After a lot of searching, and *tons* more reading, I +ultimately decided to use a parser generator to handle the task of grouping lines +into blocks. `parser.y` has the source for this, and it is processed by the +[lemon](http://www.hwaci.com/sw/lemon/) parser generator to create the actual +code. + +I chose to do this because hand-crafting the block parser would be complex. +The end result would likely be difficult to read and understand, which would +make it difficult to update later on. Using the parser generator allows me to +write things out in a way that can more easily be understood by a person. In +all likelihood, the performance is probably as good as anything I could do +anyway, if not better. + +Because lemon is a LALR(1) parser, it does require a bit of thinking ahead +about how to create the grammar used. But so far, it has been able to handle +everything I have thrown at it. + + +## Optimization ## + +One of my goals for MMD 6 was performance. So I've paid attention to speed +along the way, and have tried to use a few tricks to keep things fast. Here +are some things I've learned along the way. In no particular order: + + +### Memory Allocation ### + +When parsing a long document, a *lot* of token structures are created. Each +one requires a small bit of memory to be allocated. In aggregate, that time +added up and slowed down performance. 
+ +After reading for a bit, I ended up coming up with an approach that uses +larger chunks of memory. I allocate pools of memory in large slabs for +smaller "objects". For example, I allocate memory for 1024 tokens at a +single time, and then dole that memory out as needed. When the slab is empty, +a new one is allocated. This dramatically improved performance. + +When pairing tokens, I created a new stack for each block. I realized that an +empty stack didn't have any "leftover" cruft to interfere with re-use, so I +just used one for the entire document. Again a sizeable improvement in +performance from only allocating one object instead of many. When recursing +to a deeper level, the stack just gets deeper, but earlier levels aren't +modified. -### CMakeLists.txt File ### +Speaking of tokens, I realized that the average document contains a lot of +single spaces (there's one between every two words I have written, for +example.) The vast majority of the time, these single spaces have no effect +on the output of Markdown documents. I changed my whitespace token search to +only flag runs of 2 or more spaces, dramatically reducing the number of +tokens. This gives the benefit of needing fewer memory allocations, and also +reduces the number of tokens that need to be processed later on. The only +downside is remembering to check for a single space character in a few instances +where it matters. -First, you should update the project information under the "Define Our Project" -section, including the title, description, etc. This information will be used -to update the README, as well as to create the `version.h` file so that the -project can have access to its own version number. -You will then need to update the various groups in the "Source Files" section -so that Cmake will be able to determine which files are used to build your -project. For reasons that will become clear later, try to follow the -suggestions for the different groups of files. 
+### Proper input buffering ### -You then need to define your targets, such as a library, or executable, etc. -Obviously, this will depend on the needs of your project. You can also add -custom steps based on the Target OS (OS X, Windows, *nix, etc.). +When I first began last spring, I was amazed to see how much time was being +spent by MultiMarkdown simply reading the input file. Then I discovered it +was because I was reading it one character at a time. I switched to using a +buffered read approach and the time to read the file went to almost nothing. I +experimented with different buffer sizes, but they did not seem to make a +measurable difference. -You can use CPack to generate installers for your software. This can be -complex, and you will need to modify this section heavily. -CuTest is used by default to provide unit testing (see below), but you -can also use CMake/CTest to provide integration testing. Again, this will -be up to you to configure. +### Output Buffering ### + +I experimented with different approaches to creating the output after parsing. +I tried printing directly to `stdout`, and even played with different +buffering settings. None of those seemed to work well, and all were slower +than using the `d_string` approach (formerly called `GString` in MMD 5). -### CuTest ### +### Fast Searches ### -[CuTest] provides a means to integrate unit testing with your C source code. -Once you get the hang of it, it's easy to use. +After getting basic Markdown functionality complete, I discovered during +testing that the time required to parse a document grew exponentially as the +document grew longer. Performance was on par with CommonMark for shorter +documents, but fell increasingly behind in larger tests. Time profiling found +that the culprit was searching for link definitions when they didn't exist. +My first approach was to keep a stack of used link definitions, and to iterate +through them when necessary. In long documents, this performs very poorly. 
+More research and I ended up using +[uthash](http://troydhanson.github.io/uthash/). This allows me to search for +a link (or footnote, etc.) by "name" rather than searching through an array. +This allowed me to get MMD's performance back to O(n), taking roughly twice as +much time to process a document that is twice as long. -### Doxygen ### +### Efficient Utility Functions ### -[Doxygen] is used to generate documentation from the source code itself. -Properly configuring your source for this is up to you. You can modify the -`doxygen.conf.in` template with your desired settings as desired, but most -of the basics are handled for you based on your CMake configuration. +It is frequently necessary when parsing Markdown to check what sort of +character we are dealing with at a certain position -- a letter, whitespace, +punctuation, etc. I created a lookup table for this via `char_lookup.c` and +hard-coded it in `char.c`. These routines allow me to quickly, and +consistently, classify any byte within a document. This saved a lot of +programming time, and saved time tracking down bugs from handling things +slightly differently under different circumstances. I also suspect it +improved performance, but don't have the data to back it up. -### GitHub Pages Support ### +### Testing While Writing ### -The `configure-gh-pages` script sets up a `documentation` directory that is -linked to a `gh-pages` branch of the project. You can then run `make gh-pages` -to update the documentation in this directory. Commit and push to your origin, -and your projects gh-page is updated. +I developed several chunks of code in parallel while creating MMD 6. The vast +majority of it was developed largely in a [test-driven development] approach. +The other code was largely created with extensive unit testing to accomplish +this. 
+[test-driven development]: https://en.wikipedia.org/wiki/Test-driven_development -### Makefile ### +MMD isn't particularly amenable to this approach at the small level, but +instead I relied more on integration testing with an ever-growing collection +of text files and the corresponding HTML files in the MMD 6 test suite. This +allowed me to ensure new features work properly and that old features aren't +broken. At this time, there are 29 text files in the test suite, and many +more to come. -The overall build process is controlled by the master `Makefile`. It provides -the following commands: - make - make release +### Other Lessons ### -Generate the CMake build files for use or distribution. Once complete you will -need to change to the `build` directory and run `make`, `make test`, and -`cpack` as desired. +Some things that didn't do me any good.... - make zip +I considered differences between using `malloc` and `calloc` when initializing +tokens. The time saved by using `malloc` was basically exactly offset by the +initial time required to initialize the token to default null values as +compared to using `calloc`. When trying `calloc` failed to help me out +(thinking that clearing a single slab in the object pool would be faster), I +stuck with `malloc` as it makes more sense to me in my workflow. -Direct CPack to create a zip installer rather than a graphical installer. +I read a bit about [struct padding] and reordered some of my structs. It wasn't +until later that I discovered the `-Wpadded` option, and it's not clear +whether my changes modified anything. Since the structs were being padded +automatically, there was no noticeable performance change, and I didn't have +the tools to measure whether I could have improved memory usage at all. Not +sure this would be worth the effort -- much lower hanging fruit available. - make debug +[struct padding]: http://www.catb.org/esr/structure-packing/ -Generate build files for [CuTest] unit testing. 
In the `build` directory, -run `make`, then `make test`. - make analyze +## Differences in MultiMarkdown Itself ## -If you have `clang` installed, this will generate debug build files with the -`scan-build` command. In the `build` directory, run `scan-build -V make` -to compile the software and view the static analysis results. +MultiMarkdown v6 is mostly about making a better MMD parser, but it will +likely involve a few changes to the MultiMarkdown language itself. - make xcode -Build a project file for Xcode on OS X. +1. I am thinking about removing Setext headers from the language. I almost +never use them, much preferring to use ATX style headers (`# foo #`). +Additionally, I have never liked the fact that Setext headers allow the +meaning of a line to be completely changed by the following line. It makes +the parsing slightly more difficult on a technical level (requiring some +backtracking at times). I'm not 100% certain on this, but right now I believe +it's the only Markdown feature that doesn't exist in MMD 6 yet. - make windows - make windows-zip - make windows-32 - make windows-zip-32 +2. Whitespace is not allowed between the text brackets and label brackets in +reference links, images, footnotes, etc. For example `[foo] [bar]` will no +longer be the same as `[foo][bar]`. -Use the MinGW software to cross-compile for Windows on a *nix machine. You can -specify the 32 bit option, and also the zip option as indicated. +3. Link and image titles can be quoted with `'foo'`, `"foo"`, or `(foo)`. - make documentation +4. HTML elements are handled slightly differently. There is no longer a +`markdown="1"` feature. Instead, HTML elements that are on a line by +themselves will open an HTML block that will cause the rest of the "paragraph" +to be treated as HTML such that Markdown will not be parsed inside of it. +HTML block-level tags are even "stronger" at starting an HTML block. 
It is +not quite as complex as the approach used in CommonMark, but is similar under +most circumstances. -Build the [Doxygen]-generated documentation. + For example, this would not be parsed: - make clean +
    + *foo* +
    -Clean out the `build` directory. Be sure to run this before running another -command. + But this would be: +
    -## Git Submodules ## + *foo* -Apparently, submodules are a rather controversial feature in git. For me, -however, they have proven invaluable. My most active projects depend on each -other, and the submodule feature allows me to easily keep everything up to -date. That said, however, I quickly realized that submodules don't work very -well using default commands. +
    -The problem is that I want to always use the latest version of my submodules. -This is more easily accomplished when the submodule is set to the `master` -branch of the original repository, rather than a detached commit as happens -by default. In order to easily keep all submodules updated, there are two -scripts: +5. I haven't worked a lot yet on the MMD-specific features, so there may be +more changes to come. One thing I do anticipate is that if fenced code blocks +stay, they will work slightly differently. Currently, an opening fence +doesn't mean anything unless there is a closing fence that follows it. Again, +this requires backtracking in the parser. I suspect that an opening fence +will definitely open a code block. If there is no closing fence, then the +rest of the document will remain inside the code block. This is the approach +used by CommonMark and it's a reasonable one, IMO. -1. `link_git_modules` -- this script is generally only run when the master -repository is first cloned, but can also be run after a new submodule is -added. It causes the submodules to automatically track the master branch. -If you need to modify this, there are instructions in the script itself -explaining how to modify it on a per submodule basis. Running this script -more than one time will not hurt anything. -2. `update_git_modules` -- this script simply causes each submodule to be -updated to the latest commit in the original repository. Again, running it -multiple times doesn't hurt anything. +## Where Does MultiMarkdown 6 Stand? ## -## Source File Templates ## +### Features ### -In the `templates` directory are two files, `template.c.in` and -`template.h.in`. These are used to create default source files that include -the project title, copyright, license, etc. They are also set up to include -some example information for [Doxygen] and [CuTest]. +I *think* that all basic Markdown features have been implemented, except for +Setext headers, as mentioned above. 
Additionally, the following MultiMarkdown +features have been implemented: + +* Automatic cross-reference targets +* Basic Citation support +* CriticMarkup support +* Inline and reference footnotes +* Image and Link attributes (attributes can now be used with inline links as + well as reference links) +* Math support +* Smart quotes (support for languages other than English is not fully + implemented yet) +* Superscripts/subscripts + + +Things that are partially completed: + +* Citations -- still need: + * Syntax for "not cited" entries + * Output format + * HTML --> separate footnotes and citations? + * Locators required? +* CriticMarkup -- need to decide: + * How to handle CM stretches that include blank lines +* Fenced code blocks + + +Things yet to be completed: + +* Multiple blocks inside of reference footnotes +* Manually specified labels for headers +* Definition lists +* Abbreviations +* Metadata +* Glossaries +* Tables +* Table of Contents +* File Transclusion + + +### Accuracy ### + +MultiMarkdown v6 successfully parses the Markdown [syntax page], except for +the Setext header at the top. It passes the 29 test files currently in place. +There are a few ad + +[syntax page]: https://daringfireball.net/projects/markdown/syntax + + +### Performance ### + +Basic tests show that currently MMD 6 takes about 20-25% longer than CommonMark +0.27.0 to process long files (e.g. 0.2 MB). However, it is around 5% *faster* +than CommonMark when parsing a shorter file (27 kB) (measured by parsing the +same file 200 times over). This test suite is performed by using the Markdown +[syntax page], modified to avoid the use of the Setext header at the top. The +longer files tested are created by copying the same syntax page onto itself, +thereby doubling the length of the file with each iteration. + +The largest file I test is approximately 108 MB (4096 copies of the syntax +page). 
On my machine (2012 Mac mini with 2.3 GHz Intel Core i7, 16 GB RAM), +it takes approximately 4.4 seconds to parse with MMD 6 and 3.7 seconds with +CommonMark. MMD 6 processes approximately 25 MB/s on this test file. +CommonMark 0.27.0 gets about 29 MB/s on the same machine. + +There are some slight variations with the smaller test files (8-32 copies), +but overall the performance of both programs (MMD 6 and CommonMark) is +roughly linear as the test file gets bigger (double the file size and it takes +twice as long to parse, aka O(n)). + +Out of curiosity, I ran the same tests on the original Markdown.pl by Gruber +(v 1.0.2b8). It took approximately 178 seconds to parse 128 copies of the +file (3.4 MB) and was demonstrating quadratic performance characteristics +(double the file size and it takes 2^2 or 4 times longer to process, aka +O(n^2)). I didn't bother running it on larger versions of the test file. For +comparison, MMD 6 can process 128 copies in approximately 140 msec. + +Of note, the throughput speed drops when testing more complicated files +containing more advanced MultiMarkdown features, though it still seems to +maintain linear performance characteristics. A second test file is created by +concatenating all of the test suite files (including the Markdown syntax +file). In this case, MMD gets about 13 MB/s. CommonMark doesn't support +these additional features, so testing it with that file is not relevant. I +will work to see whether there are certain features in particular that are +more challenging and see whether they can be reworked to improve performance. + +As above, I have done some high level optimization of the parse strategy, but +I'm sure there's still a lot of room for further improvement to be made. +Suggestions welcome! 
## License ## -@My_Project_License@ + @My_Project_License_Indented@ diff --git a/templates/template.c.in b/templates/template.c.in index e9830d4..45897c4 100644 --- a/templates/template.c.in +++ b/templates/template.c.in @@ -17,7 +17,7 @@ @My_Project_Copyright@ - @My_Project_License_Indent@ + @My_Project_License_Indented@ */ diff --git a/templates/template.h.in b/templates/template.h.in index 8709eca..ab64f45 100644 --- a/templates/template.h.in +++ b/templates/template.h.in @@ -17,7 +17,7 @@ @My_Project_Copyright@ - @My_Project_License_Indent@ + @My_Project_License_Indented@ */ diff --git a/test/speed-full.sh b/test/speed-full.sh new file mode 100755 index 0000000..f8c68c8 --- /dev/null +++ b/test/speed-full.sh @@ -0,0 +1,59 @@ +#!/bin/bash +cp ../tests/MMD6Tests/Markdown\ Syntax.text ../build/speed.txt + +cd ../build; + + +echo "MMD 6 - 8" +cat speed.txt{,}{,}{,} > speeda.txt +cat speeda.txt > speedbig.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 16" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 32" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 64" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 128" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 256" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 512" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 1024" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown 
speedbig.txt > /dev/null + +echo "MMD 6 - 2048" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +echo "MMD 6 - 4096" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown speedbig.txt > /dev/null + +rm speed.txt +rm speedbig.txt +rm speeda.txt diff --git a/test/speed.sh b/test/speed.sh new file mode 100755 index 0000000..3421352 --- /dev/null +++ b/test/speed.sh @@ -0,0 +1,59 @@ +#!/bin/bash +cp ../tests/MMD6Tests/Markdown\ Syntax.text ../build/speed.txt + +cd ../build; + + +echo "MMD 6 - 8" +cat speed.txt{,}{,}{,} > speeda.txt +cat speeda.txt > speedbig.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 16" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 32" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 64" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 128" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 256" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 512" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 1024" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 2048" +cat speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +echo "MMD 6 - 4096" +cat 
speeda.txt >> speedbig.txt +cat speedbig.txt > speeda.txt +/usr/bin/env time -p ./multimarkdown -c speedbig.txt > /dev/null + +rm speed.txt +rm speedbig.txt +rm speeda.txt diff --git a/tests/Disabled/Advanced Footnotes.text b/tests/Disabled/Advanced Footnotes.text new file mode 100644 index 0000000..09c21fd --- /dev/null +++ b/tests/Disabled/Advanced Footnotes.text @@ -0,0 +1,17 @@ +Reference.[^foo] + +Reference.[^foo2] + +Reference.[^foo3] + + +[^foo]: This is a *short* footnote. +[^foo2]: This is a longer footnote. + + With two paragraphs. + +[^foo3]: This is a longer footnote. + + * With + * a + * list diff --git a/tests/Disabled/Advanced.html b/tests/Disabled/Advanced.html new file mode 100644 index 0000000..7f0bf0e --- /dev/null +++ b/tests/Disabled/Advanced.html @@ -0,0 +1,5 @@ +
    \begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}
    +
    + +
    \begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}
    +
    diff --git a/tests/Disabled/Advanced.text b/tests/Disabled/Advanced.text new file mode 100644 index 0000000..e25850c --- /dev/null +++ b/tests/Disabled/Advanced.text @@ -0,0 +1,13 @@ +``` +\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation} +``` + +```latex +\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation} +``` + + +A slightly more [complicated *example] string* with *improperly* nested structures. + +[complicated *example]: http://foo.bar/ + diff --git a/tests/Disabled/zEmph and Strong Complex.html b/tests/Disabled/zEmph and Strong Complex.html new file mode 100644 index 0000000..3de46b5 --- /dev/null +++ b/tests/Disabled/zEmph and Strong Complex.html @@ -0,0 +1,5 @@ +

    foobarfoo

    + +

    foo foobarfoo

    + +

    foobarfoo foo

    diff --git a/tests/Disabled/zEmph and Strong Complex.text b/tests/Disabled/zEmph and Strong Complex.text new file mode 100644 index 0000000..babca82 --- /dev/null +++ b/tests/Disabled/zEmph and Strong Complex.text @@ -0,0 +1,5 @@ +**foo*bar*foo** + +**foo foo*bar*foo** + +**foo*bar*foo foo** diff --git a/tests/MMD6Tests/Advanced Emph and Strong.html b/tests/MMD6Tests/Advanced Emph and Strong.html new file mode 100644 index 0000000..0799b20 --- /dev/null +++ b/tests/MMD6Tests/Advanced Emph and Strong.html @@ -0,0 +1,59 @@ +

    foobar

    + +

    foo*bar**

    + +

    foo*bar***

    + +

    foo**bar*

    + +

    foobar

    + +

    5

    + +

    foo**bar***

    + +

    foo***bar*

    + +

    foo***bar**

    + +

    foobar

    + +

    foobar

    + +

    10

    + +

    *foo**bar

    + +

    foobar*

    + +

    foo**bar

    + +

    foobar

    + +

    foo****bar

    + +

    15

    + +

    foobarfoo

    + +

    **foo*bar

    + +

    foo*bar

    + +

    foobar**

    + +

    foobar

    + +

    20

    + +

    foo****bar

    + +

    foo foobarfoo foo

    + +

    foobar

    + +

    foobar

    + +

    abduct instead of abduct

    + +

    25

    diff --git a/tests/MMD6Tests/Advanced Emph and Strong.htmlc b/tests/MMD6Tests/Advanced Emph and Strong.htmlc new file mode 100644 index 0000000..0799b20 --- /dev/null +++ b/tests/MMD6Tests/Advanced Emph and Strong.htmlc @@ -0,0 +1,59 @@ +

    foobar

    + +

    foo*bar**

    + +

    foo*bar***

    + +

    foo**bar*

    + +

    foobar

    + +

    5

    + +

    foo**bar***

    + +

    foo***bar*

    + +

    foo***bar**

    + +

    foobar

    + +

    foobar

    + +

    10

    + +

    *foo**bar

    + +

    foobar*

    + +

    foo**bar

    + +

    foobar

    + +

    foo****bar

    + +

    15

    + +

    foobarfoo

    + +

    **foo*bar

    + +

    foo*bar

    + +

    foobar**

    + +

    foobar

    + +

    20

    + +

    foo****bar

    + +

    foo foobarfoo foo

    + +

    foobar

    + +

    foobar

    + +

    abduct instead of abduct

    + +

    25

    diff --git a/tests/MMD6Tests/Advanced Emph and Strong.text b/tests/MMD6Tests/Advanced Emph and Strong.text new file mode 100644 index 0000000..5af2235 --- /dev/null +++ b/tests/MMD6Tests/Advanced Emph and Strong.text @@ -0,0 +1,59 @@ +foo*bar* + +foo*bar** + +foo*bar*** + +foo**bar* + +foo**bar** + +5 + +foo**bar*** + +foo***bar* + +foo***bar** + +foo***bar*** + +*foo*bar + +10 + +*foo**bar + +*foo*bar* + +*foo**bar* + +*foo***bar** + +*foo****bar* + +15 + +*foo**bar**foo* + +**foo*bar + +**foo*bar** + +**foo**bar** + +**foo***bar* + +20 + +**foo****bar** + +**foo foo*bar*foo foo** + +***foo*bar** + +***foo**bar* + +*ab*duct instead of ab*duct* + +25 diff --git a/tests/MMD6Tests/Amps and Angles.html b/tests/MMD6Tests/Amps and Angles.html new file mode 100644 index 0000000..6fc4e57 --- /dev/null +++ b/tests/MMD6Tests/Amps and Angles.html @@ -0,0 +1,24 @@ +

    AT&T has an ampersand in their name.

    + +

    AT&T is another way to write it.

    + +

    This & that.

    + +

    4 < 5.

    + +

    6 > 5.

    + +

    5

    + +

    Here is a link with an ampersand in the URL.

    + +

    Here is a link with an amersand in the link text: AT&T.

    + +

    Here is an inline link.

    + +

    Here is an inline link.

    + +
    & and &amp; and < and > in code block.
    +
    + +

    10

    diff --git a/tests/MMD6Tests/Amps and Angles.htmlc b/tests/MMD6Tests/Amps and Angles.htmlc new file mode 100644 index 0000000..6fc4e57 --- /dev/null +++ b/tests/MMD6Tests/Amps and Angles.htmlc @@ -0,0 +1,24 @@ +

    AT&T has an ampersand in their name.

    + +

    AT&T is another way to write it.

    + +

    This & that.

    + +

    4 < 5.

    + +

    6 > 5.

    + +

    5

    + +

    Here is a link with an ampersand in the URL.

    + +

    Here is a link with an amersand in the link text: AT&T.

    + +

    Here is an inline link.

    + +

    Here is an inline link.

    + +
    & and &amp; and < and > in code block.
    +
    + +

    10

    diff --git a/tests/MMD6Tests/Amps and Angles.text b/tests/MMD6Tests/Amps and Angles.text new file mode 100644 index 0000000..3ca9d46 --- /dev/null +++ b/tests/MMD6Tests/Amps and Angles.text @@ -0,0 +1,26 @@ +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 < 5. + +6 > 5. + +5 + +Here is a [link][1] with an ampersand in the URL. + +Here is a link with an amersand in the link text: [AT&T][2]. + +Here is an inline [link](/script?foo=1&bar=2). + +Here is an inline [link](). + + & and & and < and > in code block. + +10 + +[1]: http://example.com/?foo=1&bar=2 +[2]: http://att.com/ "AT&T" diff --git a/tests/MMD6Tests/Automatic Links.html b/tests/MMD6Tests/Automatic Links.html new file mode 100644 index 0000000..7964b5c --- /dev/null +++ b/tests/MMD6Tests/Automatic Links.html @@ -0,0 +1,3 @@ +

    http://foo.com/

    + +

    foo@bar.com

    diff --git a/tests/MMD6Tests/Automatic Links.htmlc b/tests/MMD6Tests/Automatic Links.htmlc new file mode 100644 index 0000000..7964b5c --- /dev/null +++ b/tests/MMD6Tests/Automatic Links.htmlc @@ -0,0 +1,3 @@ +

    http://foo.com/

    + +

    foo@bar.com

    diff --git a/tests/MMD6Tests/Automatic Links.text b/tests/MMD6Tests/Automatic Links.text new file mode 100644 index 0000000..60da45f --- /dev/null +++ b/tests/MMD6Tests/Automatic Links.text @@ -0,0 +1,3 @@ + + + diff --git a/tests/MMD6Tests/Basic Blocks.html b/tests/MMD6Tests/Basic Blocks.html new file mode 100644 index 0000000..fb63a8f --- /dev/null +++ b/tests/MMD6Tests/Basic Blocks.html @@ -0,0 +1,16 @@ +

    foo

    + +

    Heading foo

    + +

    Heading

    + +

    foo +bar

    + +

    5

    + +

    foo +bar

    + +

    foo +bar

    diff --git a/tests/MMD6Tests/Basic Blocks.htmlc b/tests/MMD6Tests/Basic Blocks.htmlc new file mode 100644 index 0000000..21822d1 --- /dev/null +++ b/tests/MMD6Tests/Basic Blocks.htmlc @@ -0,0 +1,16 @@ +

    foo

    + +

    Heading foo

    + +

    Heading

    + +

    foo +bar

    + +

    5

    + +

    foo +bar

    + +

    foo +bar

    diff --git a/tests/MMD6Tests/Basic Blocks.text b/tests/MMD6Tests/Basic Blocks.text new file mode 100644 index 0000000..92c9486 --- /dev/null +++ b/tests/MMD6Tests/Basic Blocks.text @@ -0,0 +1,17 @@ +foo + +# Heading *foo* # + +## Heading ## + +foo +bar + +5 + +foo + bar + +foo + bar + diff --git a/tests/MMD6Tests/Basic Lists.html b/tests/MMD6Tests/Basic Lists.html new file mode 100644 index 0000000..bd29e7c --- /dev/null +++ b/tests/MMD6Tests/Basic Lists.html @@ -0,0 +1,74 @@ +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    5

    + +
      +
    1. foo
    2. +
    3. foo
    4. +
    5. foo
    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    10

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo +bar
    • +
    • foo +bar
    • +
    • foo +bar
    • +
    + +

    15

    diff --git a/tests/MMD6Tests/Basic Lists.htmlc b/tests/MMD6Tests/Basic Lists.htmlc new file mode 100644 index 0000000..bd29e7c --- /dev/null +++ b/tests/MMD6Tests/Basic Lists.htmlc @@ -0,0 +1,74 @@ +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    5

    + +
      +
    1. foo
    2. +
    3. foo
    4. +
    5. foo
    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    10

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo +bar
    • +
    • foo +bar
    • +
    • foo +bar
    • +
    + +

    15

    diff --git a/tests/MMD6Tests/Basic Lists.text b/tests/MMD6Tests/Basic Lists.text new file mode 100644 index 0000000..04bd226 --- /dev/null +++ b/tests/MMD6Tests/Basic Lists.text @@ -0,0 +1,64 @@ +* foo +* foo +* foo + + +bar + +* foo + +* foo + +* foo + + +bar + +* foo +* foo + +* foo + +5 + +1. foo +2. foo +3. foo + +bar + +1. foo + +2. foo + +3. foo + +bar + +1. foo +2. foo + +3. foo + +10 + ++ foo ++ foo ++ foo + +bar + +- foo +- foo +- foo + +bar + +* foo + bar +* foo + bar +* foo + bar + +15 diff --git a/tests/MMD6Tests/Blockquotes.html b/tests/MMD6Tests/Blockquotes.html new file mode 100644 index 0000000..94ac71c --- /dev/null +++ b/tests/MMD6Tests/Blockquotes.html @@ -0,0 +1,35 @@ +
    +

    foo +bar

    +
    + +
    +

    foo

    + +
    +

    bar +foo

    +
    +
    + +
    +

    foo

    + +
    +

    bar

    +
    + +

    foo

    +
    + +
    +

    foo +bar + foo

    + +
    	bar
    +foo
    +
    + +

    bar

    +
    diff --git a/tests/MMD6Tests/Blockquotes.htmlc b/tests/MMD6Tests/Blockquotes.htmlc new file mode 100644 index 0000000..94ac71c --- /dev/null +++ b/tests/MMD6Tests/Blockquotes.htmlc @@ -0,0 +1,35 @@ +
    +

    foo +bar

    +
    + +
    +

    foo

    + +
    +

    bar +foo

    +
    +
    + +
    +

    foo

    + +
    +

    bar

    +
    + +

    foo

    +
    + +
    +

    foo +bar + foo

    + +
    	bar
    +foo
    +
    + +

    bar

    +
    diff --git a/tests/MMD6Tests/Blockquotes.text b/tests/MMD6Tests/Blockquotes.text new file mode 100644 index 0000000..f983ae3 --- /dev/null +++ b/tests/MMD6Tests/Blockquotes.text @@ -0,0 +1,19 @@ +> foo +> bar + +> foo +>> bar +> foo + +> foo +> > bar +> +> foo + +> foo +> bar +> foo +> +> bar +> foo +>bar diff --git a/tests/MMD6Tests/Citations.html b/tests/MMD6Tests/Citations.html new file mode 100644 index 0000000..7976de5 --- /dev/null +++ b/tests/MMD6Tests/Citations.html @@ -0,0 +1,18 @@ +

    [1]

    + +

    [p. 123][1]

    + +

    [][1]

    + +

    [Not Cited][1]

    + +
    +
    +
      + +
    1. +

      John Doe. A Totally Fake Book. Vanity Press, 2006.  ↩

      +
    2. + +
    +
    diff --git a/tests/MMD6Tests/Citations.htmlc b/tests/MMD6Tests/Citations.htmlc new file mode 100644 index 0000000..2a77d6e --- /dev/null +++ b/tests/MMD6Tests/Citations.htmlc @@ -0,0 +1,9 @@ +

    [#first]

    + +

    [p. 123][#first]

    + +

    [][#first]

    + +

    [Not Cited][#first]

    + +

    [#first]: John Doe. A Totally Fake Book. Vanity Press, 2006.

    diff --git a/tests/MMD6Tests/Citations.text b/tests/MMD6Tests/Citations.text new file mode 100644 index 0000000..6540e77 --- /dev/null +++ b/tests/MMD6Tests/Citations.text @@ -0,0 +1,10 @@ +[#first] + +[p. 123][#first] + +[][#first] + +[Not Cited][#first] + + +[#first]: John Doe. *A Totally Fake Book*. Vanity Press, 2006. diff --git a/tests/MMD6Tests/Code Spans.html b/tests/MMD6Tests/Code Spans.html new file mode 100644 index 0000000..28f256a --- /dev/null +++ b/tests/MMD6Tests/Code Spans.html @@ -0,0 +1,34 @@ +

    foo

    + +

    foo ` bar

    + +

    ``

    + +

    foo``bar

    + +

    ``foo`

    + +

    5

    + +

    `foo``

    + +

    foo

    + +

    foo bar +baz

    + +

    foo `` bar

    + +

    foo

    + +

    10

    + +

    foo bar

    + +

    *foo*

    + +

    [foo]

    + +

    -<>--&\&---...

    + +

    `foo`

    diff --git a/tests/MMD6Tests/Code Spans.htmlc b/tests/MMD6Tests/Code Spans.htmlc new file mode 100644 index 0000000..28f256a --- /dev/null +++ b/tests/MMD6Tests/Code Spans.htmlc @@ -0,0 +1,34 @@ +

    foo

    + +

    foo ` bar

    + +

    ``

    + +

    foo``bar

    + +

    ``foo`

    + +

    5

    + +

    `foo``

    + +

    foo

    + +

    foo bar +baz

    + +

    foo `` bar

    + +

    foo

    + +

    10

    + +

    foo bar

    + +

    *foo*

    + +

    [foo]

    + +

    -<>--&\&---...

    + +

    `foo`

    diff --git a/tests/MMD6Tests/Code Spans.text b/tests/MMD6Tests/Code Spans.text new file mode 100644 index 0000000..e27ed95 --- /dev/null +++ b/tests/MMD6Tests/Code Spans.text @@ -0,0 +1,36 @@ +`foo` + +`` foo ` bar `` + +` `` ` + +`foo``bar` + +``foo` + +5 + +`foo`` + +`` +foo +`` + +`foo bar + baz` + +`foo `` bar` + +` foo ` + +10 + +` foo bar ` + +`*foo*` + +`[foo]` + +`-<>--&\&---...` + +`` `foo` `` diff --git a/tests/MMD6Tests/CriticMarkup.html b/tests/MMD6Tests/CriticMarkup.html new file mode 100644 index 0000000..4ae2297 --- /dev/null +++ b/tests/MMD6Tests/CriticMarkup.html @@ -0,0 +1,59 @@ +

    foo

    + +

    bar

    + +

    foobar

    + +

    foo

    + +

    bar

    + +

    5

    + +

    foo bar

    + +

    foo bar

    + +

    foo foobar

    + +

    foo bar

    + +

    foo bar

    + +

    10

    + +

    foo bar

    + +

    foo bar

    + +

    foo barbar

    + +

    foo bar

    + +

    foo bar

    + +

    15

    + +

    foo bar

    + +

    foo bar

    + +

    foo foobar

    + +

    foo bar

    + +

    foo bar

    + +

    20

    + +

    foo **bar**

    + +

    foo **bar**

    + +

    foo **foo**bar**

    + +

    foo **bar**

    + +

    foo **bar**

    + +

    25

    diff --git a/tests/MMD6Tests/CriticMarkup.htmlc b/tests/MMD6Tests/CriticMarkup.htmlc new file mode 100644 index 0000000..11d33ae --- /dev/null +++ b/tests/MMD6Tests/CriticMarkup.htmlc @@ -0,0 +1,59 @@ +

    {++foo++}

    + +

    {--bar--}

    + +

    {~~foo~>bar~~}

    + +

    {>>foo<<}

    + +

    {==bar==}

    + +

    5

    + +

    foo{++ bar++}

    + +

    foo{-- bar--}

    + +

    foo {~~foo~>bar~~}

    + +

    foo {>>bar<<}

    + +

    foo {==bar==}

    + +

    10

    + +

    foo{++ bar++}

    + +

    foo{-- bar--}

    + +

    foo {~~bar~>bar~~}

    + +

    foo {>>bar<<}

    + +

    foo {==bar==}

    + +

    15

    + +

    foo {++bar++}

    + +

    foo {--bar--}

    + +

    foo {~~foo~>bar~~}

    + +

    foo {>>bar<<}

    + +

    foo {==bar==}

    + +

    20

    + +

    foo {++bar++}

    + +

    foo {--bar--}

    + +

    foo {~~foo~>bar**~~}

    + +

    foo {>>bar<<}

    + +

    foo {==bar==}

    + +

    25

    diff --git a/tests/MMD6Tests/CriticMarkup.text b/tests/MMD6Tests/CriticMarkup.text new file mode 100644 index 0000000..20d7b11 --- /dev/null +++ b/tests/MMD6Tests/CriticMarkup.text @@ -0,0 +1,59 @@ +{++foo++} + +{--bar--} + +{~~foo~>bar~~} + +{>>foo<<} + +{==bar==} + +5 + +foo{++ bar++} + +foo{-- bar--} + +foo {~~foo~>bar~~} + +foo {>>bar<<} + +foo {==bar==} + +10 + +foo{++ **bar**++} + +foo{-- **bar**--} + +foo {~~**bar**~>**bar**~~} + +foo {>>**bar**<<} + +foo {==**bar**==} + +15 + +foo **{++bar++}** + +foo **{--bar--}** + +foo **{~~foo~>bar~~}** + +foo **{>>bar<<}** + +foo **{==bar==}** + +20 + +foo **{++bar**++} + +foo **{--bar**--} + +foo **{~~foo**~>bar**~~} + +foo **{>>bar**<<} + +foo **{==bar**==} + +25 diff --git a/tests/MMD6Tests/Cross-References.html b/tests/MMD6Tests/Cross-References.html new file mode 100644 index 0000000..b7a152a --- /dev/null +++ b/tests/MMD6Tests/Cross-References.html @@ -0,0 +1,21 @@ +

    A Section

    + +

    109&*&#()^ Can Start With Digit

    + +

    Strip out &%^ characters &*^

    + +

    A Section.

    + +

    1 Cross-References: Special Characters!@#$%&*()<>^

    + +

    5

    + +

    And now, link to 1 Cross-References: Special Characters!@#$%&*()<>^

    + +

    Заголовок по-русски

    + +

    И ссылка на Заголовок по-русски.

    + +

    Test 的 Multibyte

    + +

    10

    diff --git a/tests/MMD6Tests/Cross-References.htmlc b/tests/MMD6Tests/Cross-References.htmlc new file mode 100644 index 0000000..9bbb4d6 --- /dev/null +++ b/tests/MMD6Tests/Cross-References.htmlc @@ -0,0 +1,21 @@ +

    A Section

    + +

    109&*&#()^ Can Start With Digit

    + +

    Strip out &%^ characters &*^

    + +

    [A Section].

    + +

    1 Cross-References: Special Characters!@#$%&*()<>^

    + +

    5

    + +

    And now, link to [1 Cross-References: Special Characters!@#$%&*()<>^][]

    + +

    Заголовок по-русски

    + +

    И ссылка на [Заголовок по-русски].

    + +

    Test 的 Multibyte

    + +

    10

    diff --git a/tests/MMD6Tests/Cross-References.text b/tests/MMD6Tests/Cross-References.text new file mode 100644 index 0000000..bd4d649 --- /dev/null +++ b/tests/MMD6Tests/Cross-References.text @@ -0,0 +1,21 @@ +# A Section # + +# 109&*&#()^ Can Start With Digit # + +# Strip out &%^ characters &*^ # + +[A Section]. + +## 1 Cross-References: Special Characters!@#$%&*()<>^ ## + +5 + +And now, link to [1 Cross-References: Special Characters!@#$%&*()<>^][] + +# Заголовок по-русски # + +И ссылка на [Заголовок по-русски]. + +# Test 的 Multibyte # + +10 diff --git a/tests/MMD6Tests/Edge Cases 2.html b/tests/MMD6Tests/Edge Cases 2.html new file mode 100644 index 0000000..54b2dfd --- /dev/null +++ b/tests/MMD6Tests/Edge Cases 2.html @@ -0,0 +1,70 @@ +

    b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t

    diff --git a/tests/MMD6Tests/Edge Cases 2.htmlc b/tests/MMD6Tests/Edge Cases 2.htmlc new file mode 100644 index 0000000..54b2dfd --- /dev/null +++ b/tests/MMD6Tests/Edge Cases 2.htmlc @@ -0,0 +1,70 @@ +

    b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t +b, u, v, w, x, y, z, t w t

    diff --git a/tests/MMD6Tests/Edge Cases 2.text b/tests/MMD6Tests/Edge Cases 2.text new file mode 100644 index 0000000..2b15883 --- /dev/null +++ b/tests/MMD6Tests/Edge Cases 2.text @@ -0,0 +1,71 @@ + +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, 
*__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, 
___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* +**b**, _u_, __v__, *__w__*, _**x**_, ***y***, ___z___, *t __w__ t* diff --git a/tests/MMD6Tests/Edge Cases.html b/tests/MMD6Tests/Edge Cases.html new file mode 100644 index 0000000..fe61d34 --- /dev/null +++ b/tests/MMD6Tests/Edge Cases.html @@ -0,0 +1,89 @@ +
      +
    • foo +bar
    • +
    + +

    foo

    + +
      +
    • bar
    • +
    + +

    foo

    + +
      +
    1. bar
    2. +
    + +

    foo

    + +
      +
    1. bar
    2. +
    + +

    foo

    + +
    +

    bar

    +
    + +

    5

    + +

    foo

    + +

    bar

    + +

    foo +bar

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    10

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    This should be parsed – fo***o

    + +

    test thisthing

    + +

    15

    + +

    _test this_thing

    + +

    test thisthing

    + +

    __test this__thing

    + +

    test thisthing

    + +

    ___test this___thing

    + +

    20

    + +

    This is another test of italics and bold.

    + +

    This is another test of bold and italics.

    + +

    This is another test of bold and italics.

    + +

    This is another test of italics and bold.

    + +

    This is another test of italics and bold.

    + +

    25

    + +

    This is another test of bold and italics.

    + +

    This is another test of bold and italics.

    + +

    This is another test of italics and bold.

    + +

    *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a

    diff --git a/tests/MMD6Tests/Edge Cases.htmlc b/tests/MMD6Tests/Edge Cases.htmlc new file mode 100644 index 0000000..6e1a2c5 --- /dev/null +++ b/tests/MMD6Tests/Edge Cases.htmlc @@ -0,0 +1,89 @@ +
      +
    • foo +bar
    • +
    + +

    foo

    + +
      +
    • bar
    • +
    + +

    foo

    + +
      +
    1. bar
    2. +
    + +

    foo

    + +
      +
    1. bar
    2. +
    + +

    foo

    + +
    +

    bar

    +
    + +

    5

    + +

    foo

    + +

    bar

    + +

    foo +bar

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    10

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    The quick brown fox jumped

    + +

    This should be parsed -- fo***o

    + +

    test thisthing

    + +

    15

    + +

    _test this_thing

    + +

    test thisthing

    + +

    __test this__thing

    + +

    test thisthing

    + +

    ___test this___thing

    + +

    20

    + +

    This is another test of italics and bold.

    + +

    This is another test of bold and italics.

    + +

    This is another test of bold and italics.

    + +

    This is another test of italics and bold.

    + +

    This is another test of italics and bold.

    + +

    25

    + +

    This is another test of bold and italics.

    + +

    This is another test of bold and italics.

    + +

    This is another test of italics and bold.

    + +

    *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a

    diff --git a/tests/MMD6Tests/Edge Cases.text b/tests/MMD6Tests/Edge Cases.text new file mode 100644 index 0000000..f44df90 --- /dev/null +++ b/tests/MMD6Tests/Edge Cases.text @@ -0,0 +1,74 @@ +* foo +bar + +foo +* bar + +foo +1. bar + +foo +2. bar + +foo +> bar + +5 + +foo +# bar + +foo + bar + +The ***quick*** brown ***fox*** jumped + +The ***quick*** brown fox jumped + +The ***quick** brown fox* jumped + +10 + +The ***quick* brown fox** jumped + +The ***quick* brown *fox*** jumped + +The ***quick** brown **fox*** jumped + +This *should* be parsed -- fo***o + +*test this*thing + +15 + +_test this_thing + +**test this**thing + +__test this__thing + +***test this***thing + +___test this___thing + +20 + +This is ***another* test** of *italics* and **bold**. + +This is ***another* test** of **bold** and *italics*. + +This is ***another** test* of **bold** and *italics*. + +This is ***another** test* of *italics* and **bold**. + +This is ___another_ test__ of _italics_ and __bold__. + +25 + +This is ___another_ test__ of __bold__ and _italics_. + +This is ___another__ test_ of __bold__ and _italics_. + +This is ___another__ test_ of _italics_ and __bold__. + +*a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a *a diff --git a/tests/MMD6Tests/Emph and Strong Star.html b/tests/MMD6Tests/Emph and Strong Star.html new file mode 100644 index 0000000..0fc31b0 --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong Star.html @@ -0,0 +1,195 @@ +

    foo

    + +

    *foo

    + +

    foo*

    + +

    foo bar

    + +

    foo bar

    + +

    5

    + +

    foo bar

    + +

    foo

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    10

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    15

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo

    + +

    20

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foobar*

    + +

    25

    + +

    foo bar foo bar

    + +

    foo bar foo bar

    + +

    foo barbar

    + +

    foo barbar

    + +

    foo barbar

    + +

    30

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foo foobarfoo foo

    + +

    35

    + +

    foo*bar*

    + +

    *(foo)

    + +

    foo:

    + +

    foo:

    + +

    foo:

    + +

    40

    + +

    foo*bar

    + +

    foobar foobar

    + +

    foo**bar

    + +

    foobar foobar

    + +

    foo***bar

    + +

    45

    + +

    foobar foobar

    + +

    foo - bar

    + +

    foo 1. bar

    + +

    foo: bar

    + +

    *foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +foo

    diff --git a/tests/MMD6Tests/Emph and Strong Star.htmlc b/tests/MMD6Tests/Emph and Strong Star.htmlc new file mode 100644 index 0000000..0fc31b0 --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong Star.htmlc @@ -0,0 +1,195 @@ +

    foo

    + +

    *foo

    + +

    foo*

    + +

    foo bar

    + +

    foo bar

    + +

    5

    + +

    foo bar

    + +

    foo

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    10

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    15

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo

    + +

    20

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foobar*

    + +

    25

    + +

    foo bar foo bar

    + +

    foo bar foo bar

    + +

    foo barbar

    + +

    foo barbar

    + +

    foo barbar

    + +

    30

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foobarfoo

    + +

    foo foobarfoo foo

    + +

    35

    + +

    foo*bar*

    + +

    *(foo)

    + +

    foo:

    + +

    foo:

    + +

    foo:

    + +

    40

    + +

    foo*bar

    + +

    foobar foobar

    + +

    foo**bar

    + +

    foobar foobar

    + +

    foo***bar

    + +

    45

    + +

    foobar foobar

    + +

    foo - bar

    + +

    foo 1. bar

    + +

    foo: bar

    + +

    *foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +foo

    diff --git a/tests/MMD6Tests/Emph and Strong Star.text b/tests/MMD6Tests/Emph and Strong Star.text new file mode 100644 index 0000000..06e3146 --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong Star.text @@ -0,0 +1,195 @@ +*foo* + +**foo* + +*foo** + +*foo bar* + +*foo* bar + +5 + +foo *bar* + +**foo** + +**foo bar** + +**foo** bar + +foo **bar** + +10 + +*foo *bar* foo* + +*foo **bar** foo* + +*foo ***bar*** foo* + +**foo *bar* foo** + +**foo **bar** foo** + +15 + +**foo ***bar*** foo** + +***foo *bar* foo*** + +***foo **bar** foo*** + +***foo ***bar*** foo*** + +***foo*** + +20 + +***foo** bar* + +***foo* bar** + +*foo **bar*** + +**foo *bar*** + +*foo*bar* + +25 + +*foo *bar *foo *bar**** + +****foo* bar* foo* bar* + +*foo bar*bar + +**foo bar**bar + +***foo bar***bar + +30 + +foo*bar*foo + +foo**bar**foo + +foo***bar***foo + +foo*bar*foo + +**foo *foobarfoo* foo** + +35 + +foo**`*bar*`** + +*(*foo*) + +*foo*: + +**foo**: + +***foo***: + +40 + +foo*bar + +foo*bar foo*bar + +foo**bar + +foo**bar foo**bar + +foo***bar + +45 + +foo***bar foo***bar + +foo **- bar** + +foo **1. bar** + +**foo:** bar + +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo +*foo* diff --git a/tests/MMD6Tests/Emph and Strong UL.html b/tests/MMD6Tests/Emph and Strong UL.html new file mode 100644 index 0000000..f1fd99a --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong UL.html @@ -0,0 +1,193 @@ +

    foo

    + +

    _foo

    + +

    foo_

    + +

    foo bar

    + +

    foo bar

    + +

    5

    + +

    foo bar

    + +

    foo

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    10

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    15

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo

    + +

    20

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo_bar

    + +

    25

    + +

    foo bar foo bar

    + +

    foo bar foo bar

    + +

    _foo bar_bar

    + +

    __foo bar__bar

    + +

    ___foo bar___bar

    + +

    30

    + +

    foo_bar_foo

    + +

    foo__bar__foo

    + +

    foo___bar___foo

    + +

    foo_bar_foo

    + +

    foo foobarfoo foo

    + +

    35

    + +

    foo___bar___

    + +

    _(foo)

    + +

    foo:

    + +

    foo:

    + +

    foo:

    + +

    40

    + +

    foo_bar

    + +

    foo_bar foo_bar

    + +

    foo__bar

    + +

    foo__bar foo__bar

    + +

    foo___bar

    + +

    45

    + +

    foo___bar foo___bar

    + +

    foo __- bar__

    + +

    foo 1. bar

    + +

    _foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +foo

    diff --git a/tests/MMD6Tests/Emph and Strong UL.htmlc b/tests/MMD6Tests/Emph and Strong UL.htmlc new file mode 100644 index 0000000..f1fd99a --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong UL.htmlc @@ -0,0 +1,193 @@ +

    foo

    + +

    _foo

    + +

    foo_

    + +

    foo bar

    + +

    foo bar

    + +

    5

    + +

    foo bar

    + +

    foo

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    10

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    15

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo bar foo

    + +

    foo

    + +

    20

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo bar

    + +

    foo_bar

    + +

    25

    + +

    foo bar foo bar

    + +

    foo bar foo bar

    + +

    _foo bar_bar

    + +

    __foo bar__bar

    + +

    ___foo bar___bar

    + +

    30

    + +

    foo_bar_foo

    + +

    foo__bar__foo

    + +

    foo___bar___foo

    + +

    foo_bar_foo

    + +

    foo foobarfoo foo

    + +

    35

    + +

    foo___bar___

    + +

    _(foo)

    + +

    foo:

    + +

    foo:

    + +

    foo:

    + +

    40

    + +

    foo_bar

    + +

    foo_bar foo_bar

    + +

    foo__bar

    + +

    foo__bar foo__bar

    + +

    foo___bar

    + +

    45

    + +

    foo___bar foo___bar

    + +

    foo __- bar__

    + +

    foo 1. bar

    + +

    _foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +foo

    diff --git a/tests/MMD6Tests/Emph and Strong UL.text b/tests/MMD6Tests/Emph and Strong UL.text new file mode 100644 index 0000000..f2b5723 --- /dev/null +++ b/tests/MMD6Tests/Emph and Strong UL.text @@ -0,0 +1,193 @@ +_foo_ + +__foo_ + +_foo__ + +_foo bar_ + +_foo_ bar + +5 + +foo _bar_ + +__foo__ + +__foo bar__ + +__foo__ bar + +foo __bar__ + +10 + +_foo _bar_ foo_ + +_foo __bar__ foo_ + +_foo ___bar___ foo_ + +__foo _bar_ foo__ + +__foo __bar__ foo__ + +15 + +__foo ___bar___ foo__ + +___foo _bar_ foo___ + +___foo __bar__ foo___ + +___foo ___bar___ foo___ + +___foo___ + +20 + +___foo__ bar_ + +___foo_ bar__ + +_foo __bar___ + +__foo _bar___ + +_foo_bar_ + +25 + +_foo _bar _foo _bar____ + +____foo_ bar_ foo_ bar_ + +_foo bar_bar + +__foo bar__bar + +___foo bar___bar + +30 + +foo_bar_foo + +foo__bar__foo + +foo___bar___foo + +foo_bar_foo + +__foo _foobarfoo_ foo__ + +35 + +foo__`_bar_`__ + +_(_foo_) + +_foo_: + +__foo__: + +___foo___: + +40 + +foo_bar + +foo_bar foo_bar + +foo__bar + +foo__bar foo__bar + +foo___bar + +45 + +foo___bar foo___bar + +foo __- bar__ + +foo __1. bar__ + +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo +_foo_ diff --git a/tests/MMD6Tests/Escapes.html b/tests/MMD6Tests/Escapes.html new file mode 100644 index 0000000..32a8f31 --- /dev/null +++ b/tests/MMD6Tests/Escapes.html @@ -0,0 +1,92 @@ +

    .

    + +

    !

    + +

    ?

    + +

    ,

    + +

    ;

    + +

    5

    + +

    :

    + +

    "

    + +

    '

    + +

    `

    + +

    ~

    + +

    10

    + +

    (

    + +

    )

    + +

    {

    + +

    }

    + +

    [

    + +

    15

    + +

    ]

    + +

    #

    + +

    $

    + +

    %

    + +

    +

    + +

    20

    + +

    -

    + +

    =

    + +

    <

    + +

    >

    + +

    &

    + +

    25

    + +

    @

    + +

    \

    + +

    /

    + +

    ^

    + +

    *

    + +

    30

    + +

    _

    + +

    |

    + +
    \-
    +\&
    +\%
    +\\
    +\`
    +
    + +

    \- \& \% \\ \`

    + +

    *foo*

    + +

    35

    + +

    _bar_

    + +

    `foo`

    diff --git a/tests/MMD6Tests/Escapes.htmlc b/tests/MMD6Tests/Escapes.htmlc new file mode 100644 index 0000000..32a8f31 --- /dev/null +++ b/tests/MMD6Tests/Escapes.htmlc @@ -0,0 +1,92 @@ +

    .

    + +

    !

    + +

    ?

    + +

    ,

    + +

    ;

    + +

    5

    + +

    :

    + +

    "

    + +

    '

    + +

    `

    + +

    ~

    + +

    10

    + +

    (

    + +

    )

    + +

    {

    + +

    }

    + +

    [

    + +

    15

    + +

    ]

    + +

    #

    + +

    $

    + +

    %

    + +

    +

    + +

    20

    + +

    -

    + +

    =

    + +

    <

    + +

    >

    + +

    &

    + +

    25

    + +

    @

    + +

    \

    + +

    /

    + +

    ^

    + +

    *

    + +

    30

    + +

    _

    + +

    |

    + +
    \-
    +\&
    +\%
    +\\
    +\`
    +
    + +

    \- \& \% \\ \`

    + +

    *foo*

    + +

    35

    + +

    _bar_

    + +

    `foo`

    diff --git a/tests/MMD6Tests/Escapes.text b/tests/MMD6Tests/Escapes.text new file mode 100644 index 0000000..7c7c18f --- /dev/null +++ b/tests/MMD6Tests/Escapes.text @@ -0,0 +1,91 @@ +\. + +\! + +\? + +\, + +\; + +5 + +\: + +\" + +\' + +\` + +\~ + +10 + +\( + +\) + +\{ + +\} + +\[ + +15 + +\] + +\# + +\$ + +\% + +\+ + +20 + +\- + +\= + +\< + +\> + +\& + +25 + +\@ + +\\ + +\/ + +\^ + +\* + +30 + +\_ + +\| + + \- + \& + \% + \\ + \` + +`\- \& \% \\ \`` + +\*foo\* + +35 + +\_bar\_ + +\`foo\` diff --git a/tests/MMD6Tests/Fenced Code Blocks.html b/tests/MMD6Tests/Fenced Code Blocks.html new file mode 100644 index 0000000..64170aa --- /dev/null +++ b/tests/MMD6Tests/Fenced Code Blocks.html @@ -0,0 +1,23 @@ +
    *foo*
    +
    + +
    *foo*
    +
    +bar
    +
    + +
      +
    • foo

      + +
      *foo*
      +
    • +
    • foo

      + +
      *foo*
      +
      +bar
      +
    • +
    + +
    foo
    +
    diff --git a/tests/MMD6Tests/Fenced Code Blocks.htmlc b/tests/MMD6Tests/Fenced Code Blocks.htmlc new file mode 100644 index 0000000..6586e2a --- /dev/null +++ b/tests/MMD6Tests/Fenced Code Blocks.htmlc @@ -0,0 +1,23 @@ +

    *foo*

    + +

    ``` +foo

    + +

    bar +```

    + +
      +
    • foo

      + +

      *foo*

    • +
    • foo

      + +

      ``` +foo

      + +

      bar +```

    • +
    + +

    ``` +foo

    diff --git a/tests/MMD6Tests/Fenced Code Blocks.text b/tests/MMD6Tests/Fenced Code Blocks.text new file mode 100644 index 0000000..8f074ce --- /dev/null +++ b/tests/MMD6Tests/Fenced Code Blocks.text @@ -0,0 +1,29 @@ +``` +*foo* +``` + +``` +*foo* + +bar +``` + + + + +* foo + + ``` + *foo* + ``` + +* foo + + ``` + *foo* + + bar + ``` + +``` +foo diff --git a/tests/MMD6Tests/HTML Blocks.html b/tests/MMD6Tests/HTML Blocks.html new file mode 100644 index 0000000..7413787 --- /dev/null +++ b/tests/MMD6Tests/HTML Blocks.html @@ -0,0 +1,58 @@ +

    bar

    + + +*bar* + + +

    foo + +bar +

    + + +*bar* + + + + + +

    bar

    + +
    + +

    5

    + +
    *bar*
    + +
    +*bar* +
    + +

    foo

    + +
    +*bar* +
    + +
    +*bar* + +
    + +
    + +

    bar

    + +
    + +

    10

    + + + + + + + + + + + + + + + diff --git a/tests/MMD6Tests/HTML Inline.html b/tests/MMD6Tests/HTML Inline.html new file mode 100644 index 0000000..6a2a05a --- /dev/null +++ b/tests/MMD6Tests/HTML Inline.html @@ -0,0 +1,15 @@ +

    bar

    + +

    bar

    + +
    <div>
    +    foo
    +</div>
    +
    + +

    test.

    + +

    test.

    diff --git a/tests/MMD6Tests/HTML Inline.htmlc b/tests/MMD6Tests/HTML Inline.htmlc new file mode 100644 index 0000000..6a2a05a --- /dev/null +++ b/tests/MMD6Tests/HTML Inline.htmlc @@ -0,0 +1,15 @@ +

    bar

    + +

    bar

    + +
    <div>
    +    foo
    +</div>
    +
    + +

    test.

    + +

    test.

    diff --git a/tests/MMD6Tests/HTML Inline.text b/tests/MMD6Tests/HTML Inline.text new file mode 100644 index 0000000..7af8412 --- /dev/null +++ b/tests/MMD6Tests/HTML Inline.text @@ -0,0 +1,15 @@ +*bar* + +*bar* + + +
    + foo +
    + +test. + +test. diff --git a/tests/MMD6Tests/Headers.html b/tests/MMD6Tests/Headers.html new file mode 100644 index 0000000..be7dc36 --- /dev/null +++ b/tests/MMD6Tests/Headers.html @@ -0,0 +1,36 @@ +

    foo

    + +

    foo

    + +

    foo

    + +

    foo

    + +
    # foo #
    +
    + +

    5

    + +

    #foo#

    + +

    #foo #

    + +

    foo # bar

    + +

    # foo #

    + +

    foo

    + +

    10

    + +

    foo

    + +

    foo

    + +
    foo
    + +
    foo
    + +

    ####### foo #######

    + +

    15

    diff --git a/tests/MMD6Tests/Headers.htmlc b/tests/MMD6Tests/Headers.htmlc new file mode 100644 index 0000000..579c177 --- /dev/null +++ b/tests/MMD6Tests/Headers.htmlc @@ -0,0 +1,36 @@ +

    foo

    + +

    foo

    + +

    foo

    + +

    foo

    + +
    # foo #
    +
    + +

    5

    + +

    #foo#

    + +

    #foo #

    + +

    foo # bar

    + +

    # foo #

    + +

    foo

    + +

    10

    + +

    foo

    + +

    foo

    + +
    foo
    + +
    foo
    + +

    ####### foo #######

    + +

    15

    diff --git a/tests/MMD6Tests/Headers.text b/tests/MMD6Tests/Headers.text new file mode 100644 index 0000000..c159a0c --- /dev/null +++ b/tests/MMD6Tests/Headers.text @@ -0,0 +1,35 @@ +# foo # + + # foo # + + # foo # + + # foo # + + # foo # + +5 + +#foo# + +#foo # + +# foo # bar + +\# foo # + +## foo ## + +10 + +### foo ### + +#### foo #### + +##### foo ##### + +###### foo ###### + +####### foo ####### + +15 diff --git a/tests/MMD6Tests/Horizontal Rules.html b/tests/MMD6Tests/Horizontal Rules.html new file mode 100644 index 0000000..dc38c62 --- /dev/null +++ b/tests/MMD6Tests/Horizontal Rules.html @@ -0,0 +1,83 @@ +

    Dashes:

    + +
    + +
    + +
    + +
    + +
    ---
    +
    + +

    5

    + +
    + +
    + +
    + +
    + +
    - - -
    +
    + +

    10

    + +

    Asterisks:

    + +
    + +
    + +
    + +
    + +
    ***
    +
    + +

    15

    + +
    + +
    + +
    + +
    + +
    * * *
    +
    + +

    20

    + +

    Underscores:

    + +
    + +
    + +
    + +
    + +
    ___
    +
    + +

    25

    + +
    + +
    + +
    + +
    + +
    _ _ _
    +
    + +

    30

    diff --git a/tests/MMD6Tests/Horizontal Rules.htmlc b/tests/MMD6Tests/Horizontal Rules.htmlc new file mode 100644 index 0000000..dc38c62 --- /dev/null +++ b/tests/MMD6Tests/Horizontal Rules.htmlc @@ -0,0 +1,83 @@ +

    Dashes:

    + +
    + +
    + +
    + +
    + +
    ---
    +
    + +

    5

    + +
    + +
    + +
    + +
    + +
    - - -
    +
    + +

    10

    + +

    Asterisks:

    + +
    + +
    + +
    + +
    + +
    ***
    +
    + +

    15

    + +
    + +
    + +
    + +
    + +
    * * *
    +
    + +

    20

    + +

    Underscores:

    + +
    + +
    + +
    + +
    + +
    ___
    +
    + +

    25

    + +
    + +
    + +
    + +
    + +
    _ _ _
    +
    + +

    30

    diff --git a/tests/MMD6Tests/Horizontal Rules.text b/tests/MMD6Tests/Horizontal Rules.text new file mode 100644 index 0000000..0c968a6 --- /dev/null +++ b/tests/MMD6Tests/Horizontal Rules.text @@ -0,0 +1,77 @@ +Dashes: + +--- + + --- + + --- + + --- + + --- + +5 + +- - - + + - - - + + - - - + + - - - + + - - - + +10 + +Asterisks: + +*** + + *** + + *** + + *** + + *** + +15 + +* * * + + * * * + + * * * + + * * * + + * * * + +20 + +Underscores: + +___ + + ___ + + ___ + + ___ + + ___ + +25 + +_ _ _ + + _ _ _ + + _ _ _ + + _ _ _ + + _ _ _ + +30 diff --git a/tests/MMD6Tests/Indented Code Blocks.html b/tests/MMD6Tests/Indented Code Blocks.html new file mode 100644 index 0000000..d170468 --- /dev/null +++ b/tests/MMD6Tests/Indented Code Blocks.html @@ -0,0 +1,58 @@ +
    foo
    +
    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +

    foo

    + +

    5

    + +

    foo

    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +
    	bar
    +
    + +

    10

    + +

    foo

    + +
    bar
    +	bar
    +
    + +

    foo

    + +
    bar
    +
    + +

    15

    + +

    foo +bar +bar

    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +
    bar
    +
    +bar
    +
    +bar
    +
    diff --git a/tests/MMD6Tests/Indented Code Blocks.htmlc b/tests/MMD6Tests/Indented Code Blocks.htmlc new file mode 100644 index 0000000..d170468 --- /dev/null +++ b/tests/MMD6Tests/Indented Code Blocks.htmlc @@ -0,0 +1,58 @@ +
    foo
    +
    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +

    foo

    + +

    5

    + +

    foo

    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +
    	bar
    +
    + +

    10

    + +

    foo

    + +
    bar
    +	bar
    +
    + +

    foo

    + +
    bar
    +
    + +

    15

    + +

    foo +bar +bar

    + +

    foo

    + +
    bar
    +
    + +

    foo

    + +
    bar
    +
    +bar
    +
    +bar
    +
    diff --git a/tests/MMD6Tests/Indented Code Blocks.text b/tests/MMD6Tests/Indented Code Blocks.text new file mode 100644 index 0000000..4256b80 --- /dev/null +++ b/tests/MMD6Tests/Indented Code Blocks.text @@ -0,0 +1,50 @@ + foo + +foo + + bar + +foo + + foo + +5 + + foo + + foo + + bar + +foo + + bar + +10 + +foo + + bar + bar + +foo + + bar + +15 + +foo + bar + bar + +foo + + bar + +foo + + bar + + bar + + bar diff --git a/tests/MMD6Tests/Inline Footnotes.html b/tests/MMD6Tests/Inline Footnotes.html new file mode 100644 index 0000000..73f55d4 --- /dev/null +++ b/tests/MMD6Tests/Inline Footnotes.html @@ -0,0 +1,20 @@ +

    Inline.[1]

    + +

    Inline.[2]

    + +
    +
    +
      + +
    1. +

      foo bar  ↩

      +
    2. + +
    3. +

      foo bar +foo +foo.  ↩

      +
    4. + +
    +
    diff --git a/tests/MMD6Tests/Inline Footnotes.htmlc b/tests/MMD6Tests/Inline Footnotes.htmlc new file mode 100644 index 0000000..7e1b7a6 --- /dev/null +++ b/tests/MMD6Tests/Inline Footnotes.htmlc @@ -0,0 +1,5 @@ +

    Inline.[^foo bar]

    + +

    Inline.[^foo bar +foo +foo.]

    diff --git a/tests/MMD6Tests/Inline Footnotes.text b/tests/MMD6Tests/Inline Footnotes.text new file mode 100644 index 0000000..964006a --- /dev/null +++ b/tests/MMD6Tests/Inline Footnotes.text @@ -0,0 +1,5 @@ +Inline.[^foo *bar*] + +Inline.[^foo *bar* +[foo](/bar) +**foo**.] diff --git a/tests/MMD6Tests/Inline Images.html b/tests/MMD6Tests/Inline Images.html new file mode 100644 index 0000000..54b1cac --- /dev/null +++ b/tests/MMD6Tests/Inline Images.html @@ -0,0 +1,27 @@ +

    Just a URL.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    5

    + +

    Empty.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    10

    + +

    URL and title.

    + +

    URL and title.

    diff --git a/tests/MMD6Tests/Inline Images.htmlc b/tests/MMD6Tests/Inline Images.htmlc new file mode 100644 index 0000000..54b1cac --- /dev/null +++ b/tests/MMD6Tests/Inline Images.htmlc @@ -0,0 +1,27 @@ +

    Just a URL.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    5

    + +

    Empty.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    10

    + +

    URL and title.

    + +

    URL and title.

    diff --git a/tests/MMD6Tests/Inline Images.text b/tests/MMD6Tests/Inline Images.text new file mode 100644 index 0000000..643b646 --- /dev/null +++ b/tests/MMD6Tests/Inline Images.text @@ -0,0 +1,27 @@ +Just a ![URL](/url/). + +![URL and title](/url/ "title"). + +![URL and title](/url/ "title preceded by two spaces"). + +![URL and title](/url/ "title preceded by a tab"). + +![URL and title](/url/ "title has spaces afterward" ). + +5 + +[Empty](). + +![**URL** and *title*](/url/ "title"). + +![URL and title](/url/ "title"). + +![URL and title](/url/ 'title'). + +![URL and title](/url/ (title)). + +10 + +![URL and title](/url/ ""). + +![URL and title](/url/ "*title*"). diff --git a/tests/MMD6Tests/Inline Links.html b/tests/MMD6Tests/Inline Links.html new file mode 100644 index 0000000..664c4b7 --- /dev/null +++ b/tests/MMD6Tests/Inline Links.html @@ -0,0 +1,32 @@ +

    Just a URL.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    5

    + +

    Empty.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    10

    + +

    URL and title.

    + +

    URL and title.

    + +

    [URL and title] (/url/file.txt “title”).

    + +

    [URL and title] +(/url/file.txt “title”).

    diff --git a/tests/MMD6Tests/Inline Links.htmlc b/tests/MMD6Tests/Inline Links.htmlc new file mode 100644 index 0000000..302544f --- /dev/null +++ b/tests/MMD6Tests/Inline Links.htmlc @@ -0,0 +1,32 @@ +

    Just a URL.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    5

    + +

    Empty.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    URL and title.

    + +

    10

    + +

    URL and title.

    + +

    URL and title.

    + +

    [URL and title] (/url/file.txt "title").

    + +

    [URL and title] +(/url/file.txt "title").

    diff --git a/tests/MMD6Tests/Inline Links.text b/tests/MMD6Tests/Inline Links.text new file mode 100644 index 0000000..a6a4095 --- /dev/null +++ b/tests/MMD6Tests/Inline Links.text @@ -0,0 +1,32 @@ +Just a [URL](http://url/file.txt). + +[URL and title](/url/file.txt "title"). + +[URL and title](/url/file.txt "title preceded by two spaces"). + +[URL and title](/url/file.txt "title preceded by a tab"). + +[URL and title](/url/file.txt "title has spaces afterward" ). + +5 + +[Empty](). + +[**URL** and *title*](/url/file.txt "title"). + +[URL and title](/url/file.txt "title"). + +[URL and title](/url/file.txt 'title'). + +[URL and title](/url/file.txt (title)). + +10 + +[URL and title](/url/file.txt ""). + +[URL and title](/url/file.txt "*title*"). + +[URL and title] (/url/file.txt "*title*"). + +[URL and title] +(/url/file.txt "*title*"). diff --git a/tests/MMD6Tests/Link Attributes.html b/tests/MMD6Tests/Link Attributes.html new file mode 100644 index 0000000..8afed8d --- /dev/null +++ b/tests/MMD6Tests/Link Attributes.html @@ -0,0 +1,11 @@ +

    foo image

    + +

    foo link

    + +

    foo link2

    + +

    foo link3

    + +

    test

    + +

    5

    diff --git a/tests/MMD6Tests/Link Attributes.htmlc b/tests/MMD6Tests/Link Attributes.htmlc new file mode 100644 index 0000000..f717d6d --- /dev/null +++ b/tests/MMD6Tests/Link Attributes.htmlc @@ -0,0 +1,19 @@ +

    foo ![image][]

    + +

    foo [link][]

    + +

    foo [link2][]

    + +

    foo [link3][]

    + +

    ![test](http://foo.bar/ "title" width="40px" height=400px)

    + +

    5

    + +

    [image]: http://foo.bar/ "title" width="40px" height=400px +[link]: http://foo.bar/1 class=external +style="border: solid black 1px;" +[link2]: http://foo.bar/2 class=external +style="border: solid black 1px;" +[link3]: http://foo.bar/3 class=external +style="border: solid black 1px;"

    diff --git a/tests/MMD6Tests/Link Attributes.text b/tests/MMD6Tests/Link Attributes.text new file mode 100644 index 0000000..e7546a6 --- /dev/null +++ b/tests/MMD6Tests/Link Attributes.text @@ -0,0 +1,20 @@ +foo ![image][] + +foo [link][] + +foo [link2][] + +foo [link3][] + +![test](http://foo.bar/ "title" width="40px" height=400px) + +5 + + +[image]: http://foo.bar/ "title" width="40px" height=400px +[link]: class=external + style="border: solid black 1px;" +[link2]: http://foo.bar/2 class=external + style="border: solid black 1px;" +[link3]: http://foo.bar/3 class=external + style="border: solid black 1px;" diff --git a/tests/MMD6Tests/Lists.html b/tests/MMD6Tests/Lists.html new file mode 100644 index 0000000..3968561 --- /dev/null +++ b/tests/MMD6Tests/Lists.html @@ -0,0 +1,92 @@ +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    bar

    + +
      +
    • foo

    • +
    • foo

    • +
    • foo

    • +
    + +

    5

    + +
      +
    1. foo
    2. +
    3. foo
    4. +
    5. foo
    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    bar

    + +
      +
    1. foo

    2. +
    3. foo

    4. +
    5. foo

    6. +
    + +

    10

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo
    • +
    • foo
    • +
    • foo
    • +
    + +

    bar

    + +
      +
    • foo +bar
    • +
    • foo +bar
    • +
    • foo +bar
    • +
    + +

    15

    + +
      +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    diff --git a/tests/MMD6Tests/Markdown Syntax.html b/tests/MMD6Tests/Markdown Syntax.html new file mode 100644 index 0000000..3ed1e49 --- /dev/null +++ b/tests/MMD6Tests/Markdown Syntax.html @@ -0,0 +1,956 @@ +

    Markdown: Syntax

    + + + + + +

    Note: This document is itself written using Markdown; you +can see the source for it by adding ‘.text’ to the URL.

    + +
    + +

    Overview

    + +

    Philosophy

    + +

    Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

    + +

    Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it’s been marked up with tags or formatting instructions. While +Markdown’s syntax has been influenced by several existing text-to-HTML +filters – including Setext, atx, Textile, reStructuredText, +Grutatext, and EtText – the single biggest source of +inspiration for Markdown’s syntax is the format of plain text email.

    + +

    To this end, Markdown’s syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like *emphasis*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you’ve ever +used email.

    + +

    Inline HTML

    + +

    Markdown’s syntax is intended for one purpose: to be used as a +format for writing for the web.

    + +

    Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is not to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a publishing format; Markdown is a writing +format. Thus, Markdown’s formatting syntax only addresses issues that +can be conveyed in plain text.

    + +

    For any markup that is not covered by Markdown’s syntax, you simply +use HTML itself. There’s no need to preface it or delimit it to +indicate that you’re switching from Markdown to HTML; you just use +the tags.

    + +

    The only restrictions are that block-level HTML elements – e.g. <div>, +<table>, <pre>, <p>, etc. – must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) <p> tags around HTML block-level tags.

    + +

    For example, to add an HTML table to a Markdown article:

    + +
    This is a regular paragraph.
    +
    +<table>
    +    <tr>
    +        <td>Foo</td>
    +    </tr>
    +</table>
    +
    +This is another regular paragraph.
    +
    + +

    Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can’t use Markdown-style *emphasis* inside an +HTML block.

    + +

    Span-level HTML tags – e.g. <span>, <cite>, or <del> – can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you’d prefer to use HTML <a> or <img> tags instead of Markdown’s +link or image syntax, go right ahead.

    + +

    Unlike block-level HTML tags, Markdown syntax is processed within +span-level tags.

    + +

    Automatic Escaping for Special Characters

    + +

    In HTML, there are two characters that demand special treatment: < +and &. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. &lt;, and +&amp;.

    + +

    Ampersands in particular are bedeviling for web writers. If you want to +write about ‘AT&T’, you need to write ‘AT&amp;T’. You even need to +escape ampersands within URLs. Thus, if you want to link to:

    + +
    http://images.google.com/images?num=30&q=larry+bird
    +
    + +

    you need to encode the URL as:

    + +
    http://images.google.com/images?num=30&amp;q=larry+bird
    +
    + +

    in your anchor tag href attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites.

    + +

    Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into &amp;.

    + +

    So, if you want to include a copyright symbol in your article, you can write:

    + +
    &copy;
    +
    + +

    and Markdown will leave it alone. But if you write:

    + +
    AT&T
    +
    + +

    Markdown will translate it to:

    + +
    AT&amp;T
    +
    + +

    Similarly, because Markdown supports inline HTML, if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write:

    + +
    4 < 5
    +
    + +

    Markdown will translate it to:

    + +
    4 &lt; 5
    +
    + +

    However, inside Markdown code spans and blocks, angle brackets and +ampersands are always encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single < +and & in your example code needs to be escaped.)

    + +
    + +

    Block Elements

    + +

    Paragraphs and Line Breaks

    + +

    A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line – a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be indented with spaces or tabs.

    + +

    The implication of the “one or more consecutive lines of text” rule is +that Markdown supports “hard-wrapped” text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type’s “Convert Line Breaks” option) which translate every line break +character in a paragraph into a <br /> tag.

    + +

    When you do want to insert a <br /> break tag using Markdown, you +end a line with two or more spaces, then type return.

    + +

    Yes, this takes a tad more effort to create a <br />, but a simplistic +“every line break is a <br />” rule wouldn’t work for Markdown. +Markdown’s email-style blockquoting and multi-paragraph list items +work best – and look better – when you format them with hard breaks.

    + + + +

    Markdown supports two styles of headers, Setext and atx.

    + +

    Setext-style headers are “underlined” using equal signs (for first-level +headers) and dashes (for second-level headers). For example:

    + +
    This is an H1
    +=============
    +
    +This is an H2
    +-------------
    +
    + +

    Any number of underlining =’s or -’s will work.

    + +

    Atx-style headers use 1–6 hash characters at the start of the line, +corresponding to header levels 1–6. For example:

    + +
    # This is an H1
    +
    +## This is an H2
    +
    +###### This is an H6
    +
    + +

    Optionally, you may “close” atx-style headers. This is purely +cosmetic – you can use this if you think it looks better. The +closing hashes don’t even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) For example:

    + +
    # This is an H1 #
    +
    +## This is an H2 ##
    +
    +### This is an H3 ######
    +
    + +

    Blockquotes

    + +

    Markdown uses email-style > characters for blockquoting. If you’re +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a > before every line:

    + +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +> 
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +> id sem consectetuer libero luctus adipiscing.
    +
    + +

    Markdown allows you to be lazy and only put the > before the first +line of a hard-wrapped paragraph:

    + +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +id sem consectetuer libero luctus adipiscing.
    +
    + +

    Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of >:

    + +
    > This is the first level of quoting.
    +>
    +> > This is nested blockquote.
    +>
    +> Back to the first level.
    +
    + +

    Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks:

    + +
    > ## This is a header.
    +> 
    +> 1.   This is the first list item.
    +> 2.   This is the second list item.
    +> 
    +> Here's some example code:
    +> 
    +>     return shell_exec("echo $input | $markdown_script");
    +
    + +

    Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu.

    + +

    Lists

    + +

    Markdown supports ordered (numbered) and unordered (bulleted) lists.

    + +

    Unordered lists use asterisks, pluses, and hyphens – interchangeably +– as list markers:

    + +
    *   Red
    +*   Green
    +*   Blue
    +
    + +

    is equivalent to:

    + +
    +   Red
    ++   Green
    ++   Blue
    +
    + +

    and:

    + +
    -   Red
    +-   Green
    +-   Blue
    +
    + +

    Ordered lists use numbers followed by periods:

    + +
    1.  Bird
    +2.  McHale
    +3.  Parish
    +
    + +

    It’s important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is:

    + +
    <ol>
    +<li>Bird</li>
    +<li>McHale</li>
    +<li>Parish</li>
    +</ol>
    +
    + +

    If you instead wrote the list in Markdown like this:

    + +
    1.  Bird
    +1.  McHale
    +1.  Parish
    +
    + +

    or even:

    + +
    3. Bird
    +1. McHale
    +8. Parish
    +
    + +

    you’d get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don’t have to.

    + +

    If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number.

    + +

    List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab.

    + +

    To make lists look nice, you can wrap items with hanging indents:

    + +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +    viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +    Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    But if you want to be lazy, you don’t have to:

    + +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    If list items are separated by blank lines, Markdown will wrap the +items in <p> tags in the HTML output. For example, this input:

    + +
    *   Bird
    +*   Magic
    +
    + +

    will turn into:

    + +
    <ul>
    +<li>Bird</li>
    +<li>Magic</li>
    +</ul>
    +
    + +

    But this:

    + +
    *   Bird
    +
    +*   Magic
    +
    + +

    will turn into:

    + +
    <ul>
    +<li><p>Bird</p></li>
    +<li><p>Magic</p></li>
    +</ul>
    +
    + +

    List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be indented by either 4 spaces +or one tab:

    + +
    1.  This is a list item with two paragraphs. Lorem ipsum dolor
    +    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
    +    mi posuere lectus.
    +
    +    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
    +    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
    +    sit amet velit.
    +
    +2.  Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy:

    + +
    *   This is a list item with two paragraphs.
    +
    +    This is the second paragraph in the list item. You're
    +only required to indent the first line. Lorem ipsum dolor
    +sit amet, consectetuer adipiscing elit.
    +
    +*   Another item in the same list.
    +
    + +

    To put a blockquote within a list item, the blockquote’s > +delimiters need to be indented:

    + +
    *   A list item with a blockquote:
    +
    +    > This is a blockquote
    +    > inside a list item.
    +
    + +

    To put a code block within a list item, the code block needs +to be indented twice – 8 spaces or two tabs:

    + +
    *   A list item with a code block:
    +
    +        <code goes here>
    +
    + +

    It’s worth noting that it’s possible to trigger an ordered list by +accident, by writing something like this:

    + +
    1986. What a great season.
    +
    + +

    In other words, a number-period-space sequence at the beginning of a +line. To avoid this, you can backslash-escape the period:

    + +
    1986\. What a great season.
    +
    + +

    Code Blocks

    + +

    Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both <pre> and <code> tags.

    + +

    To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input:

    + +
    This is a normal paragraph:
    +
    +    This is a code block.
    +
    + +

    Markdown will generate:

    + +
    <p>This is a normal paragraph:</p>
    +
    +<pre><code>This is a code block.
    +</code></pre>
    +
    + +

    One level of indentation – 4 spaces or 1 tab – is removed from each +line of the code block. For example, this:

    + +
    Here is an example of AppleScript:
    +
    +    tell application "Foo"
    +        beep
    +    end tell
    +
    + +

    will turn into:

    + +
    <p>Here is an example of AppleScript:</p>
    +
    +<pre><code>tell application "Foo"
    +    beep
    +end tell
    +</code></pre>
    +
    + +

    A code block continues until it reaches a line that is not indented +(or the end of the article).

    + +

    Within a code block, ampersands (&) and angle brackets (< and >) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown – just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this:

    + +
        <div class="footer">
    +        &copy; 2004 Foo Corporation
    +    </div>
    +
    + +

    will turn into:

    + +
    <pre><code>&lt;div class="footer"&gt;
    +    &amp;copy; 2004 Foo Corporation
    +&lt;/div&gt;
    +</code></pre>
    +
    + +

    Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it’s also easy to use Markdown to write about Markdown’s own syntax.

    + +

    Horizontal Rules

    + +

    You can produce a horizontal rule tag (<hr />) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule:

    + +
    * * *
    +
    +***
    +
    +*****
    +
    +- - -
    +
    +---------------------------------------
    +
    + +
    + +

    Span Elements

    + + + +

    Markdown supports two styles of links: inline and reference.

    + +

    In both styles, the link text is delimited by [square brackets].

    + +

    To create an inline link, use a set of regular parentheses immediately +after the link text’s closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an optional +title for the link, surrounded in quotes. For example:

    + +
    This is [an example](http://example.com/ "Title") inline link.
    +
    +[This link](http://example.net/) has no title attribute.
    +
    + +

    Will produce:

    + +
    <p>This is <a href="http://example.com/" title="Title">
    +an example</a> inline link.</p>
    +
    +<p><a href="http://example.net/">This link</a> has no
    +title attribute.</p>
    +
    + +

    If you’re referring to a local resource on the same server, you can +use relative paths:

    + +
    See my [About](/about/) page for details.   
    +
    + +

    Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link:

    + +
    This is [an example][id] reference-style link.
    +
    + +

    You can optionally use a space to separate the sets of brackets:

    + +
    This is [an example] [id] reference-style link.
    +
    + +

    Then, anywhere in the document, you define your link label like this, +on a line by itself:

    + +
    [id]: http://example.com/  "Optional Title Here"
    +
    + +

    That is:

    + +
      +
    • Square brackets containing the link identifier (optionally +indented from the left margin using up to three spaces);
    • +
    • followed by a colon;
    • +
    • followed by one or more spaces (or tabs);
    • +
    • followed by the URL for the link;
    • +
    • optionally followed by a title attribute for the link, enclosed +in double or single quotes, or enclosed in parentheses.
    • +
    + +

    The following three link definitions are equivalent:

    + +
    [foo]: http://example.com/  "Optional Title Here"
    +[foo]: http://example.com/  'Optional Title Here'
    +[foo]: http://example.com/  (Optional Title Here)
    +
    + +

    Note: There is a known bug in Markdown.pl 1.0.1 which prevents +single quotes from being used to delimit link titles.

    + +

    The link URL may, optionally, be surrounded by angle brackets:

    + +
    [id]: <http://example.com/>  "Optional Title Here"
    +
    + +

    You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs:

    + +
    [id]: http://example.com/longish/path/to/resource/here
    +    "Optional Title Here"
    +
    + +

    Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output.

    + +

    Link definition names may consist of letters, numbers, spaces, and +punctuation – but they are not case sensitive. E.g. these two +links:

    + +
    [link text][a]
    +[link text][A]
    +
    + +

    are equivalent.

    + +

    The implicit link name shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets – e.g., to link the word +“Google” to the google.com web site, you could simply write:

    + +
    [Google][]
    +
    + +

    And then define the link:

    + +
    [Google]: http://google.com/
    +
    + +

    Because link names may contain spaces, this shortcut even works for +multiple words in the link text:

    + +
    Visit [Daring Fireball][] for more information.
    +
    + +

    And then define the link:

    + +
    [Daring Fireball]: http://daringfireball.net/
    +
    + +

    Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they’re +used, but if you want, you can put them all at the end of your +document, sort of like footnotes.

    + +

    Here’s an example of reference links in action:

    + +
    I get 10 times more traffic from [Google] [1] than from
    +[Yahoo] [2] or [MSN] [3].
    +
    +  [1]: http://google.com/        "Google"
    +  [2]: http://search.yahoo.com/  "Yahoo Search"
    +  [3]: http://search.msn.com/    "MSN Search"
    +
    + +

    Using the implicit link name shortcut, you could instead write:

    + +
    I get 10 times more traffic from [Google][] than from
    +[Yahoo][] or [MSN][].
    +
    +  [google]: http://google.com/        "Google"
    +  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
    +  [msn]:    http://search.msn.com/    "MSN Search"
    +
    + +

    Both of the above examples will produce the following HTML output:

    + +
    <p>I get 10 times more traffic from <a href="http://google.com/"
    +title="Google">Google</a> than from
    +<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
    +or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
    +
    + +

    For comparison, here is the same paragraph written using +Markdown’s inline link style:

    + +
    I get 10 times more traffic from [Google](http://google.com/ "Google")
    +than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
    +[MSN](http://search.msn.com/ "MSN Search").
    +
    + +

    The point of reference-style links is not that they’re easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it’s 176 characters; and as raw HTML, +it’s 234 characters. In the raw HTML, there’s more markup than there +is text.

    + +

    With Markdown’s reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose.

    + +

    Emphasis

    + +

    Markdown treats asterisks (*) and underscores (_) as indicators of +emphasis. Text wrapped with one * or _ will be wrapped with an +HTML <em> tag; double *’s or _’s will be wrapped with an HTML +<strong> tag. E.g., this input:

    + +
    *single asterisks*
    +
    +_single underscores_
    +
    +**double asterisks**
    +
    +__double underscores__
    +
    + +

    will produce:

    + +
    <em>single asterisks</em>
    +
    +<em>single underscores</em>
    +
    +<strong>double asterisks</strong>
    +
    +<strong>double underscores</strong>
    +
    + +

    You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span.

    + +

    Emphasis can be used in the middle of a word:

    + +
    un*frigging*believable
    +
    + +

    But if you surround an * or _ with spaces, it’ll be treated as a +literal asterisk or underscore.

    + +

    To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it:

    + +
    \*this text is surrounded by literal asterisks\*
    +
    + +

    Code

    + +

    To indicate a span of code, wrap it with backtick quotes (`). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example:

    + +
    Use the `printf()` function.
    +
    + +

    will produce:

    + +
    <p>Use the <code>printf()</code> function.</p>
    +
    + +

    To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters:

    + +
    ``There is a literal backtick (`) here.``
    +
    + +

    which will produce this:

    + +
    <p><code>There is a literal backtick (`) here.</code></p>
    +
    + +

    The backtick delimiters surrounding a code span may include spaces – +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span:

    + +
    A single backtick in a code span: `` ` ``
    +
    +A backtick-delimited string in a code span: `` `foo` ``
    +
    + +

    will produce:

    + +
    <p>A single backtick in a code span: <code>`</code></p>
    +
    +<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
    +
    + +

    With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this:

    + +
    Please don't use any `<blink>` tags.
    +
    + +

    into:

    + +
    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
    +
    + +

    You can write this:

    + +
    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
    +
    + +

    to produce:

    + +
    <p><code>&amp;#8212;</code> is the decimal-encoded
    +equivalent of <code>&amp;mdash;</code>.</p>
    +
    + +

    Images

    + +

    Admittedly, it’s fairly difficult to devise a “natural” syntax for +placing images into a plain text document format.

    + +

    Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: inline and reference.

    + +

    Inline image syntax looks like this:

    + +
    ![Alt text](/path/to/img.jpg)
    +
    +![Alt text](/path/to/img.jpg "Optional title")
    +
    + +

    That is:

    + +
      +
    • An exclamation mark: !;
    • +
    • followed by a set of square brackets, containing the alt +attribute text for the image;
    • +
    • followed by a set of parentheses, containing the URL or path to +the image, and an optional title attribute enclosed in double +or single quotes.
    • +
    + +

    Reference-style image syntax looks like this:

    + +
    ![Alt text][id]
    +
    + +

    Where “id” is the name of a defined image reference. Image references +are defined using syntax identical to link references:

    + +
    [id]: url/to/image  "Optional title attribute"
    +
    + +

    As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML <img> tags.

    + +
    + +

    Miscellaneous

    + +

    Automatic Links

    + +

    Markdown supports a shortcut style for creating “automatic” links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

    + +
    <http://example.com/>
    +
    + +

    Markdown will turn this into:

    + +
    <a href="http://example.com/">http://example.com/</a>
    +
    + +

    Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this:

    + +
    <address@example.com>
    +
    + +

    into something like this:

    + +
    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
    +&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
    +&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
    +&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
    +
    + +

    which will render in a browser as a clickable link to “address@example.com”.

    + +

    (This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won’t fool all of +them. It’s better than nothing, but an address published in this way +will probably eventually start receiving spam.)

    + +

    Backslash Escapes

    + +

    Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown’s +formatting syntax. For example, if you wanted to surround a word +with literal asterisks (instead of an HTML <em> tag), you can use +backslashes before the asterisks, like this:

    + +
    \*literal asterisks\*
    +
    + +

    Markdown provides backslash escapes for the following characters:

    + +
    \   backslash
    +`   backtick
    +*   asterisk
    +_   underscore
    +{}  curly braces
    +[]  square brackets
    +()  parentheses
    +#   hash mark
    ++   plus sign
    +-   minus sign (hyphen)
    +.   dot
    +!   exclamation mark
    +
    diff --git a/tests/MMD6Tests/Markdown Syntax.htmlc b/tests/MMD6Tests/Markdown Syntax.htmlc new file mode 100644 index 0000000..be89b34 --- /dev/null +++ b/tests/MMD6Tests/Markdown Syntax.htmlc @@ -0,0 +1,956 @@ +

    Markdown: Syntax

    + + + + + +

    Note: This document is itself written using Markdown; you +can see the source for it by adding '.text' to the URL.

    + +
    + +

    Overview

    + +

    Philosophy

    + +

    Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

    + +

    Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including Setext, atx, Textile, reStructuredText, +Grutatext, and EtText -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email.

    + +

    To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like *emphasis*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email.

    + +

    Inline HTML

    + +

    Markdown's syntax is intended for one purpose: to be used as a +format for writing for the web.

    + +

    Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is not to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a publishing format; Markdown is a writing +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text.

    + +

    For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags.

    + +

    The only restrictions are that block-level HTML elements -- e.g. <div>, +<table>, <pre>, <p>, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) <p> tags around HTML block-level tags.

    + +

    For example, to add an HTML table to a Markdown article:

    + +
    This is a regular paragraph.
    +
    +<table>
    +    <tr>
    +        <td>Foo</td>
    +    </tr>
    +</table>
    +
    +This is another regular paragraph.
    +
    + +

    Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style *emphasis* inside an +HTML block.

    + +

    Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML <a> or <img> tags instead of Markdown's +link or image syntax, go right ahead.

    + +

    Unlike block-level HTML tags, Markdown syntax is processed within +span-level tags.

    + +

    Automatic Escaping for Special Characters

    + +

    In HTML, there are two characters that demand special treatment: < +and &. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. &lt;, and +&amp;.

    + +

    Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write 'AT&amp;T'. You even need to +escape ampersands within URLs. Thus, if you want to link to:

    + +
    http://images.google.com/images?num=30&q=larry+bird
    +
    + +

    you need to encode the URL as:

    + +
    http://images.google.com/images?num=30&amp;q=larry+bird
    +
    + +

    in your anchor tag href attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites.

    + +

    Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into &amp;.

    + +

    So, if you want to include a copyright symbol in your article, you can write:

    + +
    &copy;
    +
    + +

    and Markdown will leave it alone. But if you write:

    + +
    AT&T
    +
    + +

    Markdown will translate it to:

    + +
    AT&amp;T
    +
    + +

    Similarly, because Markdown supports inline HTML, if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write:

    + +
    4 < 5
    +
    + +

    Markdown will translate it to:

    + +
    4 &lt; 5
    +
    + +

    However, inside Markdown code spans and blocks, angle brackets and +ampersands are always encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single < +and & in your example code needs to be escaped.)

    + +
    + +

    Block Elements

    + +

    Paragraphs and Line Breaks

    + +

    A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be indented with spaces or tabs.

    + +

    The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a <br /> tag.

    + +

    When you do want to insert a <br /> break tag using Markdown, you +end a line with two or more spaces, then type return.

    + +

    Yes, this takes a tad more effort to create a <br />, but a simplistic +"every line break is a <br />" rule wouldn't work for Markdown. +Markdown's email-style blockquoting and multi-paragraph list items +work best -- and look better -- when you format them with hard breaks.

    + +

    Headers

    + +

    Markdown supports two styles of headers, Setext and atx.

    + +

    Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example:

    + +
    This is an H1
    +=============
    +
    +This is an H2
    +-------------
    +
    + +

    Any number of underlining ='s or -'s will work.

    + +

    Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example:

    + +
    # This is an H1
    +
    +## This is an H2
    +
    +###### This is an H6
    +
    + +

    Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.):

    + +
    # This is an H1 #
    +
    +## This is an H2 ##
    +
    +### This is an H3 ######
    +
    + +

    Blockquotes

    + +

    Markdown uses email-style > characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a > before every line:

    + +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +> 
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +> id sem consectetuer libero luctus adipiscing.
    +
    + +

    Markdown allows you to be lazy and only put the > before the first +line of a hard-wrapped paragraph:

    + +
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    +consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    +Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    +
    +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    +id sem consectetuer libero luctus adipiscing.
    +
    + +

    Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of >:

    + +
    > This is the first level of quoting.
    +>
    +> > This is nested blockquote.
    +>
    +> Back to the first level.
    +
    + +

    Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks:

    + +
    > ## This is a header.
    +> 
    +> 1.   This is the first list item.
    +> 2.   This is the second list item.
    +> 
    +> Here's some example code:
    +> 
    +>     return shell_exec("echo $input | $markdown_script");
    +
    + +

    Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu.

    + +

    Lists

    + +

    Markdown supports ordered (numbered) and unordered (bulleted) lists.

    + +

    Unordered lists use asterisks, pluses, and hyphens -- interchangeably +-- as list markers:

    + +
    *   Red
    +*   Green
    +*   Blue
    +
    + +

    is equivalent to:

    + +
    +   Red
    ++   Green
    ++   Blue
    +
    + +

    and:

    + +
    -   Red
    +-   Green
    +-   Blue
    +
    + +

    Ordered lists use numbers followed by periods:

    + +
    1.  Bird
    +2.  McHale
    +3.  Parish
    +
    + +

    It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is:

    + +
    <ol>
    +<li>Bird</li>
    +<li>McHale</li>
    +<li>Parish</li>
    +</ol>
    +
    + +

    If you instead wrote the list in Markdown like this:

    + +
    1.  Bird
    +1.  McHale
    +1.  Parish
    +
    + +

    or even:

    + +
    3. Bird
    +1. McHale
    +8. Parish
    +
    + +

    you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to.

    + +

    If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number.

    + +

    List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab.

    + +

    To make lists look nice, you can wrap items with hanging indents:

    + +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +    viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +    Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    But if you want to be lazy, you don't have to:

    + +
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    +viverra nec, fringilla in, laoreet vitae, risus.
    +*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    +Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    If list items are separated by blank lines, Markdown will wrap the +items in <p> tags in the HTML output. For example, this input:

    + +
    *   Bird
    +*   Magic
    +
    + +

    will turn into:

    + +
    <ul>
    +<li>Bird</li>
    +<li>Magic</li>
    +</ul>
    +
    + +

    But this:

    + +
    *   Bird
    +
    +*   Magic
    +
    + +

    will turn into:

    + +
    <ul>
    +<li><p>Bird</p></li>
    +<li><p>Magic</p></li>
    +</ul>
    +
    + +

    List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be indented by either 4 spaces +or one tab:

    + +
    1.  This is a list item with two paragraphs. Lorem ipsum dolor
    +    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
    +    mi posuere lectus.
    +
    +    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
    +    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
    +    sit amet velit.
    +
    +2.  Suspendisse id sem consectetuer libero luctus adipiscing.
    +
    + +

    It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy:

    + +
    *   This is a list item with two paragraphs.
    +
    +    This is the second paragraph in the list item. You're
    +only required to indent the first line. Lorem ipsum dolor
    +sit amet, consectetuer adipiscing elit.
    +
    +*   Another item in the same list.
    +
    + +

    To put a blockquote within a list item, the blockquote's > +delimiters need to be indented:

    + +
    *   A list item with a blockquote:
    +
    +    > This is a blockquote
    +    > inside a list item.
    +
    + +

    To put a code block within a list item, the code block needs +to be indented twice -- 8 spaces or two tabs:

    + +
    *   A list item with a code block:
    +
    +        <code goes here>
    +
    + +

    It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this:

    + +
    1986. What a great season.
    +
    + +

    In other words, a number-period-space sequence at the beginning of a +line. To avoid this, you can backslash-escape the period:

    + +
    1986\. What a great season.
    +
    + +

    Code Blocks

    + +

    Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both <pre> and <code> tags.

    + +

    To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input:

    + +
    This is a normal paragraph:
    +
    +    This is a code block.
    +
    + +

    Markdown will generate:

    + +
    <p>This is a normal paragraph:</p>
    +
    +<pre><code>This is a code block.
    +</code></pre>
    +
    + +

    One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this:

    + +
    Here is an example of AppleScript:
    +
    +    tell application "Foo"
    +        beep
    +    end tell
    +
    + +

    will turn into:

    + +
    <p>Here is an example of AppleScript:</p>
    +
    +<pre><code>tell application "Foo"
    +    beep
    +end tell
    +</code></pre>
    +
    + +

    A code block continues until it reaches a line that is not indented +(or the end of the article).

    + +

    Within a code block, ampersands (&) and angle brackets (< and >) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this:

    + +
        <div class="footer">
    +        &copy; 2004 Foo Corporation
    +    </div>
    +
    + +

    will turn into:

    + +
    <pre><code>&lt;div class="footer"&gt;
    +    &amp;copy; 2004 Foo Corporation
    +&lt;/div&gt;
    +</code></pre>
    +
    + +

    Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax.

    + +

    Horizontal Rules

    + +

    You can produce a horizontal rule tag (<hr />) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule:

    + +
    * * *
    +
    +***
    +
    +*****
    +
    +- - -
    +
    +---------------------------------------
    +
    + +
    + +

    Span Elements

    + +

    Links

    + +

    Markdown supports two styles of links: inline and reference.

    + +

    In both styles, the link text is delimited by [square brackets].

    + +

    To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an optional +title for the link, surrounded in quotes. For example:

    + +
    This is [an example](http://example.com/ "Title") inline link.
    +
    +[This link](http://example.net/) has no title attribute.
    +
    + +

    Will produce:

    + +
    <p>This is <a href="http://example.com/" title="Title">
    +an example</a> inline link.</p>
    +
    +<p><a href="http://example.net/">This link</a> has no
    +title attribute.</p>
    +
    + +

    If you're referring to a local resource on the same server, you can +use relative paths:

    + +
    See my [About](/about/) page for details.   
    +
    + +

    Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link:

    + +
    This is [an example][id] reference-style link.
    +
    + +

    You can optionally use a space to separate the sets of brackets:

    + +
    This is [an example] [id] reference-style link.
    +
    + +

    Then, anywhere in the document, you define your link label like this, +on a line by itself:

    + +
    [id]: http://example.com/  "Optional Title Here"
    +
    + +

    That is:

    + +
      +
    • Square brackets containing the link identifier (optionally +indented from the left margin using up to three spaces);
    • +
    • followed by a colon;
    • +
    • followed by one or more spaces (or tabs);
    • +
    • followed by the URL for the link;
    • +
    • optionally followed by a title attribute for the link, enclosed +in double or single quotes, or enclosed in parentheses.
    • +
    + +

    The following three link definitions are equivalent:

    + +
    [foo]: http://example.com/  "Optional Title Here"
    +[foo]: http://example.com/  'Optional Title Here'
    +[foo]: http://example.com/  (Optional Title Here)
    +
    + +

    Note: There is a known bug in Markdown.pl 1.0.1 which prevents +single quotes from being used to delimit link titles.

    + +

    The link URL may, optionally, be surrounded by angle brackets:

    + +
    [id]: <http://example.com/>  "Optional Title Here"
    +
    + +

    You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs:

    + +
    [id]: http://example.com/longish/path/to/resource/here
    +    "Optional Title Here"
    +
    + +

    Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output.

    + +

    Link definition names may consist of letters, numbers, spaces, and +punctuation -- but they are not case sensitive. E.g. these two +links:

    + +
    [link text][a]
    +[link text][A]
    +
    + +

    are equivalent.

    + +

    The implicit link name shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write:

    + +
    [Google][]
    +
    + +

    And then define the link:

    + +
    [Google]: http://google.com/
    +
    + +

    Because link names may contain spaces, this shortcut even works for +multiple words in the link text:

    + +
    Visit [Daring Fireball][] for more information.
    +
    + +

    And then define the link:

    + +
    [Daring Fireball]: http://daringfireball.net/
    +
    + +

    Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes.

    + +

    Here's an example of reference links in action:

    + +
    I get 10 times more traffic from [Google] [1] than from
    +[Yahoo] [2] or [MSN] [3].
    +
    +  [1]: http://google.com/        "Google"
    +  [2]: http://search.yahoo.com/  "Yahoo Search"
    +  [3]: http://search.msn.com/    "MSN Search"
    +
    + +

    Using the implicit link name shortcut, you could instead write:

    + +
    I get 10 times more traffic from [Google][] than from
    +[Yahoo][] or [MSN][].
    +
    +  [google]: http://google.com/        "Google"
    +  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
    +  [msn]:    http://search.msn.com/    "MSN Search"
    +
    + +

    Both of the above examples will produce the following HTML output:

    + +
    <p>I get 10 times more traffic from <a href="http://google.com/"
    +title="Google">Google</a> than from
    +<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
    +or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
    +
    + +

    For comparison, here is the same paragraph written using +Markdown's inline link style:

    + +
    I get 10 times more traffic from [Google](http://google.com/ "Google")
    +than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
    +[MSN](http://search.msn.com/ "MSN Search").
    +
    + +

    The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text.

    + +

    With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose.

    + +

    Emphasis

    + +

    Markdown treats asterisks (*) and underscores (_) as indicators of +emphasis. Text wrapped with one * or _ will be wrapped with an +HTML <em> tag; double *'s or _'s will be wrapped with an HTML +<strong> tag. E.g., this input:

    + +
    *single asterisks*
    +
    +_single underscores_
    +
    +**double asterisks**
    +
    +__double underscores__
    +
    + +

    will produce:

    + +
    <em>single asterisks</em>
    +
    +<em>single underscores</em>
    +
    +<strong>double asterisks</strong>
    +
    +<strong>double underscores</strong>
    +
    + +

    You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span.

    + +

    Emphasis can be used in the middle of a word:

    + +
    un*frigging*believable
    +
    + +

    But if you surround an * or _ with spaces, it'll be treated as a +literal asterisk or underscore.

    + +

    To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it:

    + +
    \*this text is surrounded by literal asterisks\*
    +
    + +

    Code

    + +

    To indicate a span of code, wrap it with backtick quotes (`). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example:

    + +
    Use the `printf()` function.
    +
    + +

    will produce:

    + +
    <p>Use the <code>printf()</code> function.</p>
    +
    + +

    To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters:

    + +
    ``There is a literal backtick (`) here.``
    +
    + +

    which will produce this:

    + +
    <p><code>There is a literal backtick (`) here.</code></p>
    +
    + +

    The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span:

    + +
    A single backtick in a code span: `` ` ``
    +
    +A backtick-delimited string in a code span: `` `foo` ``
    +
    + +

    will produce:

    + +
    <p>A single backtick in a code span: <code>`</code></p>
    +
    +<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
    +
    + +

    With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this:

    + +
    Please don't use any `<blink>` tags.
    +
    + +

    into:

    + +
    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
    +
    + +

    You can write this:

    + +
    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
    +
    + +

    to produce:

    + +
    <p><code>&amp;#8212;</code> is the decimal-encoded
    +equivalent of <code>&amp;mdash;</code>.</p>
    +
    + +

    Images

    + +

    Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format.

    + +

    Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: inline and reference.

    + +

    Inline image syntax looks like this:

    + +
    ![Alt text](/path/to/img.jpg)
    +
    +![Alt text](/path/to/img.jpg "Optional title")
    +
    + +

    That is:

    + +
      +
    • An exclamation mark: !;
    • +
    • followed by a set of square brackets, containing the alt +attribute text for the image;
    • +
    • followed by a set of parentheses, containing the URL or path to +the image, and an optional title attribute enclosed in double +or single quotes.
    • +
    + +

    Reference-style image syntax looks like this:

    + +
    ![Alt text][id]
    +
    + +

    Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references:

    + +
    [id]: url/to/image  "Optional title attribute"
    +
    + +

    As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML <img> tags.

    + +
    + +

    Miscellaneous

    + +

    Automatic Links

    + +

    Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

    + +
    <http://example.com/>
    +
    + +

    Markdown will turn this into:

    + +
    <a href="http://example.com/">http://example.com/</a>
    +
    + +

    Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this:

    + +
    <address@example.com>
    +
    + +

    into something like this:

    + +
    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
    +&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
    +&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
    +&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
    +
    + +

    which will render in a browser as a clickable link to "address@example.com".

    + +

    (This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.)

    + +

    Backslash Escapes

    + +

    Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word +with literal asterisks (instead of an HTML <em> tag), you can use +backslashes before the asterisks, like this:

    + +
    \*literal asterisks\*
    +
    + +

    Markdown provides backslash escapes for the following characters:

    + +
    \   backslash
    +`   backtick
    +*   asterisk
    +_   underscore
    +{}  curly braces
    +[]  square brackets
    +()  parentheses
    +#   hash mark
    ++   plus sign
    +-   minus sign (hyphen)
    +.   dot
    +!   exclamation mark
    +
    diff --git a/tests/MMD6Tests/Markdown Syntax.text b/tests/MMD6Tests/Markdown Syntax.text new file mode 100644 index 0000000..487035a --- /dev/null +++ b/tests/MMD6Tests/Markdown Syntax.text @@ -0,0 +1,895 @@ +# Markdown: Syntax # + + + + +* [Overview](#overview) + * [Philosophy](#philosophy) + * [Inline HTML](#html) + * [Automatic Escaping for Special Characters](#autoescape) +* [Block Elements](#block) + * [Paragraphs and Line Breaks](#p) + * [Headers](#header) + * [Blockquotes](#blockquote) + * [Lists](#list) + * [Code Blocks](#precode) + * [Horizontal Rules](#hr) +* [Span Elements](#span) + * [Links](#link) + * [Emphasis](#em) + * [Code](#code) + * [Images](#img) +* [Miscellaneous](#misc) + * [Backslash Escapes](#backslash) + * [Automatic Links](#autolink) + + +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL][src]. + + [src]: /projects/markdown/syntax.text + +* * * + +

    Overview

    + +

    Philosophy

    + +Markdown is intended to be as easy-to-read and easy-to-write as is feasible. + +Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including [Setext][1], [atx][2], [Textile][3], [reStructuredText][4], +[Grutatext][5], and [EtText][6] -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email. + + [1]: http://docutils.sourceforge.net/mirror/setext.html + [2]: http://www.aaronsw.com/2002/atx/ + [3]: http://textism.com/tools/textile/ + [4]: http://docutils.sourceforge.net/rst.html + [5]: http://www.triptico.com/software/grutatxt.html + [6]: http://ettext.taint.org/doc/ + +To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like \*emphasis\*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email. + + + +

    Inline HTML

    + +Markdown's syntax is intended for one purpose: to be used as a +format for *writing* for the web. + +Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is *not* to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a *publishing* format; Markdown is a *writing* +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text. + +For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags. + +The only restrictions are that block-level HTML elements -- e.g. `
    `, +``, `
    `, `

    `, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) `

    ` tags around HTML block-level tags. + +For example, to add an HTML table to a Markdown article: + + This is a regular paragraph. + +

    + + + +
    Foo
    + + This is another regular paragraph. + +Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an +HTML block. + +Span-level HTML tags -- e.g. ``, ``, or `` -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML `` or `` tags instead of Markdown's +link or image syntax, go right ahead. + +Unlike block-level HTML tags, Markdown syntax *is* processed within +span-level tags. + + +

    Automatic Escaping for Special Characters

    + +In HTML, there are two characters that demand special treatment: `<` +and `&`. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. `&lt;`, and +`&amp;`. + +Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write '`AT&amp;T`'. You even need to +escape ampersands within URLs. Thus, if you want to link to: + + http://images.google.com/images?num=30&q=larry+bird + +you need to encode the URL as: + + http://images.google.com/images?num=30&amp;q=larry+bird + +in your anchor tag `href` attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites. + +Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into `&amp;`. + +So, if you want to include a copyright symbol in your article, you can write: + + &copy; + +and Markdown will leave it alone. But if you write: + + AT&T + +Markdown will translate it to: + + AT&amp;T + +Similarly, because Markdown supports [inline HTML](#html), if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write: + + 4 < 5 + +Markdown will translate it to: + + 4 &lt; 5 + +However, inside Markdown code spans and blocks, angle brackets and +ampersands are *always* encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single `<` +and `&` in your example code needs to be escaped.) + + +* * * + + +

    Block Elements

    + + +

    Paragraphs and Line Breaks

    + +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be indented with spaces or tabs. + +The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a `
    ` tag. + +When you *do* want to insert a `
    ` break tag using Markdown, you +end a line with two or more spaces, then type return. + +Yes, this takes a tad more effort to create a `
    `, but a simplistic +"every line break is a `
    `" rule wouldn't work for Markdown. +Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] +work best -- and look better -- when you format them with hard breaks. + + [bq]: #blockquote + [l]: #list + + + + + +Markdown supports two styles of headers, [Setext][1] and [atx][2]. + +Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example: + + This is an H1 + ============= + + This is an H2 + ------------- + +Any number of underlining `=`'s or `-`'s will work. + +Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example: + + # This is an H1 + + ## This is an H2 + + ###### This is an H6 + +Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) : + + # This is an H1 # + + ## This is an H2 ## + + ### This is an H3 ###### + + +

    Blockquotes

    + +Markdown uses email-style `>` characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a `>` before every line: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + > + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + > id sem consectetuer libero luctus adipiscing. + +Markdown allows you to be lazy and only put the `>` before the first +line of a hard-wrapped paragraph: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing. + +Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of `>`: + + > This is the first level of quoting. + > + > > This is nested blockquote. + > + > Back to the first level. + +Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks: + + > ## This is a header. + > + > 1. This is the first list item. + > 2. This is the second list item. + > + > Here's some example code: + > + > return shell_exec("echo $input | $markdown_script"); + +Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu. + + +

    Lists

    + +Markdown supports ordered (numbered) and unordered (bulleted) lists. + +Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers: + + * Red + * Green + * Blue + +is equivalent to: + + + Red + + Green + + Blue + +and: + + - Red + - Green + - Blue + +Ordered lists use numbers followed by periods: + + 1. Bird + 2. McHale + 3. Parish + +It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is: + +
      +
    1. Bird
    2. +
    3. McHale
    4. +
    5. Parish
    6. +
    + +If you instead wrote the list in Markdown like this: + + 1. Bird + 1. McHale + 1. Parish + +or even: + + 3. Bird + 1. McHale + 8. Parish + +you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to. + +If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number. + +List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab. + +To make lists look nice, you can wrap items with hanging indents: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +But if you want to be lazy, you don't have to: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +If list items are separated by blank lines, Markdown will wrap the +items in `

    ` tags in the HTML output. For example, this input: + + * Bird + * Magic + +will turn into: + +

      +
    • Bird
    • +
    • Magic
    • +
    + +But this: + + * Bird + + * Magic + +will turn into: + +
      +
    • Bird

    • +
    • Magic

    • +
    + +List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be indented by either 4 spaces +or one tab: + + 1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + + 2. Suspendisse id sem consectetuer libero luctus adipiscing. + +It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy: + + * This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're + only required to indent the first line. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. + + * Another item in the same list. + +To put a blockquote within a list item, the blockquote's `>` +delimiters need to be indented: + + * A list item with a blockquote: + + > This is a blockquote + > inside a list item. + +To put a code block within a list item, the code block needs +to be indented *twice* -- 8 spaces or two tabs: + + * A list item with a code block: + + + + +It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this: + + 1986. What a great season. + +In other words, a *number-period-space* sequence at the beginning of a +line. To avoid this, you can backslash-escape the period: + + 1986\. What a great season. + + + +

    Code Blocks

    + +Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both `
    ` and `` tags.
    +
    +To produce a code block in Markdown, simply indent every line of the
    +block by at least 4 spaces or 1 tab. For example, given this input:
    +
    +    This is a normal paragraph:
    +
    +        This is a code block.
    +
    +Markdown will generate:
    +
    +    

    This is a normal paragraph:

    + +
    This is a code block.
    +    
    + +One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this: + + Here is an example of AppleScript: + + tell application "Foo" + beep + end tell + +will turn into: + +

    Here is an example of AppleScript:

    + +
    tell application "Foo"
    +        beep
    +    end tell
    +    
    + +A code block continues until it reaches a line that is not indented +(or the end of the article). + +Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this: + + + +will turn into: + +
    <div class="footer">
    +        &copy; 2004 Foo Corporation
    +    </div>
    +    
    + +Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax. + + + +

    Horizontal Rules

    + +You can produce a horizontal rule tag (`
    `) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule: + + * * * + + *** + + ***** + + - - - + + --------------------------------------- + + +* * * + +

    Span Elements

    + + + +Markdown supports two style of links: *inline* and *reference*. + +In both styles, the link text is delimited by [square brackets]. + +To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an *optional* +title for the link, surrounded in quotes. For example: + + This is [an example](http://example.com/ "Title") inline link. + + [This link](http://example.net/) has no title attribute. + +Will produce: + +

    This is + an example inline link.

    + +

    This link has no + title attribute.

    + +If you're referring to a local resource on the same server, you can +use relative paths: + + See my [About](/about/) page for details. + +Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link: + + This is [an example][id] reference-style link. + +You can optionally use a space to separate the sets of brackets: + + This is [an example] [id] reference-style link. + +Then, anywhere in the document, you define your link label like this, +on a line by itself: + + [id]: http://example.com/ "Optional Title Here" + +That is: + +* Square brackets containing the link identifier (optionally + indented from the left margin using up to three spaces); +* followed by a colon; +* followed by one or more spaces (or tabs); +* followed by the URL for the link; +* optionally followed by a title attribute for the link, enclosed + in double or single quotes, or enclosed in parentheses. + +The following three link definitions are equivalent: + + [foo]: http://example.com/ "Optional Title Here" + [foo]: http://example.com/ 'Optional Title Here' + [foo]: http://example.com/ (Optional Title Here) + +**Note:** There is a known bug in Markdown.pl 1.0.1 which prevents +single quotes from being used to delimit link titles. + +The link URL may, optionally, be surrounded by angle brackets: + + [id]: "Optional Title Here" + +You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs: + + [id]: http://example.com/longish/path/to/resource/here + "Optional Title Here" + +Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output. + +Link definition names may consist of letters, numbers, spaces, and +punctuation -- but they are *not* case sensitive. E.g. these two +links: + + [link text][a] + [link text][A] + +are equivalent. 
+ +The *implicit link name* shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write: + + [Google][] + +And then define the link: + + [Google]: http://google.com/ + +Because link names may contain spaces, this shortcut even works for +multiple words in the link text: + + Visit [Daring Fireball][] for more information. + +And then define the link: + + [Daring Fireball]: http://daringfireball.net/ + +Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes. + +Here's an example of reference links in action: + + I get 10 times more traffic from [Google] [1] than from + [Yahoo] [2] or [MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Using the implicit link name shortcut, you could instead write: + + I get 10 times more traffic from [Google][] than from + [Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" + +Both of the above examples will produce the following HTML output: + +

    I get 10 times more traffic from Google than from + Yahoo + or MSN.

    + +For comparison, here is the same paragraph written using +Markdown's inline link style: + + I get 10 times more traffic from [Google](http://google.com/ "Google") + than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or + [MSN](http://search.msn.com/ "MSN Search"). + +The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text. + +With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose. + + +

    Emphasis

    + +Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +emphasis. Text wrapped with one `*` or `_` will be wrapped with an +HTML `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML +`<strong>` tag. E.g., this input: + + *single asterisks* + + _single underscores_ + + **double asterisks** + + __double underscores__ + +will produce: + + <em>single asterisks</em> + + <em>single underscores</em> + + <strong>double asterisks</strong> + + <strong>double underscores</strong> + +You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span. + +Emphasis can be used in the middle of a word: + + un*frigging*believable + +But if you surround an `*` or `_` with spaces, it'll be treated as a +literal asterisk or underscore. + +To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it: + + \*this text is surrounded by literal asterisks\* + + + +

    Code

    + +To indicate a span of code, wrap it with backtick quotes (`` ` ``). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example: + + Use the `printf()` function. + +will produce: + +

    Use the printf() function.

    + +To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters: + + ``There is a literal backtick (`) here.`` + +which will produce this: + +

    There is a literal backtick (`) here.

    + +The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span: + + A single backtick in a code span: `` ` `` + + A backtick-delimited string in a code span: `` `foo` `` + +will produce: + +

    A single backtick in a code span: `

    + +

    A backtick-delimited string in a code span: `foo`

    + +With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this: + + Please don't use any `<blink>` tags. + +into: + +

    Please don't use any <blink> tags.

    + +You can write this: + + `&#8212;` is the decimal-encoded equivalent of `&mdash;`. + +to produce: + +

    &#8212; is the decimal-encoded + equivalent of &mdash;.

    + + + +

    Images

    + +Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format. + +Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: *inline* and *reference*. + +Inline image syntax looks like this: + + ![Alt text](/path/to/img.jpg) + + ![Alt text](/path/to/img.jpg "Optional title") + +That is: + +* An exclamation mark: `!`; +* followed by a set of square brackets, containing the `alt` + attribute text for the image; +* followed by a set of parentheses, containing the URL or path to + the image, and an optional `title` attribute enclosed in double + or single quotes. + +Reference-style image syntax looks like this: + + ![Alt text][id] + +Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references: + + [id]: url/to/image "Optional title attribute" + +As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML `<img>` tags. + + +* * * + + +

    Miscellaneous

    + + + +Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: + + <http://example.com/> + +Markdown will turn this into: + + <a href="http://example.com/">http://example.com/</a> + +Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this: + + <address@example.com> + +into something like this: + + <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65; + &#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111; + &#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61; + &#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a> + +which will render in a browser as a clickable link to "address@example.com". + +(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.) + + + +

    Backslash Escapes

    + +Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word +with literal asterisks (instead of an HTML `<em>` tag), you can use +backslashes before the asterisks, like this: + + \*literal asterisks\* + +Markdown provides backslash escapes for the following characters: + + \ backslash + ` backtick + * asterisk + _ underscore + {} curly braces + [] square brackets + () parentheses + # hash mark + + plus sign + - minus sign (hyphen) + . dot + ! exclamation mark diff --git a/tests/MMD6Tests/Math.html b/tests/MMD6Tests/Math.html new file mode 100644 index 0000000..7791c2c --- /dev/null +++ b/tests/MMD6Tests/Math.html @@ -0,0 +1,51 @@ +

    foo \({e}^{i\pi }+1=0\) bar

    + +

    \[ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} \]

    + +

    foo \({e}^{i\pi }+1=0\) bar

    + +

    foo \({e}^{i\pi }+1=0\), bar

    + +

    \[{x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}\]

    + +

    5

    + +

    foo $ {e}^{i\pi }+1=0$ bar

    + +

    $$ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$

    + +

    foo ${e}^{i\pi }+1=0 $ bar

    + +

    $${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} $$

    + +

    foo a${e}^{i\pi }+1=0$ bar

    + +

    10

    + +

    a$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$

    + +

    foo ${e}^{i\pi }+1=0$b bar

    + +

    $${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$b

    + +

    \(\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\)

    + +

    \[\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\]

    + +

    15

    + +

    \(\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\)

    + +

    \[\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\]

    + +

    \(\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\)

    + +

    \[\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\]

    + +

    \(\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\)

    + +

    20

    + +

    \[\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\]

    + +

    \begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}

    diff --git a/tests/MMD6Tests/Math.htmlc b/tests/MMD6Tests/Math.htmlc new file mode 100644 index 0000000..627ef2f --- /dev/null +++ b/tests/MMD6Tests/Math.htmlc @@ -0,0 +1,51 @@ +

    foo \({e}^{i\pi }+1=0\) bar

    + +

    \[ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} \]

    + +

    foo ${e}^{i\pi }+1=0$ bar

    + +

    foo ${e}^{i\pi }+1=0$, bar

    + +

    $${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$

    + +

    5

    + +

    foo $ {e}^{i\pi }+1=0$ bar

    + +

    $$ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$

    + +

    foo ${e}^{i\pi }+1=0 $ bar

    + +

    $${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} $$

    + +

    foo a${e}^{i\pi }+1=0$ bar

    + +

    10

    + +

    a$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$

    + +

    foo ${e}^{i\pi }+1=0$b bar

    + +

    $${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$b

    + +

    $\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}$

    + +

    $$\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}$$

    + +

    15

    + +

    \(\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\)

    + +

    \[\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\]

    + +

    $\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}$

    + +

    $$\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}$$

    + +

    \(\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\)

    + +

    20

    + +

    \[\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\]

    + +

    \begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}

    diff --git a/tests/MMD6Tests/Math.text b/tests/MMD6Tests/Math.text new file mode 100644 index 0000000..8e6788b --- /dev/null +++ b/tests/MMD6Tests/Math.text @@ -0,0 +1,51 @@ +foo \\({e}^{i\pi }+1=0\\) bar + +\\[ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} \\] + +foo ${e}^{i\pi }+1=0$ bar + +foo ${e}^{i\pi }+1=0$, bar + +$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$ + +5 + +foo $ {e}^{i\pi }+1=0$ bar + +$$ {x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$ + +foo ${e}^{i\pi }+1=0 $ bar + +$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a} $$ + +foo a${e}^{i\pi }+1=0$ bar + +10 + +a$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$ + +foo ${e}^{i\pi }+1=0$b bar + +$${x}_{1,2}=\frac{-b\pm \sqrt{{b}^{2}-4ac}}{2a}$$b + +$\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}$ + +$$\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}$$ + +15 + +\\(\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\\) + +\\[\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\\] + +$\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}$ + +$$\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}$$ + +\\(\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\\) + +20 + +\\[\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}\\] + +`\begin{equation}\nabla \times \mathbf{E} = - \frac{\partial \mathbf{B}}{\partial t}\end{equation}` diff --git a/tests/MMD6Tests/Nested Lists.html b/tests/MMD6Tests/Nested Lists.html new file mode 100644 index 0000000..e0e954c --- /dev/null +++ b/tests/MMD6Tests/Nested Lists.html @@ -0,0 +1,99 @@ +
      +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    5

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    diff --git a/tests/MMD6Tests/Nested Lists.htmlc b/tests/MMD6Tests/Nested Lists.htmlc new file mode 100644 index 0000000..e0e954c --- /dev/null +++ b/tests/MMD6Tests/Nested Lists.htmlc @@ -0,0 +1,99 @@ +
      +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    • foo + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    5

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    + +
      +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    • foo

      + +
        +
      • bar
      • +
    • +
    + +

    bar

    diff --git a/tests/MMD6Tests/Nested Lists.text b/tests/MMD6Tests/Nested Lists.text new file mode 100644 index 0000000..f9d5c1e --- /dev/null +++ b/tests/MMD6Tests/Nested Lists.text @@ -0,0 +1,58 @@ +* foo + * bar +* foo + * bar +* foo + * bar + +bar + +* foo + + * bar +* foo + + * bar +* foo + + * bar + +bar + +* foo + * bar + +* foo + * bar + +* foo + * bar + +5 + +* foo + * bar + +* foo + * bar + +* foo + * bar + + +bar + +* foo + + * bar + +* foo + + * bar + +* foo + + * bar + +bar + diff --git a/tests/MMD6Tests/Reference Footnotes.html b/tests/MMD6Tests/Reference Footnotes.html new file mode 100644 index 0000000..eeb3645 --- /dev/null +++ b/tests/MMD6Tests/Reference Footnotes.html @@ -0,0 +1,20 @@ +

    Reference.[1]

    + +

    Reference.[2]

    + +
    +
    +
      + +
    1. +

      This is a short footnote.  ↩

      +
    2. + +
    3. +

      This is a longer footnote. +With two lines.  ↩

      +
    4. + +
    +
    + diff --git a/tests/MMD6Tests/Reference Footnotes.htmlc b/tests/MMD6Tests/Reference Footnotes.htmlc new file mode 100644 index 0000000..e7a323c --- /dev/null +++ b/tests/MMD6Tests/Reference Footnotes.htmlc @@ -0,0 +1,7 @@ +

    Reference.[^foo]

    + +

    Reference.[^foo2]

    + +

    [^foo]: This is a short footnote. +[^foo2]: This is a longer footnote. +With two lines.

    diff --git a/tests/MMD6Tests/Reference Footnotes.text b/tests/MMD6Tests/Reference Footnotes.text new file mode 100644 index 0000000..c254336 --- /dev/null +++ b/tests/MMD6Tests/Reference Footnotes.text @@ -0,0 +1,8 @@ +Reference.[^foo] + +Reference.[^foo2] + + +[^foo]: This is a *short* footnote. +[^foo2]: This is a longer footnote. +With two lines. diff --git a/tests/MMD6Tests/Reference Images.html b/tests/MMD6Tests/Reference Images.html new file mode 100644 index 0000000..c277d09 --- /dev/null +++ b/tests/MMD6Tests/Reference Images.html @@ -0,0 +1,17 @@ +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    5

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    diff --git a/tests/MMD6Tests/Reference Images.htmlc b/tests/MMD6Tests/Reference Images.htmlc new file mode 100644 index 0000000..c277d09 --- /dev/null +++ b/tests/MMD6Tests/Reference Images.htmlc @@ -0,0 +1,17 @@ +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    + +

    5

    + +

    Test foo.

    + +

    Test foo.

    + +

    Test foo.

    diff --git a/tests/MMD6Tests/Reference Images.text b/tests/MMD6Tests/Reference Images.text new file mode 100644 index 0000000..2eeba99 --- /dev/null +++ b/tests/MMD6Tests/Reference Images.text @@ -0,0 +1,27 @@ +Test ![*foo*][bar]. + +Test ![*foo*][BAR]. + +Test ![*foo*][foobar]. + +Test ![*foo*][foo bar]. + +Test ![*foo*][foo bar]. + +5 + +Test ![*foo*][long]. + +Test ![foo][]. + +Test ![foo]. + + +[foo]: http://test.0/ +[bar]: http://test.1/ +[BAR]: http://test.2/ +[foobar]: http://test.3/ "title" +[foo bar]: http://test.4/ +[foo bar]: http://test.5/ "" +[long]: http://test.6/ +"title" diff --git a/tests/MMD6Tests/Reference Links.html b/tests/MMD6Tests/Reference Links.html new file mode 100644 index 0000000..e6d8f09 --- /dev/null +++ b/tests/MMD6Tests/Reference Links.html @@ -0,0 +1,32 @@ +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    5

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo

    + +

    foo

    + +

    10

    + +

    foo

    + +

    foo

    + +

    foo bar

    + +

    foo +bar

    diff --git a/tests/MMD6Tests/Reference Links.htmlc b/tests/MMD6Tests/Reference Links.htmlc new file mode 100644 index 0000000..e6d8f09 --- /dev/null +++ b/tests/MMD6Tests/Reference Links.htmlc @@ -0,0 +1,32 @@ +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    5

    + +

    foo.

    + +

    foo.

    + +

    foo.

    + +

    foo

    + +

    foo

    + +

    10

    + +

    foo

    + +

    foo

    + +

    foo bar

    + +

    foo +bar

    diff --git a/tests/MMD6Tests/Reference Links.text b/tests/MMD6Tests/Reference Links.text new file mode 100644 index 0000000..45cc503 --- /dev/null +++ b/tests/MMD6Tests/Reference Links.text @@ -0,0 +1,42 @@ +[*foo*][bar]. + +[*foo*][BAR]. + +[*foo*][foobar]. + +[*foo*][foo bar]. + +[*foo*][foo bar]. + +5 + +[*foo*][long]. + +[foo][]. + +[foo]. + +[[foo]][bar] + +[[foo]][] + +10 + +[[foo][bar]][] + +[foo][bar] + +[foo] [bar] + +[foo] +[bar] + + +[foo]: http://test.0/ +[bar]: http://test.1/file.txt +[BAR]: http://test.2/ +[foobar]: http://test.3/file.txt "title" +[foo bar]: http://test.4/ +[foo bar]: http://test.5/ "" +[long]: http://test.6/ +"title" diff --git a/tests/MMD6Tests/Smart Quotes.html b/tests/MMD6Tests/Smart Quotes.html new file mode 100644 index 0000000..5fd0b38 --- /dev/null +++ b/tests/MMD6Tests/Smart Quotes.html @@ -0,0 +1,69 @@ +

    ‘foo’

    + +

    “foo”

    + +

    “foo” ‘bar’

    + +

    ‘foo’ “bar”

    + +

    ‘.foo’

    + +

    5

    + +

    "foo".

    + +

    'foo'.

    + +

    `foo'

    + +

    “foo”

    + +

    ```foo'''

    + +

    10

    + +

    “” foo “”?

    + +

    '' foo ''?

    + +

    isn’t

    + +

    foo – bar

    + +

    foo — bar

    + +

    15

    + +

    1–2

    + +

    1–2

    + +

    1—3

    + +

    --

    + +

    ---

    + +

    20

    + +

    + +

    + +

    ...

    + +

    . . .

    + +

    l’année l’année

    + +

    25

    + +

    l’été l’année

    + +

    l’été l’été

    + +

    l’année l’été

    + +

    foo’s and bar’s

    + +

    foo’s and bar’s

    diff --git a/tests/MMD6Tests/Smart Quotes.htmlc b/tests/MMD6Tests/Smart Quotes.htmlc new file mode 100644 index 0000000..73f9ea2 --- /dev/null +++ b/tests/MMD6Tests/Smart Quotes.htmlc @@ -0,0 +1,69 @@ +

    'foo'

    + +

    "foo"

    + +

    "foo" 'bar'

    + +

    'foo' "bar"

    + +

    '.foo'

    + +

    5

    + +

    "foo".

    + +

    'foo'.

    + +

    `foo'

    + +

    ``foo''

    + +

    ```foo'''

    + +

    10

    + +

    "" foo ""?

    + +

    '' foo ''?

    + +

    isn't

    + +

    foo -- bar

    + +

    foo --- bar

    + +

    15

    + +

    1-2

    + +

    1--2

    + +

    1---3

    + +

    --

    + +

    ---

    + +

    20

    + +

    ...

    + +

    . . .

    + +

    ...

    + +

    . . .

    + +

    l'année l'année

    + +

    25

    + +

    l'été l'année

    + +

    l'été l'été

    + +

    l'année l'été

    + +

    foo's and bar's

    + +

    foo's and bar's

    diff --git a/tests/MMD6Tests/Smart Quotes.text b/tests/MMD6Tests/Smart Quotes.text new file mode 100644 index 0000000..64993dc --- /dev/null +++ b/tests/MMD6Tests/Smart Quotes.text @@ -0,0 +1,69 @@ +'foo' + +"foo" + +"foo" 'bar' + +'foo' "bar" + +'.foo' + +5 + +\"foo\". + +\'foo\'. + +`foo' + +``foo'' + +```foo''' + +10 + +"" foo ""? + +'' foo ''? + +isn't + +foo -- bar + +foo --- bar + +15 + +1-2 + +1--2 + +1---3 + +`--` + +`---` + +20 + +... + +. . . + +`...` + +`. . .` + +l'année l'année + +25 + +l'été l'année + +l'été l'été + +l'année l'été + +foo's and bar's + +`foo`'s and `bar`'s diff --git a/tests/MMD6Tests/Superscript.html b/tests/MMD6Tests/Superscript.html new file mode 100644 index 0000000..adbce18 --- /dev/null +++ b/tests/MMD6Tests/Superscript.html @@ -0,0 +1,27 @@ +

    x2.

    + +

    x2xz.

    + +

    x2.

    + +

    x2 3^

    + +

    x2 32

    + +

    5

    + +

    xz.

    + +

    xxyz.

    + +

    zz.

    + +

    ~/Library/MultiMarkdown

    + +

    ^test

    + +

    10

    + +

    x^y

    + +

    x~y

    diff --git a/tests/MMD6Tests/Superscript.htmlc b/tests/MMD6Tests/Superscript.htmlc new file mode 100644 index 0000000..81a1a55 --- /dev/null +++ b/tests/MMD6Tests/Superscript.htmlc @@ -0,0 +1,27 @@ +

    x^2.

    + +

    x^2xz.

    + +

    x^2.^

    + +

    x^2 3^

    + +

    x^2 3^2

    + +

    5

    + +

    x~z.

    + +

    x~xyz.

    + +

    z~z.~

    + +

    ~/Library/MultiMarkdown

    + +

    ^test

    + +

    10

    + +

    x^y

    + +

    x~y

    diff --git a/tests/MMD6Tests/Superscript.text b/tests/MMD6Tests/Superscript.text new file mode 100644 index 0000000..2e521a7 --- /dev/null +++ b/tests/MMD6Tests/Superscript.text @@ -0,0 +1,27 @@ +x^2. + +x^2xz. + +x^2.^ + +x^2 3^ + +x^2 3^2 + +5 + +x~z. + +x~xyz. + +z~z.~ + +~/Library/MultiMarkdown + +^test + +10 + +x\^y + +x\~y diff --git a/tests/MarkdownTest.pl b/tests/MarkdownTest.pl new file mode 100755 index 0000000..9a521c1 --- /dev/null +++ b/tests/MarkdownTest.pl @@ -0,0 +1,184 @@ +#!/usr/bin/perl + +# +# MarkdownTester -- Run tests for Markdown implementations +# +# Copyright (c) 2004-2005 John Gruber +# +# + +use strict; +use warnings; +use Getopt::Long; +use Benchmark; + +our $VERSION = '1.0.2'; +# Sat 24 Dec 2005 + +my $time_start = new Benchmark; +my $test_dir = "Tests"; +my $script = "./Markdown.pl"; +my $use_tidy = 0; +my ($flag_version); +my $flags = ""; +my $file_ext = "html"; +my $trail = ""; + +GetOptions ( + "script=s" => \$script, + "testdir=s" => \$test_dir, + "tidy" => \$use_tidy, + "version" => \$flag_version, + "flags=s" => \$flags, + "ext=s" => \$file_ext, + "trailflags=s" => \$trail, + ); + +if($flag_version) { + my $progname = $0; + $progname =~ s{.*/}{}; + die "$progname version $VERSION\n"; +} + +unless (-d $test_dir) { die "'$test_dir' is not a directory.\n"; } +unless (-f $script) { die "$script does not exist.\n"; } +unless (-x $script) { die "$script is not executable.\n"; } + +my $tests_passed = 0; +my $tests_failed = 0; + +TEST: +foreach my $testfile (glob "$test_dir/*.text") { + my $testname = $testfile; + $testname =~ s{.*/(.+)\.text$}{$1}i; + print "$testname ... 
"; + + # Look for a corresponding .html file for each .text file: + my $resultfile = $testfile; + $resultfile =~ s{\.text$}{\.$file_ext}i; + unless (-f $resultfile) { + print "'$resultfile' does not exist.\n\n"; + $tests_failed++; + next TEST; + } + + # open(TEST, $testfile) || die("Can't open testfile: $!"); + open(RESULT, $resultfile) || die("Can't open resultfile: $!"); + undef $/; + # my $t_input = <TEST>; + my $t_result = <RESULT>; + + my $t_output = `'$script' $flags '$testfile' $trail`; + + # Normalize the output and expected result strings: + $t_result =~ s/\s+\z//; # trim trailing whitespace + $t_output =~ s/\s+\z//; # trim trailing whitespace + if ($use_tidy) { + # Escape the strings, pass them through to CLI tidy tool for tag-level equivalency + $t_result =~ s{'}{'\\''}g; # escape ' chars for shell + $t_output =~ s{'}{'\\''}g; + $t_result = `echo '$t_result' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`; + $t_output = `echo '$t_output' | tidy --show-body-only 1 --quiet 1 --show-warnings 0`; + } + + if ($t_output eq $t_result) { + print "OK\n"; + $tests_passed++; + } + else { + print "FAILED\n\n"; +# This part added by JM to print diffs + open(OUT, '>tmp1') or die $!; + print OUT $t_output or die $!; + open(RES, '>tmp2') or die $!; + print RES $t_result or die $!; + print `diff tmp1 tmp2`; + close RES; + close OUT; + print "\n"; + `rm tmp?`; +# End of added part + $tests_failed++; + } +} + +print "\n\n"; +print "$tests_passed passed; $tests_failed failed.\n"; + +my $time_end = new Benchmark; +my $time_diff = timediff($time_end, $time_start); +print "Benchmark: ", timestr($time_diff), "\n"; + +exit($tests_failed); + +__END__ + +=pod + +=head1 NAME + +B<MarkdownTest> + + +=head1 SYNOPSIS + +B<MarkdownTest.pl> [ B<--options> ] [ I<file> ... ] + + +=head1 DESCRIPTION + + +=head1 OPTIONS + +Use "--" to end switch parsing. For example, to open a file named "-z", use: + + MarkdownTest.pl -- -z + +=over 4 + +=item B<--script> + +Specify the path to the Markdown script to test. 
Defaults to +"./Markdown.pl". Example: + + ./MarkdownTest.pl --script ./PHP-Markdown/php-markdown + +=item B<--testdir> + +Specify the path to a directory containing test data. Defaults to "Tests". + +=item B<--tidy> + +Flag to turn on using the command line 'tidy' tool to normalize HTML +output before comparing script output to the expected test result. +Assumes that the 'tidy' command is available in your PATH. Defaults to +off. + +=back + + + +=head1 BUGS + + + +=head1 VERSION HISTORY + +1.0 Mon 13 Dec 2004-2005 + +1.0.1 Mon 19 Sep 2005 + + + Better handling of case when foo.text exists, but foo.html doesn't. + It now prints a message and moves on, rather than dying. + + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2004-2005 John Gruber + +All rights reserved. + +This is free software; you may redistribute it and/or modify it under +the same terms as Perl itself. + +=cut