From: Fletcher T. Penney
Date: Fri, 10 Mar 2017 19:49:21 +0000 (-0500)
Subject: FIXED: Improve glossaries and abbreviations; Update QuickStart
X-Git-Tag: 6.0.0-b2~1^2~10
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=74ad98ebc91d9dcbbe1ed082ee3ab4afe6883315;p=multimarkdown
FIXED: Improve glossaries and abbreviations; Update QuickStart
---
diff --git a/QuickStart.fodt b/QuickStart.fodt
index 6f24316..37ec55a 100644
--- a/QuickStart.fodt
+++ b/QuickStart.fodt
@@ -1,545 +1,662 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+ MultiMarkdown v6 Quick Start Guide
+
+
+2017-03-10T14:48:35.565769000Fletcher PenneyPT4S1LibreOffice/5.0.3.2$MacOSX_X86_64 LibreOffice_project/e5f16313668ac592c1bfb310f4390624e3dbfb75Fletcher T. Penney6.0-b
+
+
+ 0
+ 0
+ 16298
+ 17762
+ true
+ false
+
+
+ view2
+ 2501
+ 2925
+ 0
+ 0
+ 16297
+ 17761
+ 0
+ 0
+ false
+ 80
+ false
+
+
+
+
+ true
+ true
+ 0
+ true
+ true
+ false
+ true
+
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ true
+ true
+ false
+ false
+ true
+
+ false
+ false
+ false
+
+
+ false
+ false
+ 0
+ false
+ false
+ false
+ true
+ 1
+ false
+ 2066055
+ true
+ false
+ false
+ true
+ false
+
+ true
+ true
+ false
+ false
+ false
+ false
+
+ false
+ high-resolution
+ false
+ false
+ true
+ false
+ true
+ true
+
+ 2066055
+ true
+ false
+ false
+ false
+ false
+ false
+ false
+ true
+ false
+ true
+ false
+ false
+ true
+ 0
+ false
+ true
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
- Bibliography
-
+
+
+
+ Bibliography
+
+
+
-
- MultiMarkdown v6 Quick Start Guide
- Fletcher T. Penney
- 6.0-b
-
-
-
-
-
-Table of Contents
-
-
-
-Table of Contents
-
-Introduction 1
-Performance 1
-Parse Tree 1
-Features 1
-Abbreviations (Or Acronyms) 1
-Citations 1
-CriticMarkup 1
-Emph and Strong 1
-Fenced Code Blocks 1
-Glossary Terms 1
-Internationalization 1
-Metadata 1
-Table of Contents 1
-Future Steps 1
-
-
-
-
-Introduction
-
-Version: 6.0-b
-
-This document serves as a description of MultiMarkdown (MMD) v6, as well as a sample
-document to demonstrate the various features. Specifically, differences from
-MMD v5 will be pointed out.
-
-Performance
-
-A big motivating factor leading to the development of MMD v6 was
-performance. When MMD first migrated from Perl to C (based on peg-
-markdown), it was among the fastest
-Markdown parsers available. That was many years ago, and the “competition”
-has made a great deal of progress since that time.
-
-When developing MMD v6, one of my goals was to keep MMD at least in the
-ballpark of the fastest processors. Of course, being the fastest would be
-fantastic, but I was more concerned with ensuring that the code was easily
-understood, and easily updated with new features in the future.
-
-MMD v3 – v5 used a PEGParsing Expression Grammar https://en.wikipedia.org/wiki/Parsing_expression_grammar to handle the parsing. This made it easy to
-understand the relationship between the MMD grammar and the parsing code,
-since they were one and the same. However, the parsing code generated by
-the parsers was not particularly fast, and was prone to troublesome edge
-cases with terrible performance characteristics.
-
-The first step in MMD v6 parsing is to break the source text into a series
-of tokens, which may consist of plain text, whitespace, or special characters
-such as ‘*’, ‘[’, etc. This chain of tokens is then used to perform the
-actual parsing.
-
-MMD v6 divides the parsing into two separate phases, which actually fits
-more with Markdown’s design philosophically.
-
-
-
-Block parsing consists of identifying the “type” of each line of the
-source text, and grouping the lines into blocks (e.g. paragraphs, lists,
-blockquotes, etc.) Some blocks are a single line (e.g. ATX headers), and
-others can be many lines long. The block parsing in MMD v6 is handled
-by a parser generated by lemon. This
-parser allows the block structure to be more readily understood by
-non-programmers, but the generated parser is still fast.
-
-
-Span parsing consists of identifying Markdown/MMD structures that occur
-inside of blocks, such as links, images, strong, emph, etc. Most of these
-structures require matching pairs of tokens to specify where the span starts
-and where it ends. Most of these spans allow arbitrary levels of nesting as
-well. This made parsing them correctly in the PEG-based code difficult and
-slow. MMD v6 uses a different approach that is accurate and has good
-performance characteristics even with edge cases. Basically, it keeps a stack
-of each “opening” token as it steps through the token chain. When a “closing”
-token is found, it is paired with the most recent appropriate opener on the
-stack. Any tokens in between the opener and closer are removed, as they are
-not able to be matched any more. To avoid unnecessary searches for non-
-existent openers, the parser keeps track of which opening tokens have been
-discovered. This allows the parser to continue moving forwards without having
-to go backwards and re-parse any previously visited tokens.
-
-
-
-The result of this redesigned MMD parser is that it can parse short
-documents more quickly than CommonMark, and takes
-only 15% – 20% longer to parse long documents. I have not delved too deeply
-into this, but I presume that CommonMark has a bit more “set-up” time that
-becomes expensive when parsing a short document (e.g. a paragraph or two). But
-this cost becomes negligible when parsing longer documents (e.g. file sizes of
-1 MB). So depending on your use case, CommonMark may well be faster than
-MMD, but we’re talking about splitting hairs here…. Recent comparisons
-show MMD v6 taking approximately 4.37 seconds to parse a 108 MB file
-(approximately 24.8 MB/second), and CommonMark took 3.72 seconds for the same
-file (29.2 MB/second). For comparison, MMD v5.4 took approximately 94
-second for the same file (1.15 MB/second).
-
-For a more realistic file of approx 28 kb (the source of the Markdown Syntax
-web page), both MMD and CommonMark parse it too quickly to accurately
-measure. In fact, it requires a file consisting of the original file copied
-32 times over (0.85 MB) before /usr/bin/env time reports a time over the
-minimum threshold of 0.01 seconds for either program.
-
-There is still potentially room for additional optimization in MMD.
-However, even if I can’t close the performance gap with CommonMark on longer
-files, the additional features of MMD compared with Markdown in addition to
-the increased legibility of the source code of MMD (in my biased opinion
-anyway) make this project worthwhile.
-
-Parse Tree
-
-MMD v6 performs its parsing in the following steps:
-
-
-
-Start with a null-terminated string of source text (C style string)
-
-
-Lex string into token chain
-
-
-Parse token chain into blocks
-
-
-Parse tokens within each block into span level structures (e.g. strong,
-emph, etc.)
-
-
-Export the token tree into the desired output format (e.g. HTML, LaTeX,
-etc.) and return the resulting C style string
-
-OR
-
-
-Use the resulting token tree for your own purposes.
-
-
-
-The token tree (ASTAbstract Syntax Tree https://en.wikipedia.org/wiki/Abstract_syntax_tree) includes starting offsets and length of each token,
-allowing you to use MMD as part of a syntax highlighter. MMD v5 did not
-have this functionality in the public version, in part because the PEG parsers
-used did not provide reliable offset positions, requiring a great deal of
-effort when I adapted MMD for use in MultiMarkdown
-Composer.
-
-These steps are managed using the mmd_engine “object”. An individual
-mmd_engine cannot be used by multiple threads simultaneously, so if
-libMultiMarkdown is to be used in a multithreaded program, a separate
-mmd_engine should be created for each thread. Alternatively, just use the
-slightly more abstracted mmd_convert_string() function that handles creating
-and destroying the mmd_engine automatically.
-
-Features
-
-Abbreviations (Or Acronyms)
-
-This file includes the use of MMD as an abbreviation for MultiMarkdown. The
-abbreviation will be expanded on the first use, and the shortened form will be
-used on subsequent occurrences.
-
-Abbreviations can be specified using inline or reference syntax. The inline
-variant requires that the abbreviation be wrapped in parentheses and
-immediately follows the >.
-
-[>MMD] is an abbreviation. So is [>(MD) Markdown].[>MMD]: MultiMarkdown
-
-Citations
-
-Citations can be specified using an inline syntax, just like inline footnotes.
-
-CriticMarkup
-
-MMD v6 has improved support for CriticMarkup, both in terms of parsing, and
-in terms of support for each output format. You can insert text,
-delete text, substitute one thingfor another, highlight text,
-and leave comments in the text.
-
-Emph and Strong
-
-The basics of emphasis and strong emphasis are unchanged, but the parsing
-engine has been improved to be more accurate, particularly in various edge
-cases where proper parsing can be difficult.
-
-Fenced Code Blocks
-
-Fenced code blocks are fundamentally the same as MMD v5, except:
-
-
-
-The leading and trailing fences can be 3, 4, or 5 backticks in length. That
-should be sufficient to account for complex documents without requiring a more
-complex parser.
-
-
-If there is no trailing fence, then everything after the leading fence is
-considered to be part of the code block.
-
-
-
-Glossary Terms
-
-If there are terms in your document you wish to define in a glossaryThe
-glossary collects information about important terms used in your document at
-the end of your document, you can define them using the glossary syntax.
-
-Glossary terms can be specified using inline or reference syntax. The inline
-variant requires that the abbreviation be wrapped in parentheses and
-immediately follows the ?.
-
-[?(glossary) The glossary collects information about importantterms used in your document] is a glossary term.[?glossary] is also a glossary term.[?glossary]: The glossary collects information about importantterms used in your document
-
-Internationalization
-
-MMD v6 includes support for substituting certain text phrases in other
-languages. This only affects the HTML format.
-
-Metadata
-
-Metadata in MMD v6 includes new support for LaTeX – the latex config key
-allows you to automatically setup of multiple latex include files at once.
-The default setups that I use would typically consist of one LaTeX file to be
-included at the top of the file, one to be included right at the beginning of
-the document, and one to be included at the end of the document. If you want
-to specify the latex files separately, you can use latex leader, latexbegin, and latex footer.
-
-Table of Contents
-
-By placing {{TOC}} in your document, you can insert an automatically
-generated Table of Contents in your document. As of MMD v6, the native
-Table of Contents functionality is used when exporting to LaTeX or
-OpenDocument formats.
-
-Future Steps
-
-Some features I plan to implement at some point:
-
-
-
-MMD v5 used to automatically identify abbreviated terms throughout the
-document and substitute them automatically. I plan to reimplement this
-functionality, but will probably improve upon it to include glossary terms,
-and possibly even support for indexing documents in LaTeX (and possibly
-OpenOffice).
-
-
-OPML export support is not available in v6. I plan on adding improved
-support for this at some point. I was hoping to be able to re-use the
-existing v6 parser but it might be simpler to use the approach from v5 and
-earlier, which was to have a separate parser tuned to only identify headers
-and “stuff between headers”.
-
-
-Improved EPUB support. Currently, EPUB support is provided by a separate
-tool. At some point, I would like to
-better integrate this into MMD itself.
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+ Table of Contents
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Table of Contents
+
+ Introduction1
+ Performance1
+ Parse Tree3
+ Features3
+ Abbreviations (Or Acronyms)3
+ Citations4
+ CriticMarkup4
+ Emph and Strong4
+ Fenced Code Blocks4
+ Glossary Terms4
+ Internationalization4
+ Metadata5
+ Table of Contents5
+ Future Steps5
+
+
+ Introduction
+ Version: 6.0-b
+ This document serves as a description of MultiMarkdown (MMD) v6, as well as a sample document to demonstrate the various features. Specifically, differences from MMD v5 will be pointed out.
+ Performance
+ A big motivating factor leading to the development of MMD v6 was performance. When MMD first migrated from Perl to C (based on peg-markdown), it was among the fastest Markdown parsers available. That was many years ago, and the “competition” has made a great deal of progress since that time.
+ When developing MMD v6, one of my goals was to keep MMD at least in the ballpark of the fastest processors. Of course, being the fastest would be fantastic, but I was more concerned with ensuring that the code was easily understood, and easily updated with new features in the future.
+ MMD v3 – v5 used a PEG
+ (Parsing Expression Grammar, https://en.wikipedia.org/wiki/Parsing_expression_grammar) to handle the parsing. This made it easy to understand the relationship between the MMD grammar and the parsing code, since they were one and the same. However, the parsing code generated by the parsers was not particularly fast, and was prone to troublesome edge cases with terrible performance characteristics.
+ The first step in MMD v6 parsing is to break the source text into a series of tokens, which may consist of plain text, whitespace, or special characters such as ‘*’, ‘[’, etc. This chain of tokens is then used to perform the actual parsing.
+ MMD v6 divides the parsing into two separate phases, which actually fits more with Markdown’s design philosophically.
+
+
+ Block parsing consists of identifying the “type” of each line of the source text, and grouping the lines into blocks (e.g. paragraphs, lists, blockquotes, etc.) Some blocks are a single line (e.g. ATX headers), and others can be many lines long. The block parsing in MMD v6 is handled by a parser generated by lemon. This parser allows the block structure to be more readily understood by non-programmers, but the generated parser is still fast.
+
+
+ Span parsing consists of identifying Markdown/MMD structures that occur inside of blocks, such as links, images, strong, emph, etc. Most of these structures require matching pairs of tokens to specify where the span starts and where it ends. Most of these spans allow arbitrary levels of nesting as well. This made parsing them correctly in the PEG-based code difficult and slow. MMD v6 uses a different approach that is accurate and has good performance characteristics even with edge cases. Basically, it keeps a stack of each “opening” token as it steps through the token chain. When a “closing” token is found, it is paired with the most recent appropriate opener on the stack. Any tokens in between the opener and closer are removed, as they are not able to be matched any more. To avoid unnecessary searches for non-existent openers, the parser keeps track of which opening tokens have been discovered. This allows the parser to continue moving forwards without having to go backwards and re-parse any previously visited tokens.
+
+
+ The result of this redesigned MMD parser is that it can parse short documents more quickly than CommonMark, and takes only 15% – 20% longer to parse long documents. I have not delved too deeply into this, but I presume that CommonMark has a bit more “set-up” time that becomes expensive when parsing a short document (e.g. a paragraph or two). But this cost becomes negligible when parsing longer documents (e.g. file sizes of 1 MB). So depending on your use case, CommonMark may well be faster than MMD, but we’re talking about splitting hairs here…. Recent comparisons show MMD v6 taking approximately 4.37 seconds to parse a 108 MB file (approximately 24.8 MB/second), and CommonMark took 3.72 seconds for the same file (29.2 MB/second). For comparison, MMD v5.4 took approximately 94 seconds for the same file (1.15 MB/second).
+ For a more realistic file of approx 28 kb (the source of the Markdown Syntax web page), both MMD and CommonMark parse it too quickly to accurately measure. In fact, it requires a file consisting of the original file copied 32 times over (0.85 MB) before /usr/bin/env time reports a time over the minimum threshold of 0.01 seconds for either program.
+ There is still potentially room for additional optimization in MMD. However, even if I can’t close the performance gap with CommonMark on longer files, the additional features of MMD compared with Markdown in addition to the increased legibility of the source code of MMD (in my biased opinion anyway) make this project worthwhile.
+ Parse Tree
+ MMD v6 performs its parsing in the following steps:
+
+
+ Start with a null-terminated string of source text (C style string)
+
+
+ Lex string into token chain
+
+
+ Parse token chain into blocks
+
+
+ Parse tokens within each block into span level structures (e.g. strong, emph, etc.)
+
+
+ Export the token tree into the desired output format (e.g. HTML, LaTeX, etc.) and return the resulting C style string
+ OR
+
+
+ Use the resulting token tree for your own purposes.
+
+
+ The token tree (AST,
+ Abstract Syntax Tree, https://en.wikipedia.org/wiki/Abstract_syntax_tree) includes starting offsets and length of each token, allowing you to use MMD as part of a syntax highlighter. MMD v5 did not have this functionality in the public version, in part because the PEG parsers used did not provide reliable offset positions, requiring a great deal of effort when I adapted MMD for use in MultiMarkdown Composer.
+ These steps are managed using the mmd_engine “object”. An individual mmd_engine cannot be used by multiple threads simultaneously, so if libMultiMarkdown is to be used in a multithreaded program, a separate mmd_engine should be created for each thread. Alternatively, just use the slightly more abstracted mmd_convert_string() function that handles creating and destroying the mmd_engine automatically.
+ Features
+ Abbreviations (Or Acronyms)
+ This file includes the use of MMD as an abbreviation for MultiMarkdown. The abbreviation will be expanded on the first use, and the shortened form will be used on subsequent occurrences.
+ Abbreviations can be specified using inline or reference syntax. The inline variant requires that the abbreviation be wrapped in parentheses and immediately follow the >.
+ [>MMD] is an abbreviation. So is [>(MD) Markdown]. [>MMD]: MultiMarkdown
+ There is also a “shortcut” method for abbreviations that is similar to the approach used in prior versions of MMD. You specify the definition for the abbreviation in the usual manner, but MMD will automatically identify each instance where the abbreviation is used and substitute it automatically. In this case, the abbreviation is limited to a more basic character set which includes letters, numbers, periods, and hyphens, but not much else. For more complex abbreviations, you must explicitly mark uses of the abbreviation.
+ Citations
+ Citations can be specified using an inline syntax, just like inline footnotes.
+ CriticMarkup
+ MMD v6 has improved support for CriticMarkup, both in terms of parsing, and in terms of support for each output format. You can insert text, delete text, substitute one thing for another, highlight text, and leave comments in the text.
+ Emph and Strong
+ The basics of emphasis and strong emphasis are unchanged, but the parsing engine has been improved to be more accurate, particularly in various edge cases where proper parsing can be difficult.
+ Fenced Code Blocks
+ Fenced code blocks are fundamentally the same as MMD v5, except:
+
+
+ The leading and trailing fences can be 3, 4, or 5 backticks in length. That should be sufficient to account for complex documents without requiring a more complex parser.
+
+
+ If there is no trailing fence, then everything after the leading fence is considered to be part of the code block.
+
+
+ Glossary Terms
+ If there are terms in your document you wish to define in a glossary
+ (the glossary collects information about important terms used in your document) at the end of your document, you can define them using the glossary syntax.
+ Glossary terms can be specified using inline or reference syntax. The inline variant requires that the term be wrapped in parentheses and immediately follow the ?.
+ [?(glossary) The glossary collects information about important terms used in your document] is a glossary term. [?glossary] is also a glossary term. [?glossary]: The glossary collects information about important terms used in your document
+ Much like abbreviations, there is also a “shortcut” method that is similar to the approach used in prior versions of MMD. You specify the definition for the glossary term in the usual manner, but MMD will automatically identify each instance where the term is used and substitute it automatically. In this case, the term is limited to a more basic character set which includes letters, numbers, periods, and hyphens, but not much else. For more complex glossary terms, you must explicitly mark uses of the term.
+ Internationalization
+ MMD v6 includes support for substituting certain text phrases in other languages. This only affects the HTML format.
+ Metadata
+ Metadata in MMD v6 includes new support for LaTeX – the latex config key allows you to automatically set up multiple latex include files at once. The default setups that I use would typically consist of one LaTeX file to be included at the top of the file, one to be included right at the beginning of the document, and one to be included at the end of the document. If you want to specify the latex files separately, you can use latex leader, latex begin, and latex footer.
+ Table of Contents
+ By placing {{TOC}} in your document, you can insert an automatically generated Table of Contents in your document. As of MMD v6, the native Table of Contents functionality is used when exporting to LaTeX or OpenDocument formats.
+ Future Steps
+ Some features I plan to implement at some point:
+
+
+ MMD v5 used to automatically identify abbreviated terms throughout the document and substitute them automatically. I plan to reimplement this functionality, but will probably improve upon it to include glossary terms, and possibly even support for indexing documents in LaTeX (and possibly OpenOffice).
+
+
+ OPML export support is not available in v6. I plan on adding improved support for this at some point. I was hoping to be able to re-use the existing v6 parser but it might be simpler to use the approach from v5 and earlier, which was to have a separate parser tuned to only identify headers and “stuff between headers”.
+
+
+ Improved EPUB support. Currently, EPUB support is provided by a separate tool. At some point, I would like to better integrate this into MMD itself.
+
+
+
+
+
\ No newline at end of file
diff --git a/QuickStart.html b/QuickStart.html
index 7fa3ff9..ebb2b72 100644
--- a/QuickStart.html
+++ b/QuickStart.html
@@ -78,7 +78,7 @@ non-programmers, but the generated parser is still fast.
inside of blocks, such as links, images, strong, emph, etc. Most of these
structures require matching pairs of tokens to specify where the span starts
and where it ends. Most of these spans allow arbitrary levels of nesting as
-well. This made parsing them correctly in the PEG-based code difficult and
+well. This made parsing them correctly in the PEG-based code difficult and
slow. MMD v6 uses a different approach that is accurate and has good
performance characteristics even with edge cases. Basically, it keeps a stack
of each “opening” token as it steps through the token chain. When a “closing”
@@ -134,9 +134,9 @@ etc.) and return the resulting C style string
The token tree (AST) includes starting offsets and length of each token,
allowing you to use MMD as part of a syntax highlighter. MMD v5 did not
-have this functionality in the public version, in part because the PEG parsers
+have this functionality in the public version, in part because the PEG parsers
used did not provide reliable offset positions, requiring a great deal of
-effort when I adapted MMD for use in MultiMarkdown
+effort when I adapted MMD for use in MultiMarkdown
Composer.
These steps are managed using the mmd_engine
“object”. An individual
@@ -163,6 +163,14 @@ immediately follows the >
.
[>MMD]: MultiMarkdown
+There is also a “shortcut” method for abbreviations that is similar to the
+approach used in prior versions of MMD. You specify the definition for the
+abbreviation in the usual manner, but MMD will automatically identify each
+instance where the abbreviation is used and substitute it automatically. In
+this case, the abbreviation is limited to a more basic character set which
+includes letters, numbers, periods, and hyphens, but not much else. For more
+complex abbreviations, you must explicitly mark uses of the abbreviation.
+
Citations
Citations can be specified using an inline syntax, just like inline footnotes.
@@ -210,6 +218,14 @@ terms used in your document] is a glossary term.
terms used in your document
+Much like abbreviations, there is also a “shortcut” method that is similar to
+the approach used in prior versions of MMD. You specify the definition for
+the glossary term in the usual manner, but MMD will automatically identify
+each instance where the term is used and substitute it automatically. In this
+case, the term is limited to a more basic character set which includes
+letters, numbers, periods, and hyphens, but not much else. For more complex
+glossary terms, you must explicitly mark uses of the term.
+
Internationalization
MMD v6 includes support for substituting certain text phrases in other
diff --git a/QuickStart.pdf b/QuickStart.pdf
index eda4faa..ef6aa03 100644
Binary files a/QuickStart.pdf and b/QuickStart.pdf differ
diff --git a/QuickStart.txt b/QuickStart.txt
index 4818a01..dae3a65 100644
--- a/QuickStart.txt
+++ b/QuickStart.txt
@@ -12,53 +12,53 @@ Base Header Level: 3
Version: [%version]
-This document serves as a description of [>MMD] v6, as well as a sample
+This document serves as a description of MMD v6, as well as a sample
document to demonstrate the various features. Specifically, differences from
-[>MMD] v5 will be pointed out.
+MMD v5 will be pointed out.
# Performance #
-A big motivating factor leading to the development of [>MMD] v6 was
-performance. When [>MMD] first migrated from Perl to C (based on [peg-
+A big motivating factor leading to the development of MMD v6 was
+performance. When MMD first migrated from Perl to C (based on [peg-
markdown](https://github.com/jgm/peg-markdown)), it was among the fastest
Markdown parsers available. That was many years ago, and the "competition"
has made a great deal of progress since that time.
-When developing [>MMD] v6, one of my goals was to keep [>MMD] at least in the
+When developing MMD v6, one of my goals was to keep MMD at least in the
ballpark of the fastest processors. Of course, being *the* fastest would be
fantastic, but I was more concerned with ensuring that the code was easily
understood, and easily updated with new features in the future.
-[>MMD] v3 -- v5 used a [?PEG] to handle the parsing. This made it easy to
-understand the relationship between the [>MMD] grammar and the parsing code,
+MMD v3 -- v5 used a PEG to handle the parsing. This made it easy to
+understand the relationship between the MMD grammar and the parsing code,
since they were one and the same. However, the parsing code generated by
the parsers was not particularly fast, and was prone to troublesome edge
cases with terrible performance characteristics.
-The first step in [>MMD] v6 parsing is to break the source text into a series
+The first step in MMD v6 parsing is to break the source text into a series
of tokens, which may consist of plain text, whitespace, or special characters
such as '*', '[', etc. This chain of tokens is then used to perform the
actual parsing.
-[>MMD] v6 divides the parsing into two separate phases, which actually fits
+MMD v6 divides the parsing into two separate phases, which actually fits
more with Markdown's design philosophically.
1. Block parsing consists of identifying the "type" of each line of the
source text, and grouping the lines into blocks (e.g. paragraphs, lists,
blockquotes, etc.) Some blocks are a single line (e.g. ATX headers), and
-others can be many lines long. The block parsing in [>MMD] v6 is handled
+others can be many lines long. The block parsing in MMD v6 is handled
by a parser generated by [lemon](http://www.hwaci.com/sw/lemon/). This
parser allows the block structure to be more readily understood by
non-programmers, but the generated parser is still fast.
-2. Span parsing consists of identifying Markdown/[>MMD] structures that occur
+2. Span parsing consists of identifying Markdown/MMD structures that occur
inside of blocks, such as links, images, strong, emph, etc. Most of these
structures require matching pairs of tokens to specify where the span starts
and where it ends. Most of these spans allow arbitrary levels of nesting as
well. This made parsing them correctly in the PEG-based code difficult and
-slow. [>MMD] v6 uses a different approach that is accurate and has good
+slow. MMD v6 uses a different approach that is accurate and has good
performance characteristics even with edge cases. Basically, it keeps a stack
of each "opening" token as it steps through the token chain. When a "closing"
token is found, it is paired with the most recent appropriate opener on the
@@ -68,35 +68,35 @@ existent openers, the parser keeps track of which opening tokens have been
discovered. This allows the parser to continue moving forwards without having
to go backwards and re-parse any previously visited tokens.
-The result of this redesigned [>MMD] parser is that it can parse short
+The result of this redesigned MMD parser is that it can parse short
documents more quickly than [CommonMark](http://commonmark.org/), and takes
only 15% -- 20% longer to parse long documents. I have not delved too deeply
into this, but I presume that CommonMark has a bit more "set-up" time that
becomes expensive when parsing a short document (e.g. a paragraph or two). But
this cost becomes negligible when parsing longer documents (e.g. file sizes of
1 MB). So depending on your use case, CommonMark may well be faster than
-[>MMD], but we're talking about splitting hairs here.... Recent comparisons
-show [>MMD] v6 taking approximately 4.37 seconds to parse a 108 MB file
+MMD, but we're talking about splitting hairs here.... Recent comparisons
+show MMD v6 taking approximately 4.37 seconds to parse a 108 MB file
(approximately 24.8 MB/second), and CommonMark took 3.72 seconds for the same
-file (29.2 MB/second). For comparison, [>MMD] v5.4 took approximately 94
+file (29.2 MB/second). For comparison, MMD v5.4 took approximately 94
seconds for the same file (1.15 MB/second).
For a more realistic file of approx 28 kb (the source of the Markdown Syntax
-web page), both [>MMD] and CommonMark parse it too quickly to accurately
+web page), both MMD and CommonMark parse it too quickly to accurately
measure. In fact, it requires a file consisting of the original file copied
32 times over (0.85 MB) before `/usr/bin/env time` reports a time over the
minimum threshold of 0.01 seconds for either program.
-There is still potentially room for additional optimization in [>MMD].
+There is still potentially room for additional optimization in MMD.
However, even if I can't close the performance gap with CommonMark on longer
-files, the additional features of [>MMD] compared with Markdown in addition to
-the increased legibility of the source code of [>MMD] (in my biased opinion
+files, the additional features of MMD compared with Markdown in addition to
+the increased legibility of the source code of MMD (in my biased opinion
anyway) make this project worthwhile.
# Parse Tree #
-[>MMD] v6 performs its parsing in the following steps:
+MMD v6 performs its parsing in the following steps:
1. Start with a null-terminated string of source text (C style string)
@@ -115,7 +115,7 @@ etc.) and return the resulting C style string
6. Use the resulting token tree for your own purposes.
The token tree ([?AST]) includes starting offsets and length of each token,
-allowing you to use [>MMD] as part of a syntax highlighter. [>MMD] v5 did not
+allowing you to use MMD as part of a syntax highlighter. MMD v5 did not
have this functionality in the public version, in part because the PEG parsers
used did not provide reliable offset positions, requiring a great deal of
effort when I adapted MMD for use in [MultiMarkdown
@@ -133,7 +133,7 @@ and destroying the `mmd_engine` automatically.
## Abbreviations (Or Acronyms) ##
-This file includes the use of [>MMD] as an abbreviation for MultiMarkdown. The
+This file includes the use of MMD as an abbreviation for MultiMarkdown. The
abbreviation will be expanded on the first use, and the shortened form will be
used on subsequent occurrences.
@@ -145,6 +145,14 @@ immediately follows the `>`.
[>MMD]: MultiMarkdown
+There is also a "shortcut" method for abbreviations that is similar to the
+approach used in prior versions of MMD. You specify the definition for the
+abbreviation in the usual manner, but MMD will identify each
+instance where the abbreviation is used and substitute it automatically. In
+this case, the abbreviation is limited to a more basic character set which
+includes letters, numbers, periods, and hyphens, but not much else. For more
+complex abbreviations, you must explicitly mark uses of the abbreviation.
+
## Citations ##
@@ -153,7 +161,7 @@ Citations can be specified using an inline syntax, just like inline footnotes.
## CriticMarkup ##
-[>MMD] v6 has improved support for [CriticMarkup], both in terms of parsing, and
+MMD v6 has improved support for [CriticMarkup], both in terms of parsing, and
in terms of support for each output format. You can {++insert text++},
{--delete text--}, substitute {~~one thing~>for another~~}, {==highlight text==},
and {>>leave comments<<} in the text.
@@ -168,7 +176,7 @@ cases where proper parsing can be difficult.
## Fenced Code Blocks ##
-Fenced code blocks are fundamentally the same as [>MMD] v5, except:
+Fenced code blocks are fundamentally the same as MMD v5, except:
1. The leading and trailing fences can be 3, 4, or 5 backticks in length. That
should be sufficient to account for complex documents without requiring a more
@@ -196,16 +204,24 @@ immediately follows the `?`.
[?glossary]: The glossary collects information about important
terms used in your document
+Much like abbreviations, there is also a "shortcut" method that is similar to
+the approach used in prior versions of MMD. You specify the definition for
+the glossary term in the usual manner, but MMD will identify
+each instance where the term is used and substitute it automatically. In this
+case, the term is limited to a more basic character set which includes
+letters, numbers, periods, and hyphens, but not much else. For more complex
+glossary terms, you must explicitly mark uses of the term.
+
## Internationalization ##
-[>MMD] v6 includes support for substituting certain text phrases in other
+MMD v6 includes support for substituting certain text phrases in other
languages. This only affects the HTML format.
## Metadata ##
-Metadata in [>MMD] v6 includes new support for LaTeX -- the `latex config` key
+Metadata in MMD v6 includes new support for LaTeX -- the `latex config` key
allows you to automatically set up multiple `latex include` files at once.
The default setups that I use would typically consist of one LaTeX file to be
included at the top of the file, one to be included right at the beginning of
@@ -217,7 +233,7 @@ begin`, and `latex footer`.
## Table of Contents ##
By placing `{{TOC}}` in your document, you can insert an automatically
-generated Table of Contents in your document. As of [>MMD] v6, the native
+generated Table of Contents in your document. As of MMD v6, the native
Table of Contents functionality is used when exporting to LaTeX or
OpenDocument formats.
@@ -226,7 +242,7 @@ OpenDocument formats.
Some features I plan to implement at some point:
-1. [>MMD] v5 used to automatically identify abbreviated terms throughout the
+1. MMD v5 used to automatically identify abbreviated terms throughout the
document and substitute them automatically. I plan to reimplement this
functionality, but will probably improve upon it to include glossary terms,
and possibly even support for indexing documents in LaTeX (and possibly
@@ -240,7 +256,7 @@ and "stuff between headers".
3. Improved EPUB support. Currently, EPUB support is provided by a separate
[tool](https://github.com/fletcher/MMD-ePub). At some point, I would like to
-better integrate this into [>MMD] itself.
+better integrate this into MMD itself.
diff --git a/Sources/libMultiMarkdown/html.c b/Sources/libMultiMarkdown/html.c
index a1f836e..de07093 100644
--- a/Sources/libMultiMarkdown/html.c
+++ b/Sources/libMultiMarkdown/html.c
@@ -1295,7 +1295,12 @@ void mmd_export_token_html(DString * out, const char * source, token * t, scratc
if (temp_short == -1) {
// This instance is not properly formed
print_const("[?");
- mmd_export_token_tree_html(out, source, t->child->next, scratch);
+
+ if (t->child)
+ mmd_export_token_tree_html(out, source, t->child->next, scratch);
+ else
+ print_token(t);
+
print_const("]");
break;
}
diff --git a/Sources/libMultiMarkdown/latex.c b/Sources/libMultiMarkdown/latex.c
index f59fdab..3161fcb 100644
--- a/Sources/libMultiMarkdown/latex.c
+++ b/Sources/libMultiMarkdown/latex.c
@@ -1219,7 +1219,12 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
if (temp_short == -1) {
// This instance is not properly formed
print_const("[?");
- mmd_export_token_tree_latex(out, source, t->child->next, scratch);
+
+ if (t->child)
+ mmd_export_token_tree_latex(out, source, t->child->next, scratch);
+ else
+ print_token(t);
+
print_const("]");
break;
}
@@ -1231,7 +1236,7 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
// This is a re-use of a previously used note
print("\\gls{");
- print(temp_note->label_text);
+ print(temp_note->clean_text);
print("}");
} else {
// This is the first time this note was used
@@ -1239,12 +1244,12 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
if (temp_short3 == scratch->inline_glossaries_to_free->size) {
// This is a reference definition
print_const("\\gls{");
- print(temp_note->label_text);
+ print(temp_note->clean_text);
print_const("}");
} else {
// This is an inline definition
print_const("\\newglossaryentry{");
- print(temp_note->label_text);
+ print(temp_note->clean_text);
print_const("}{name=");
print(temp_note->clean_text);
@@ -1254,7 +1259,7 @@ void mmd_export_token_latex(DString * out, const char * source, token * t, scrat
// We skip over temp_note->content, since that is the term in use
mmd_export_token_tree_latex(out, source, temp_note->content, scratch);
print_const("}}\\gls{");
- print(temp_note->label_text);
+ print(temp_note->clean_text);
print_const("}");
}
}
@@ -1710,7 +1715,7 @@ void mmd_define_glossaries_latex(DString * out, const char * source, scratch_pad
HASH_ITER(hh, scratch->glossary_hash, f, f_tmp) {
// Add this glossary definition
print_const("\\longnewglossaryentry{");
- print(f->note->label_text);
+ print(f->note->clean_text);
print_const("}{name=");
print(f->note->clean_text);
diff --git a/Sources/libMultiMarkdown/odf.c b/Sources/libMultiMarkdown/odf.c
index 2f687a7..96e6d6f 100644
--- a/Sources/libMultiMarkdown/odf.c
+++ b/Sources/libMultiMarkdown/odf.c
@@ -1201,7 +1201,7 @@ void mmd_export_token_odf(DString * out, const char * source, token * t, scratch
print_const(")");
} else {
// This is an inline definition
- mmd_export_token_tree_odf(out, source, temp_note->content, scratch);
+ mmd_print_string_odf(out, temp_note->clean_text);
print_const(" (");
mmd_print_string_odf(out, temp_note->label_text);
print_const(")");
diff --git a/Sources/libMultiMarkdown/writer.c b/Sources/libMultiMarkdown/writer.c
index 02ed040..a1788c2 100644
--- a/Sources/libMultiMarkdown/writer.c
+++ b/Sources/libMultiMarkdown/writer.c
@@ -704,6 +704,14 @@ void store_citation(scratch_pad * scratch, footnote * f) {
void store_glossary(scratch_pad * scratch, footnote * f) {
fn_holder * temp_holder;
+ // Store by `clean_text`?
+ HASH_FIND_STR(scratch->glossary_hash, f->clean_text, temp_holder);
+
+ if (!temp_holder) {
+ temp_holder = fn_holder_new(f);
+ HASH_ADD_KEYPTR(hh, scratch->glossary_hash, f->clean_text, strlen(f->clean_text), temp_holder);
+ }
+
// Store by `label_text`?
HASH_FIND_STR(scratch->glossary_hash, f->label_text, temp_holder);
@@ -1259,6 +1267,8 @@ void process_definition_block(mmd_engine * e, token * block) {
f = footnote_new(e->dstr->str, label, block->child, false);
if (f && f->clean_text)
memmove(f->clean_text, &(f->clean_text)[1],strlen(f->clean_text));
+ //if (f && f->label_text)
+ // memmove(f->label_text, &(f->label_text)[1],strlen(f->label_text));
stack_push(e->glossary_stack, f);
break;
@@ -1560,7 +1570,7 @@ void identify_global_search_terms(mmd_engine * e, scratch_pad * scratch) {
trie_insert(ac, f->label_text, PAIR_BRACKET_ABBREVIATION);
}
- // Add glossary to search trie
+ // Add glossary to search trie (without leading '?')
for (int i = 0; i < e->glossary_stack->size; ++i)
{
f = stack_peek_index(e->glossary_stack, i);
@@ -1982,11 +1992,11 @@ void glossary_from_bracket(const char * source, scratch_pad * scratch, token * t
if (t->child) {
text = text_inside_pair(source, t);
+ memmove(text, &text[1], strlen(text));
} else {
- text = malloc(t->len + 2);
- text[0] = '?';
- memcpy(&text[1], &source[t->start], t->len);
- text[t->len + 1] = '\0';
+ text = malloc(t->len + 1);
+ memcpy(text, &source[t->start], t->len);
+ text[t->len] = '\0';
}
short glossary_id = extract_glossary_from_stack(scratch, text);
diff --git a/tests/MMD6Tests/Abbreviations.fodt b/tests/MMD6Tests/Abbreviations.fodt
index 35ccef2..03c46f0 100644
--- a/tests/MMD6Tests/Abbreviations.fodt
+++ b/tests/MMD6Tests/Abbreviations.fodt
@@ -306,6 +306,8 @@
bar
foo bar
+
+BAZ (baz)
diff --git a/tests/MMD6Tests/Abbreviations.html b/tests/MMD6Tests/Abbreviations.html
index 9f580e0..b0a1769 100644
--- a/tests/MMD6Tests/Abbreviations.html
+++ b/tests/MMD6Tests/Abbreviations.html
@@ -37,6 +37,8 @@
foo bar
+BAZ (baz)
+