--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+ xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"
+ xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
+ xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"
+ xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+ xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
+ xmlns:xlink="http://www.w3.org/1999/xlink"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
+ xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
+ xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
+ xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
+ xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
+ xmlns:math="http://www.w3.org/1998/Math/MathML"
+ xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+ xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"
+ xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0"
+ xmlns:ooo="http://openoffice.org/2004/office"
+ xmlns:ooow="http://openoffice.org/2004/writer"
+ xmlns:oooc="http://openoffice.org/2004/calc"
+ xmlns:dom="http://www.w3.org/2001/xml-events"
+ xmlns:xforms="http://www.w3.org/2002/xforms"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:rpt="http://openoffice.org/2005/report"
+ xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2"
+ xmlns:xhtml="http://www.w3.org/1999/xhtml"
+ xmlns:grddl="http://www.w3.org/2003/g/data-view#"
+ xmlns:tableooo="http://openoffice.org/2009/table"
+ xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
+ xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0"
+ xmlns:css3t="http://www.w3.org/TR/css3-text/"
+ office:version="1.2"
+ grddl:transformation="http://docs.oasis-open.org/office/1.2/xslt/odf2rdf.xsl"
+ office:mimetype="application/vnd.oasis.opendocument.text">
+<office:font-face-decls>
+ <style:font-face style:name="Courier New" svg:font-family="'Courier New'"
+ style:font-adornments="Regular"
+ style:font-family-generic="modern"
+ style:font-pitch="fixed"/>
+</office:font-face-decls>
+<office:styles>
+<style:style style:name="Standard" style:family="paragraph" style:class="text">
+ <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0.15in" fo:text-align="justify" style:justify-single-word="false"/>
+ </style:style>
+<style:style style:name="Preformatted_20_Text" style:display-name="Preformatted Text"
+ style:family="paragraph"
+ style:parent-style-name="Standard"
+ style:class="html">
+ <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0in" fo:text-align="start"
+ style:justify-single-word="false"/>
+ <style:text-properties style:font-name="Courier New" fo:font-size="11pt"
+ style:font-name-asian="Courier New"
+ style:font-size-asian="11pt"
+ style:font-name-complex="Courier New"
+ style:font-size-complex="11pt"/>
+</style:style>
+<style:style style:name="Source_20_Text" style:display-name="Source Text"
+ style:family="text">
+ <style:text-properties style:font-name="Courier New" style:font-name-asian="Courier New"
+ style:font-name-complex="Courier New"
+ fo:font-size="11pt"/>
+</style:style>
+<style:style style:name="List" style:family="paragraph"
+ style:parent-style-name="Standard"
+ style:class="list">
+ <style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/>
+ <style:text-properties style:font-size-asian="12pt"/>
+</style:style>
+<style:style style:name="Quotations" style:family="paragraph"
+ style:parent-style-name="Standard"
+ style:class="html">
+ <style:paragraph-properties fo:margin-left="0.3937in" fo:margin-right="0.3937in" fo:margin-top="0in"
+ fo:margin-bottom="0.1965in"
+ fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0in"
+ style:auto-text-indent="false"/>
+</style:style>
+<style:style style:name="Table_20_Heading" style:display-name="Table Heading"
+ style:family="paragraph"
+ style:parent-style-name="Table_20_Contents"
+ style:class="extra">
+ <style:paragraph-properties fo:text-align="center" style:justify-single-word="false"
+ text:number-lines="false"
+ text:line-number="0"/>
+ <style:text-properties fo:font-weight="bold" style:font-weight-asian="bold"
+ style:font-weight-complex="bold"/>
+</style:style>
+<style:style style:name="Horizontal_20_Line" style:display-name="Horizontal Line"
+ style:family="paragraph"
+ style:parent-style-name="Standard"
+ style:class="html">
+ <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0.1965in"
+ style:border-line-width-bottom="0.0008in 0.0138in 0.0008in"
+ fo:padding="0in"
+ fo:border-left="none"
+ fo:border-right="none"
+ fo:border-top="none"
+ fo:border-bottom="0.0154in double #808080"
+ text:number-lines="false"
+ text:line-number="0"
+ style:join-border="false"/>
+ <style:text-properties fo:font-size="6pt" style:font-size-asian="6pt" style:font-size-complex="6pt"/>
+</style:style>
+<style:style style:name="Footnote_20_anchor" style:display-name="Footnote anchor" style:family="text"> <style:text-properties style:text-position="super 58%"/> </style:style>
+<style:style style:name="TOC_Item" style:family="paragraph" style:parent-style-name="Standard">
+ <style:paragraph-properties>
+ <style:tab-stops>
+ <style:tab-stop style:position="6.7283in" style:type="right" style:leader-style="dotted" style:leader-text="."/>
+ </style:tab-stops>
+ </style:paragraph-properties>
+</style:style>
+ <text:notes-configuration text:note-class="footnote" text:default-style-name="Footnote" text:citation-style-name="Footnote_20_Symbol" text:citation-body-style-name="Footnote_20_anchor" text:master-page-name="Footnote" style:num-format="a" text:start-value="0" text:footnotes-position="page" text:start-numbering-at="page"/>
+ <text:notes-configuration text:note-class="endnote" text:default-style-name="Endnote" text:citation-style-name="Endnote_20_Symbol" text:citation-body-style-name="Endnote_20_anchor" text:master-page-name="Endnote" style:num-format="1" text:start-value="0"/>
+</office:styles>
+<office:automatic-styles> <style:style style:name="MMD-Italic" style:family="text">
+ <style:text-properties fo:font-style="italic" style:font-style-asian="italic"
+ style:font-style-complex="italic"/>
+ </style:style>
+ <style:style style:name="MMD-Bold" style:family="text">
+ <style:text-properties fo:font-weight="bold" style:font-weight-asian="bold"
+ style:font-weight-complex="bold"/>
+ </style:style>
+ <style:style style:name="MMD-Superscript" style:family="text">
+ <style:text-properties style:text-position="super 58%"/>
+ </style:style>
+ <style:style style:name="MMD-Subscript" style:family="text">
+ <style:text-properties style:text-position="sub 58%"/>
+ </style:style>
+ <style:style style:name="Strike" style:family="text">
+ <style:text-properties style:text-line-through-style="solid" />
+ </style:style>
+ <style:style style:name="Underline" style:family="text">
+ <style:text-properties style:text-underline-style="solid" style:text-underline-color="font-color"/>
+ </style:style>
+ <style:style style:name="Highlight" style:family="text">
+ <style:text-properties fo:background-color="#FFFF00" />
+ </style:style>
+ <style:style style:name="Comment" style:family="text">
+ <style:text-properties fo:color="#0000BB" />
+ </style:style>
+<style:style style:name="MMD-Table" style:family="paragraph" style:parent-style-name="Standard">
+ <style:paragraph-properties fo:margin-top="0in" fo:margin-bottom="0.05in"/>
+</style:style>
+<style:style style:name="MMD-Table-Center" style:family="paragraph" style:parent-style-name="MMD-Table">
+ <style:paragraph-properties fo:text-align="center" style:justify-single-word="false"/>
+</style:style>
+<style:style style:name="MMD-Table-Right" style:family="paragraph" style:parent-style-name="MMD-Table">
+ <style:paragraph-properties fo:text-align="right" style:justify-single-word="false"/>
+</style:style>
+<style:style style:name="P2" style:family="paragraph" style:parent-style-name="Standard"
+ style:list-style-name="L2">
+<style:paragraph-properties fo:text-align="start" style:justify-single-word="false"/>
+</style:style>
+<style:style style:name="fr1" style:family="graphic" style:parent-style-name="Frame">
+ <style:graphic-properties style:print-content="true" style:vertical-pos="top"
+ style:vertical-rel="baseline"
+ fo:padding="0in"
+ fo:border="none"
+ style:shadow="none"/>
+</style:style>
+<style:style style:name="P1" style:family="paragraph" style:parent-style-name="Standard"
+ style:list-style-name="L1"/>
+<text:list-style style:name="L1">
+ <text:list-level-style-bullet text:level="1" text:style-name="Numbering_20_Symbols" style:num-suffix="." text:bullet-char="•">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="0.5in" fo:text-indent="-0.25in" fo:margin-left="0.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-bullet>
+ <text:list-level-style-bullet text:level="2" text:style-name="Numbering_20_Symbols" style:num-suffix="." text:bullet-char="◦">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="0.75in" fo:text-indent="-0.25in" fo:margin-left="0.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-bullet>
+ <text:list-level-style-bullet text:level="3" text:style-name="Numbering_20_Symbols" style:num-suffix="." text:bullet-char="▪">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1in" fo:text-indent="-0.25in" fo:margin-left="1in"/>
+ </style:list-level-properties>
+ </text:list-level-style-bullet>
+ <text:list-level-style-number text:level="4" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.25in" fo:text-indent="-0.25in" fo:margin-left="1.25in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="5" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.5in" fo:text-indent="-0.25in" fo:margin-left="1.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="6" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.75in" fo:text-indent="-0.25in" fo:margin-left="1.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="7" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2in" fo:text-indent="-0.25in" fo:margin-left="2in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="8" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.25in" fo:text-indent="-0.25in" fo:margin-left="2.25in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="9" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.5in" fo:text-indent="-0.25in" fo:margin-left="2.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="10" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.75in" fo:text-indent="-0.25in" fo:margin-left="2.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+</text:list-style>
+<text:list-style style:name="L2">
+ <text:list-level-style-number text:level="1" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="0.5in" fo:text-indent="-0.25in" fo:margin-left="0.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="2" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="0.75in" fo:text-indent="-0.25in" fo:margin-left="0.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="3" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1in" fo:text-indent="-0.25in" fo:margin-left="1in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="4" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.25in" fo:text-indent="-0.25in" fo:margin-left="1.25in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="5" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.5in" fo:text-indent="-0.25in" fo:margin-left="1.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="6" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="1.75in" fo:text-indent="-0.25in" fo:margin-left="1.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="7" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2in" fo:text-indent="-0.25in" fo:margin-left="2in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="8" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.25in" fo:text-indent="-0.25in" fo:margin-left="2.25in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="9" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.5in" fo:text-indent="-0.25in" fo:margin-left="2.5in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+ <text:list-level-style-number text:level="10" text:style-name="Standard" style:num-suffix="." style:num-format="1">
+ <style:list-level-properties text:list-level-position-and-space-mode="label-alignment">
+ <style:list-level-label-alignment text:label-followed-by="listtab" text:list-tab-stop-position="2.75in" fo:text-indent="-0.25in" fo:margin-left="2.75in"/>
+ </style:list-level-properties>
+ </text:list-level-style-number>
+</text:list-style>
+</office:automatic-styles>
+ <office:master-styles>
+ <style:master-page style:name="Endnote" >
+ <style:header><text:h text:outline-level="2">Bibliography</text:h></style:header></style:master-page>
+ <style:master-page style:name="Footnote" style:page-layout-name="pm2"/>
+ </office:master-styles>
+<office:meta>
+ <dc:title>MultiMarkdown v6 Development Notes</dc:title>
+ <meta:user-defined meta:name="author">Fletcher T. Penney</meta:user-defined>
+ <meta:user-defined meta:name="date">2017-03-14</meta:user-defined>
+</office:meta>
+<office:body>
+<office:text>
+<text:h text:outline-level="3"><text:bookmark text:name="introduction"/>Introduction </text:h>
+
+<text:p text:style-name="Standard">This document includes some notes on the development of MultiMarkdown (MMD) v6. Most of it
+will be interesting only to other developers or those needing to choose the
+absolute “best” Markdown (MD) implementation for their needs – it is not required
+reading to understand how the software works.</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="whyanewversion"/>Why a New Version? </text:h>
+
+<text:p text:style-name="Standard">MultiMarkdown version 5 was released in November of 2015, but the codebase was
+essentially the same as that of v4 – and that was released in beta in April
+of 2013. A few key things prompted work on a new version:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="Standard">Accuracy – MMD v4 and v5 were the most accurate versions yet, and a lot of
+effort went into finding and resolving various edge cases. However, it began
+to feel like a game of whack-a-mole where new bugs would creep in every time I
+fixed an old one. The PEG<text:note text:id="gn1" text:note-class="glossary"><text:note-body><text:p text:style-name="Footnote">Parsing Expression Grammar <text:a xlink:type="simple" xlink:href="https://en.wikipedia.org/wiki/Parsing_expression_grammar">https://en.wikipedia.org/wiki/Parsing_expression_grammar</text:a></text:p></text:note-body></text:note> began to feel rather convoluted in spots, even
+though it did allow for a precise (if not always accurate) specification of
+the grammar.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Performance – “Back in the day” <text:a xlink:type="simple" xlink:href="https://github.com/jgm/peg-markdown">peg-markdown</text:a> was one of the fastest
+Markdown parsers around. MMD v3 was based on peg-markdown, and would
+leapfrog with it in terms of performance. Then <text:a xlink:type="simple" xlink:href="http://commonmark.org/">CommonMark</text:a> was released, which
+was a bit faster. Then a couple of years went by and CommonMark became <text:span text:style-name="MMD-Italic">much</text:span>
+faster – in one of my test suites, MMD v 5.4.0 takes about 25 times longer to
+process a long document than CommonMark 0.27.0.</text:p></text:list-item>
+
+</text:list>
+
+<text:p text:style-name="Standard">In the spring of 2016, I decided I wanted to rewrite MultiMarkdown from scratch,
+building the parser myself rather than relying on a pre-rolled solution. (I
+had been using <text:a xlink:type="simple" xlink:href="https://github.com/ooc-lang/greg">greg</text:a> to compile the PEG
+into parser code. It worked well overall, but lacked some features I needed,
+requiring a lot of workarounds.)</text:p>
+
+<text:h text:outline-level="3"><text:bookmark text:name="firstattempt"/>First Attempt </text:h>
+
+<text:p text:style-name="Standard">My first attempt started by hand-crafting a parser that scanned through the
+document a line at a time, deciding what to do with each line as it was
+encountered. I used regex parsers made with <text:a xlink:type="simple" xlink:href="http://re2c.org/index.html">re2c</text:a> to
+help classify each line, and then a separate parser layer to process groups of
+lines into blocks. Initially this approach worked well, and was really
+efficient. But I quickly began to code my way into a dead-end – the strategy
+was not elegant enough to handle things like nested lists, etc.</text:p>
+
+<text:p text:style-name="Standard">One thing that did turn out well from the first attempt, however, was an
+approach for handling <text:span text:style-name="Source_20_Text"><emph></text:span> and <text:span text:style-name="Source_20_Text"><strong></text:span> parsing. I’ve learned over the
+years that this can be one of the hardest parts of coding accurately for
+Markdown. There are many examples that are obvious to a person, but difficult
+to properly “explain” to a computer in terms of how to parse them.</text:p>
+
+<text:p text:style-name="Standard">No solution is perfect, but I developed an approach that seems to accurately
+handle a wide range of situations without a great deal of complexity:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">Scan the documents for asterisks (<text:span text:style-name="Source_20_Text">*</text:span>). Each one will be handled one at a
+time.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Unlike brackets (<text:span text:style-name="Source_20_Text">[</text:span> and <text:span text:style-name="Source_20_Text">]</text:span>), an asterisk is “ambidextrous”, in that it
+may be able to open a matched pair of asterisks, close a pair, or both. For
+example, in <text:span text:style-name="Source_20_Text">foo *bar* foo</text:span>:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">The first asterisk can open a pair, but not close one.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">The second asterisk can close a pair, but not open one.</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">So, once the asterisks have been identified, each has to be examined to
+determine whether it can open/close/both. The algorithm is not that complex,
+but I’ll describe it in general terms. Check the code for more specifics.
+This approach seems to work, but might still need some slight tweaking. In
+the future, I’ll codify this better in language rather than just in code.</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">If there is whitespace to the left of an asterisk, it can’t close.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">If there is whitespace or punctuation to the right it can’t open.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">“Runs” of asterisks, e.g. <text:span text:style-name="Source_20_Text">**bar</text:span> are treated as a unit in terms of
+looking left/right.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Asterisks inside a word are a bit trickier – we look at the number of
+asterisks before the word, the number in the current run, and the number
+of asterisks after the word to determine which combinations, if any, are
+permitted.</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Once all asterisks have been tagged as able to open/close/both, we proceed
+through them in order:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">When we encounter a tag that can close, we look to see if there is a
+previous opener that has not been paired off. If so, pair the two and
+remove the opener from the list of available asterisks.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">When we encounter an opener, add it to the stack of available openers.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">When encounter an asterisk that can do both, see if it can close an
+existing opener. If not, then add it to the stack.</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">After all tokens in the block have been paired, then we look for nesting
+pairs of asterisks in order to create <text:span text:style-name="Source_20_Text"><emph></text:span> and <text:span text:style-name="Source_20_Text"><strong></text:span> sets. For
+example, assume we have six asterisks wrapped around a word, three in front,
+and three after. The asterisks are indicated with numbers: <text:span text:style-name="Source_20_Text">123foo456</text:span>. We
+proceed in the following manner:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">Based on the pairing algorithm above, these asterisks would be paired as
+follows, with matching asterisks sharing numbers – <text:span text:style-name="Source_20_Text">123foo321</text:span>.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Moving forwards, we come to asterisk “1”. It is followed by an
+asterisk, so we check to see if they should be grouped as a <text:span text:style-name="Source_20_Text"><strong></text:span>.
+Since the “1” asterisks are wrapped immediately outside the “2” asterisks,
+they are joined together. More than two pairs can’t be joined, so we now
+get the following – <text:span text:style-name="Source_20_Text">112foo211</text:span>, where the “11” represents the opening
+and closing of a <text:span text:style-name="Source_20_Text"><strong></text:span>, and the “2” represents a <text:span text:style-name="Source_20_Text"><emph></text:span>.</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">When matching a pair, any unclosed openers that are on the stack are
+removed, preventing pairs from “crossing” or “intersecting”. Pairs can wrap
+around each other, e.g. <text:span text:style-name="Source_20_Text">[(foo)]</text:span>, but not intersect like <text:span text:style-name="Source_20_Text">[(foo])</text:span>. In the
+second case, the brackets would close, removing the <text:span text:style-name="Source_20_Text">(</text:span> from the stack.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">This same approach is used in all tokens that are matched in pairs–
+<text:span text:style-name="Source_20_Text">[foo]</text:span>, <text:span text:style-name="Source_20_Text">(foo)</text:span>, <text:span text:style-name="Source_20_Text">_foo_</text:span>, etc. There’s slightly more to it, but once you
+figure out how to assign opening/closing ability, the rest is easy. By using
+a stack to track available openers, it can be performed efficiently.</text:p></text:list-item>
+
+</text:list>
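+
+<text:p text:style-name="Standard">A minimal sketch of that pairing pass in C might look something like the following. The structure and names here are hypothetical, and the real code also has to check that the opener and closer are compatible token types and handle runs of asterisks:</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* Simplified sketch of the token-pairing pass (illustration only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">typedef struct token {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int can_open, can_close; /* set by the classification step */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>struct token * mate; /* set once paired off */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>struct token * next;</text:p>
+<text:p text:style-name="Preformatted_20_Text">} token;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">void pair_tokens(token * chain) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>token * openers[1024]; /* stack of available openers */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int depth = 0;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>for (token * t = chain; t; t = t-&gt;next) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>if (t-&gt;can_close &amp;&amp; depth &gt; 0) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="12"/>token * opener = openers[--depth]; /* pair with the nearest opener */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="12"/>opener-&gt;mate = t;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="12"/>t-&gt;mate = opener;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>} else if (t-&gt;can_open &amp;&amp; depth &lt; 1024) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="12"/>openers[depth++] = t; /* remember it for a later closer */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>/* a closer with no available opener stays plain text */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>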
+
+<text:p text:style-name="Standard">In my testing, this approach has worked quite well. It handles all the basic
+scenarios I’ve thrown at it, and all of the “basic” and “devious” edge cases I
+have thought of (some of these don’t necessarily have a “right” answer – but
+v6 gives consistent answers that seem as reasonable as any others to me).
+There are also three more edge cases I’ve come up with that can still stump it, and
+ironically they are handled correctly by most implementations. They just
+don’t follow the rules above. I’ll continue to work on this.</text:p>
+
+<text:p text:style-name="Standard">In the end, I scrapped this effort, but kept the lessons learned in the token
+pairing algorithm.</text:p>
+
+<text:h text:outline-level="3"><text:bookmark text:name="secondattempt"/>Second Attempt </text:h>
+
+<text:p text:style-name="Standard">I tried again this past Fall. This time, I approached the problem with lots
+of reading. <text:span text:style-name="MMD-Italic">Lots and lots</text:span> of reading – tons of websites, computer science
+journal articles, PhD theses, etc. I learned a lot about lexers, and a lot
+about parsers, including hand-crafting them vs. using parser generators. In brief:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">I learned about the <text:a xlink:type="simple" xlink:href="https://en.wikipedia.org/wiki/Aho-Corasick_algorithm">Aho–Corasick algorithm</text:a>, which is a great way to
+efficiently search a string for multiple target strings at once. I used this
+to create a custom lexer to identify tokens in a MultiMarkdown text document
+(e.g. <text:span text:style-name="Source_20_Text">*</text:span>, <text:span text:style-name="Source_20_Text">[</text:span>, <text:span text:style-name="Source_20_Text">{++</text:span>, etc.). I learned a lot, and had a good time working
+out the implementation. This code allowed me to efficiently break a string of
+text into the tokens that mattered for Markdown parsing.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">However, in a few instances I really needed some features of regular
+expressions to simplify more complex structures. After a quick bit of testing,
+using re2c to create a tokenizer was just as efficient, and allowed me to
+incorporate some regex functionality that simplified later parsing. I’ll keep
+the Aho-Corasick stuff around, and will probably experiment more with it
+later. But I didn’t need it for MMD now. <text:span text:style-name="Source_20_Text">lexer.re</text:span> contains the source for
+the tokenizer.</text:p></text:list-item>
+
+</text:list>
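+
+<text:p text:style-name="Standard">For a sense of what that looks like, a re2c source file is ordinary C with a special comment block that re2c expands into the scanner. The fragment below is only a rough sketch in that style; the token names and the <text:span text:style-name="Source_20_Text">scan()</text:span> signature are made up, and this is not the actual <text:span text:style-name="Source_20_Text">lexer.re</text:span>:</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* Rough sketch in the style of a re2c tokenizer (illustration only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">enum { TOKEN_STAR, TOKEN_BRACKET_LEFT, TOKEN_CM_ADD_OPEN, TOKEN_WHITESPACE_RUN, TOKEN_PLAIN };</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">int scan(const unsigned char ** p) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>const unsigned char * cur = *p; /* re2c advances this pointer */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>const unsigned char * marker;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int type = TOKEN_PLAIN;</text:p>
+<text:p text:style-name="Preformatted_20_Text">/*!re2c</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>re2c:define:YYCTYPE = "unsigned char";</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>re2c:define:YYCURSOR = cur;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>re2c:define:YYMARKER = marker;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>re2c:yyfill:enable = 0;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>"*" { type = TOKEN_STAR; goto done; }</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>"[" { type = TOKEN_BRACKET_LEFT; goto done; }</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>"{++" { type = TOKEN_CM_ADD_OPEN; goto done; }</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>" "{2,} { type = TOKEN_WHITESPACE_RUN; goto done; }</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>* { type = TOKEN_PLAIN; goto done; }</text:p>
+<text:p text:style-name="Preformatted_20_Text">*/</text:p>
+<text:p text:style-name="Preformatted_20_Text">done:</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>*p = cur; /* report how far the match advanced */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>return type;</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>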
+
+<text:p text:style-name="Standard">I looked long and hard for a way to simplify the parsing algorithm to try and
+“touch” each token only once. Ideally, the program could step through each
+token, and decide when to create a new block, when to pair things together,
+etc. But I’m not convinced it’s possible. Since Markdown’s grammar varies
+based on context, it seems to work best when handled in distinct phases:</text:p>
+
+<text:list text:style-name="L2">
+<text:list-item>
+<text:p text:style-name="Standard">Tokenize the string to identify key sections of text. This includes line
+breaks, allowing the text to be examined one line at a time.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Join series of lines together into blocks, such as paragraphs, code blocks,
+lists, etc.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">The tokens inside each block can then be paired together to create more
+complex syntax such as links, strong, emphasis, etc.</text:p></text:list-item>
+
+</text:list>
+
+<text:p text:style-name="Standard">To handle the block parsing, I started off using the Aho-Corasick code to
+handle my first attempt. I had actually implemented some basic regex
+functionality, and used that to group lines together to create blocks. But
+this quickly fell apart in the face of more complex structures such as
+recursive lists. After a lot of searching, and <text:span text:style-name="MMD-Italic">tons</text:span> more reading, I
+ultimately decided to use a parser generator to handle the task of grouping lines
+into blocks. <text:span text:style-name="Source_20_Text">parser.y</text:span> has the source for this, and it is processed by the
+<text:a xlink:type="simple" xlink:href="http://www.hwaci.com/sw/lemon/">lemon</text:a> parser generator to create the actual
+code.</text:p>
+
+<text:p text:style-name="Standard">I chose to do this because hand-crafting the block parser would be complex.
+The end result would likely be difficult to read and understand, which would
+make it difficult to update later on. Using the parser generator allows me to
+write things out in a way that can more easily be understood by a person. In
+all likelihood, the performance is as good as anything I could have done by
+hand anyway, if not better.</text:p>
+
+<text:p text:style-name="Standard">Because lemon is a LALR(1) parser, it does require a bit of thinking ahead
+about how to create the grammar used. But so far, it has been able to handle
+everything I have thrown at it.</text:p>
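+
+<text:p text:style-name="Standard">To give a flavor of what such a grammar looks like, a lemon grammar is a list of rules, each of which may carry a C action. The toy fragment below is written in that style but is purely illustrative; the rule and token names are invented and this is not the actual <text:span text:style-name="Source_20_Text">parser.y</text:span>:</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* Toy fragment in the style of a lemon grammar (illustrative only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">/* Terminals (LINE_*) come from the tokenizer; non-terminals are lower case. */</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">doc ::= blocks.</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">blocks ::= blocks block.</text:p>
+<text:p text:style-name="Preformatted_20_Text">blocks ::= block.</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">block ::= para.</text:p>
+<text:p text:style-name="Preformatted_20_Text">block ::= indented_code.</text:p>
+<text:p text:style-name="Preformatted_20_Text">block ::= empty.</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">para ::= LINE_PLAIN para_tail.</text:p>
+<text:p text:style-name="Preformatted_20_Text">para_tail ::= para_tail LINE_CONTINUATION.</text:p>
+<text:p text:style-name="Preformatted_20_Text">para_tail ::= .</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">indented_code ::= indented_code LINE_INDENTED_TAB.</text:p>
+<text:p text:style-name="Preformatted_20_Text">indented_code ::= LINE_INDENTED_TAB.</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">empty ::= empty LINE_EMPTY.</text:p>
+<text:p text:style-name="Preformatted_20_Text">empty ::= LINE_EMPTY.</text:p>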
+
+<text:h text:outline-level="3"><text:bookmark text:name="optimization"/>Optimization </text:h>
+
+<text:p text:style-name="Standard">One of my goals for MMD 6 was performance. So I’ve paid attention to speed
+along the way, and have tried to use a few tricks to keep things fast. Here
+are some of the things I’ve learned, in no particular order:</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="memoryallocation"/>Memory Allocation </text:h>
+
+<text:p text:style-name="Standard">When parsing a long document, a <text:span text:style-name="MMD-Italic">lot</text:span> of token structures are created. Each
+one requires a small bit of memory to be allocated. In aggregate, that time
+added up and slowed down performance.</text:p>
+
+<text:p text:style-name="Standard">After reading for a bit, I ended up coming up with an approach that uses
+larger chunks of memory. I allocate pools of of memory in large slabs for
+smaller “objects”". For example, I allocate memory for 1024 tokens at a
+single time, and then dole that memory out as needed. When the slab is empty,
+a new one is allocated. This dramatically improved performance.</text:p>
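+
+<text:p text:style-name="Standard">Stripped to its essentials, the idea is something like the sketch below (illustrative only; the real token pool also frees its slabs, checks for allocation failure, and so on):</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">#include &lt;stdlib.h&gt;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* Bare-bones sketch of a slab allocator for tokens (illustrative only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">typedef struct token {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int type;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>size_t start, len;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>struct token * next;</text:p>
+<text:p text:style-name="Preformatted_20_Text">} token;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">#define SLAB_SIZE 1024</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">static token * slab = NULL;</text:p>
+<text:p text:style-name="Preformatted_20_Text">static size_t slab_used = SLAB_SIZE; /* forces a slab on the first request */</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">token * token_new(void) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>if (slab_used == SLAB_SIZE) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>slab = malloc(sizeof(token) * SLAB_SIZE); /* one malloc serves 1024 tokens */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>slab_used = 0;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>return &amp;slab[slab_used++];</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>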
+
+<text:p text:style-name="Standard">When pairing tokens, I created a new stack for each block. I realized that an
+empty stack didn’t have any “leftover” cruft to interfere with re-use, so I
+just used one for the entire document. Again a sizeable improvement in
+performance from only allocating one object instead of many. When recursing
+to a deeper level, the stack just gets deeper, but earlier levels aren’t
+modified.</text:p>
+
+<text:p text:style-name="Standard">Speaking of tokens, I realized that the average document contains a lot of
+single spaces (there’s one between every two words I have written, for
+example). The vast majority of the time, these single spaces have no effect
+on the output of Markdown documents. I changed my whitespace token search to
+only flag runs of 2 or more spaces, dramatically reducing the number of
+tokens. This gives the benefit of needing fewer memory allocations, and also
+reduces the number of tokens that need to be processed later on. The only
+downside is having to remember to check for a single space character in a few instances
+where it matters.</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="properinputbuffering"/>Proper input buffering </text:h>
+
+<text:p text:style-name="Standard">When I first began last spring, I was amazed to see how much time was being
+spent by MultiMarkdown simply reading the input file. Then I discovered it
+was because I was reading it one character at a time. I switched to using a
+buffered read approach and the time to read the file went to almost nothing. I
+experimented with different buffer sizes, but they did not seem to make a
+measurable difference.</text:p>
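+
+<text:p text:style-name="Standard">The difference is essentially the one between the two loops below (a simplified comparison, not the actual file-reading code):</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">#include &lt;stdio.h&gt;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* Slow: one library call (and its overhead) per character. */</text:p>
+<text:p text:style-name="Preformatted_20_Text">void read_by_char(FILE * in) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int c;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>while ((c = fgetc(in)) != EOF) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>/* append c to the document string ... */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* Fast: pull in large chunks at a time. */</text:p>
+<text:p text:style-name="Preformatted_20_Text">void read_buffered(FILE * in) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char buffer[4096];</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>size_t count;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>while ((count = fread(buffer, 1, sizeof(buffer), in)) &gt; 0) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>/* append buffer[0 .. count) to the document string ... */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>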
+
+<text:h text:outline-level="4"><text:bookmark text:name="outputbuffering"/>Output Buffering </text:h>
+
+<text:p text:style-name="Standard">I experimented with different approaches to creating the output after parsing.
+I tried printing directly to <text:span text:style-name="Source_20_Text">stdout</text:span>, and even played with different
+buffering settings. None of those seemed to work well, and all were slower
+than using the <text:span text:style-name="Source_20_Text">d_string</text:span> approach (formerly called <text:span text:style-name="Source_20_Text">GString</text:span> in MMD 5).</text:p>
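+
+<text:p text:style-name="Standard">The winning pattern is simply to accumulate the entire document in one growable string and write it out with a single call at the end. A generic sketch of that pattern follows; MMD itself uses its <text:span text:style-name="Source_20_Text">d_string</text:span> type for this, so the struct and function names here are illustrative rather than the actual API:</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">#include &lt;stdlib.h&gt;</text:p>
+<text:p text:style-name="Preformatted_20_Text">#include &lt;string.h&gt;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* Generic growable output buffer (illustrative; error checks omitted). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">typedef struct {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char * str;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>size_t len, alloc;</text:p>
+<text:p text:style-name="Preformatted_20_Text">} out_buffer;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">void out_append(out_buffer * b, const char * text) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>size_t n = strlen(text);</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>while (b-&gt;len + n + 1 &gt; b-&gt;alloc) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>b-&gt;alloc = b-&gt;alloc ? b-&gt;alloc * 2 : 256; /* grow geometrically */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>b-&gt;str = realloc(b-&gt;str, b-&gt;alloc);</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>}</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>memcpy(b-&gt;str + b-&gt;len, text, n + 1);</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>b-&gt;len += n;</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* ... build the whole document with out_append(), then write once: */</text:p>
+<text:p text:style-name="Preformatted_20_Text">/* fwrite(buf.str, 1, buf.len, stdout); */</text:p>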
+
+<text:h text:outline-level="4"><text:bookmark text:name="fastsearches"/>Fast Searches </text:h>
+
+<text:p text:style-name="Standard">After getting basic Markdown functionality complete, I discovered during
+testing that the time required to parse a document grew exponentially as the
+document grew longer. Performance was on par with CommonMark for shorter
+documents, but fell increasingly behind in larger tests. Time profiling found
+that the culprit was searching for link definitions when they didn’t exist.
+My first approach was to keep a stack of used link definitions, and to iterate
+through them when necessary. In long documents, this performs very poorly.
+More research led me to
+<text:a xlink:type="simple" xlink:href="http://troydhanson.github.io/uthash/">uthash</text:a>. This allows me to search for
+a link (or footnote, etc.) by “name” rather than searching through an array.
+This allowed me to get MMD’s performance back to O(n), taking roughly twice as
+much time to process a document that is twice as long.</text:p>
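+
+<text:p text:style-name="Standard">With uthash, any struct becomes hashable by embedding a <text:span text:style-name="Source_20_Text">UT_hash_handle</text:span> member. The sketch below shows the general usage pattern; the link struct and field names are illustrative and not the actual MMD code:</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">#include &lt;string.h&gt;</text:p>
+<text:p text:style-name="Preformatted_20_Text">#include "uthash.h"</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">typedef struct link {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char * label; /* key: the normalized link label */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char * url;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>UT_hash_handle hh; /* makes this struct hashable */</text:p>
+<text:p text:style-name="Preformatted_20_Text">} link;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">static link * link_table = NULL;</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">void link_add(link * l) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>HASH_ADD_KEYPTR(hh, link_table, l-&gt;label, strlen(l-&gt;label), l);</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">link * link_find(const char * label) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>link * result = NULL;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>HASH_FIND_STR(link_table, label, result); /* fast lookup instead of a linear scan */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>return result;</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>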
+
+<text:h text:outline-level="4"><text:bookmark text:name="efficientutilityfunctions"/>Efficient Utility Functions </text:h>
+
+<text:p text:style-name="Standard">It is frequently necessary when parsing Markdown to check what sort of
+character we are dealing with at a certain position – a letter, whitespace,
+punctuation, etc. I created a lookup table for this via <text:span text:style-name="Source_20_Text">char_lookup.c</text:span> and
+hard-coded it in <text:span text:style-name="Source_20_Text">char.c</text:span>. These routines allow me to quickly, and
+consistently, classify any byte within a document. This saved a lot of
+programming time, and saved time tracking down bugs from handling things
+slightly differently under different circumstances. I also suspect it
+improved performance, but don’t have the data to back it up.</text:p>
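+
+<text:p text:style-name="Standard">The underlying idea is just a 256-entry table indexed by the byte value, along the lines of the sketch below (the class names and bit values are illustrative, not the contents of the generated <text:span text:style-name="Source_20_Text">char.c</text:span>):</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* Sketch of a character-class lookup table (illustrative only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">enum {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>CHAR_WHITESPACE = 1 &lt;&lt; 0,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>CHAR_PUNCTUATION = 1 &lt;&lt; 1,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>CHAR_ALPHA = 1 &lt;&lt; 2,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>CHAR_DIGIT = 1 &lt;&lt; 3,</text:p>
+<text:p text:style-name="Preformatted_20_Text">};</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">static const unsigned char char_class[256] = {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>[' '] = CHAR_WHITESPACE,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['\t'] = CHAR_WHITESPACE,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['\n'] = CHAR_WHITESPACE,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['*'] = CHAR_PUNCTUATION,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['['] = CHAR_PUNCTUATION,</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['a'] = CHAR_ALPHA, /* ... the real table fills in every byte ... */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>['0'] = CHAR_DIGIT,</text:p>
+<text:p text:style-name="Preformatted_20_Text">};</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">#define char_is_whitespace(c) (char_class[(unsigned char)(c)] &amp; CHAR_WHITESPACE)</text:p>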
+
+<text:h text:outline-level="4"><text:bookmark text:name="testingwhilewriting"/>Testing While Writing </text:h>
+
+<text:p text:style-name="Standard">I developed several chunks of code in parallel while creating MMD 6. The vast
+majority of it was developed largely in a <text:a xlink:type="simple" xlink:href="https://en.wikipedia.org/wiki/Test-driven_development">test-driven development</text:a> approach.
+The other code was largely created with extensive unit testing to accomplish
+this.</text:p>
+
+<text:p text:style-name="Standard">MMD isn’t particularly amenable to this approach at the small level, but
+instead I relied more on integration testing with an ever-growing collection
+of text files and the corresponding HTML files in the MMD 6 test suite. This
+allowed me to ensure new features work properly and that old features aren’t
+broken. At this time, there are 29 text files in the test suite, and many
+more to come.</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="otherlessons"/>Other Lessons </text:h>
+
+<text:p text:style-name="Standard">Some things that didn’t do me any good….</text:p>
+
+<text:p text:style-name="Standard">I considered differences between using <text:span text:style-name="Source_20_Text">malloc</text:span> and <text:span text:style-name="Source_20_Text">calloc</text:span> when initializing
+tokens. The time saved by using <text:span text:style-name="Source_20_Text">malloc</text:span> was basically exactly offset by the
+initial time required to initialize the token to default null values as
+compared to using <text:span text:style-name="Source_20_Text">calloc</text:span>. When trying <text:span text:style-name="Source_20_Text">calloc</text:span> failed to help me out
+(I had thought that clearing a whole slab in the object pool at once would be faster), I
+stuck with <text:span text:style-name="Source_20_Text">malloc</text:span>, as it makes more sense to me in my workflow.</text:p>
+
+<text:p text:style-name="Standard">I read a bit about <text:a xlink:type="simple" xlink:href="http://www.catb.org/esr/structure-packing/">struct padding</text:a> and reordered some of my structs. It was
+until later that I discovered the <text:span text:style-name="Source_20_Text">-Wpadded</text:span> option, and it’s not clear
+whether my changes modified anything. Since the structs were being padded
+automatically, there was no noticeable performance change, and I didn’t have
+the tools to measure whether I could have improved memory usage at all. Not
+sure this would be worth the effort – much lower hanging fruit available.</text:p>
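+
+<text:p text:style-name="Standard">For reference, the kind of change involved is simply reordering members so that they pack without holes, for example (illustrative only, not MMD’s actual structs):</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* On a typical 64-bit ABI the first layout occupies 24 bytes, the second 16. */</text:p>
+<text:p text:style-name="Preformatted_20_Text">struct padded {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char a; /* 1 byte + 7 bytes padding */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>void * p; /* 8 bytes */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char b; /* 1 byte + 7 bytes trailing padding */</text:p>
+<text:p text:style-name="Preformatted_20_Text">};</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">struct reordered {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>void * p; /* 8 bytes */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char a; /* 1 byte */</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>char b; /* 1 byte + 6 bytes trailing padding */</text:p>
+<text:p text:style-name="Preformatted_20_Text">};</text:p>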
+
+<text:h text:outline-level="3"><text:bookmark text:name="performance"/>Performance </text:h>
+
+<text:p text:style-name="Standard">Basic tests show that currently MMD 6 takes about 20–25% longer the CommonMark
+0.27.0 to process long files (e.g. 0.2 MB). However, it is around 5% <text:span text:style-name="MMD-Italic">faster</text:span>
+than CommonMark when parsing a shorter file (27 kB) (measured by parsing the
+same file 200 times over). This testing is performed using the Markdown
+syntax page, modified to avoid the use of the Setext header at the top. The
+longer files tested are created by copying the same syntax page onto itself,
+thereby doubling the length of the file with each iteration.</text:p>
+
+<text:p text:style-name="Standard">The largest file I test is approximately 108 MB (4096 copies of the syntax
+page). On my machine (2012 Mac mini with 2.3 GHz Intel Core i7, 16 GB RAM),
+it takes approximately 4.4 seconds to parse with MMD 6 and 3.7 seconds with
+CommonMark. MMD 6 processes approximately 25 MB/s on this test file.
+CommonMark 0.27.0 gets about 29 MB/s on the same machine.</text:p>
+
+<text:p text:style-name="Standard">There are some slight variations with the smaller test files (8–32 copies),
+but overall the performance of both programs (MMD 6 and CommonMark) is
+roughly linear as the test file gets bigger (double the file size and it takes
+twice as long to parse, aka O(n)).</text:p>
+
+<text:p text:style-name="Standard">Out of curiosity, I ran the same tests on the original Markdown.pl by Gruber
+(v 1.0.2b8). It took approximately 178 seconds to parse 128 copies of the
+file (3.4 MB) and was demonstrating quadratic performance characteristics
+(double the file size and it takes 2<text:span text:style-name="MMD-Superscript">2</text:span> or 4 times longer to process, aka
+O(n<text:span text:style-name="MMD-Superscript">2</text:span>)). I didn’t bother running it on larger versions of the test file. For
+comparison, MMD 6 can process 128 copies in approximately 140 msec.</text:p>
+
+<text:p text:style-name="Standard">Of note, the throughput speed drops when testing more complicated files
+containing more advanced MultiMarkdown features, though it still seems to
+maintain linear performance characteristics. A second test file is created by
+concatenating all of the test suite files (including the Markdown syntax
+file). In this case, MMD gets about 13 MB/s. CommonMark doesn’t support
+these additional features, so testing it with that file is not relevant. I
+will work to see whether certain features in particular are more challenging,
+and whether they can be reworked to improve performance.</text:p>
+
+<text:p text:style-name="Standard">As above, I have done some high level optimization of the parse strategy, but
+I’m sure there’s still a lot of room for further improvement.
+Suggestions welcome!</text:p>
+
+<text:h text:outline-level="3"><text:bookmark text:name="testing"/>Testing </text:h>
+
+<text:h text:outline-level="4"><text:bookmark text:name="testsuite"/>Test Suite </text:h>
+
+<text:p text:style-name="Standard">The development of MMD v6 was heavily, but not absolutely, influenced by the
+philosophy of test-driven development. While coding, I made use of test
+suites to verify successful implementation of new features, to avoid
+regression problems when adding new features, and to identify known edge cases
+in need of proper handling.</text:p>
+
+<text:p text:style-name="Standard">The test suite (located in <text:span text:style-name="Source_20_Text">tests/MMD6Tests</text:span>) is a “living” collection of
+documents that will continue to be updated as new bugs and edge cases are
+identified. This allows proper integration testing of the entire
+application with every release.</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="fuzztesting"/>Fuzz Testing </text:h>
+
+<text:p text:style-name="Standard">I was not familiar with the concept of <text:a xlink:type="simple" xlink:href="#fuzztesting">Fuzz Testing</text:a>
+(https://en.wikipedia.org/wiki/Fuzzing) until a user mentioned something about
+it to me a year or two ago. I had never used it before, but it seemed like a
+good idea. I have implemented it in two ways.</text:p>
+
+<text:p text:style-name="Standard">The first is that I created a simplified version of the line parser that
+simply accepts various combinations of line type identifiers to see if they
+would successfully parse. The line parser is responsible for taking a series
+of line types (e.g. plain text, indented line, etc.) and determining what sort
+of block they should become. The file <text:span text:style-name="Source_20_Text">test/parser_text.y</text:span> is run through the
+<text:span text:style-name="Source_20_Text">lemon</text:span> program, compiled (with or without the <text:span text:style-name="Source_20_Text">-DNDEBUG</text:span> flag) and then run.
+It sequentially throws every combination of line types at the simplified line
+parser to make sure that it doesn’t choke. When I first did this, I found
+several combinations of lines that did not pass.</text:p>
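+
+<text:p text:style-name="Standard">Conceptually, the driver is nothing more than a set of nested loops over the line types, along these lines (a simplified sketch; <text:span text:style-name="Source_20_Text">line_parser_accepts()</text:span> stands in for the simplified, lemon-generated line parser):</text:p>
+
+<text:p text:style-name="Preformatted_20_Text">/* Conceptual sketch of the line-type fuzzer (illustrative only). */</text:p>
+<text:p text:style-name="Preformatted_20_Text">enum { LINE_PLAIN, LINE_INDENTED_TAB, LINE_EMPTY, LINE_ATX_1, LINE_TYPE_COUNT };</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">/* hypothetical stand-in for the simplified line parser */</text:p>
+<text:p text:style-name="Preformatted_20_Text">static int line_parser_accepts(const int * line_types, int count) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>(void)line_types; (void)count;</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>return 1;</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>
+<text:p text:style-name="Preformatted_20_Text"/>
+<text:p text:style-name="Preformatted_20_Text">void fuzz_all_triples(void) {</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>int lines[3];</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="4"/>for (lines[0] = 0; lines[0] &lt; LINE_TYPE_COUNT; lines[0]++)</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="8"/>for (lines[1] = 0; lines[1] &lt; LINE_TYPE_COUNT; lines[1]++)</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="12"/>for (lines[2] = 0; lines[2] &lt; LINE_TYPE_COUNT; lines[2]++)</text:p>
+<text:p text:style-name="Preformatted_20_Text"><text:s text:c="16"/>line_parser_accepts(lines, 3); /* must never crash or hang */</text:p>
+<text:p text:style-name="Preformatted_20_Text">}</text:p>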
+
+<text:p text:style-name="Standard"><text:span text:style-name="MMD-Bold">NOTE</text:span>: This does not verify accurate parsing, simply that the parser does
+not crash when given an unexpected combination of lines.</text:p>
+
+<text:p text:style-name="Standard">The second form of fuzz testing I have started using more recently. This is
+using the <text:a xlink:type="simple" xlink:href="http://lcamtuf.coredump.cx/afl/">American fuzzy lop</text:a> program to try
+to find text input that crashes MMD. This works by taking sample inputs (e.g.
+files from the test suite), modifying them slightly, and trying the modified
+versions. Do this over and over and over, and some interesting edge cases are
+sometimes identified. I have already found several this way.
+Definitely a very useful tool!</text:p>
+
+<text:h text:outline-level="4"><text:bookmark text:name="unittesting"/>Unit Testing </text:h>
+
+<text:p text:style-name="Standard">Some of the original development was done with unit testing in some other
+tools I developed. This code formed the basis of a few parts of MMD.
+Otherwise, it was hard to see how to really create very good unit tests for
+the development of MMD. So there is really not much unit testing built into
+the code or used during the development.</text:p>
+
+<text:h text:outline-level="3"><text:bookmark text:name="changelog"/>Changelog </text:h>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="Standard">2017–03–13 – v 6.0.0-b2:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add back the mmd6 latex title file</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Basic EPUB 3 support – uses the ‘miniz’ library for zip creation</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Update QuickStart and EPUB code</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Update QuickStart guide</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Update test suite</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Don't duplicate LaTeX glossary definitions</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix abbreviations in ODF; Improve test suite</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Improve glossaries and abbreviations; Update QuickStart</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Tidy up some compiler warnings in code</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">FIXED: Use custom UUID code to minimize external dependencies</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–03–09 – v 6.0.0-b1:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add French translations; fix typo in German</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add Quick Start guide</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add functionality to automatically identify abbreviations and glossary terms in source</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Improve LaTeX configuration files</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Update German translations</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Use native ODF table of contents instead of a manual list</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Use native command for table of contents in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Bring HTML and ODF into line with LaTeX as to output of abbreviations on first and subsequent uses</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Slight performance tweak</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Update German test suite</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Allow <text:span text:style-name="Source_20_Text">{{TOC}}</text:span> in latex verbatim</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Don't free token_pool if never initialized</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix German typo</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix missing token type</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Improve performance of checking document for metadata, which improves performance when checking for possible transclusion</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">FIXED: Update test suite for abbreviation changes</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–03–05 – v 0.4.2-b:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add and utility functions; fix memory leak</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Initial abbreviation support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Keep working on Abbreviations/Glossaries</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Refactor abbreviation code; Add inline abbreviations; Fix abbreviations in ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Update Inline Footnote test</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Add comments to i18n.h</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Finish refactoring note-related code</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Refactor footnotes</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Refactor glossary code</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Remove offset from html export functions</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: latex list items need to block optional argument to allow ‘[’ as first character</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Merge branch ‘release/0.4.1-b’ into develop</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–03–04 – v 0.4.1-b:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="Standard">FIXED: Add glossary localization</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–03–04 – v 0.4.0-b:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add TOC support to ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add glossary support to ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add prelim code for handling abbreviations</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for Swift Package Maker; CHANGED: Restructure source directory</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Added LaTeX support for escaped characters, fenced code blocks, images, links</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Basic ODF Support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Better document strong/emph algorithm</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Continue ODF progress</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Continue to work on ODF export</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Continue work on ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Finish ODF support for lists</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Improve performance when exporting</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Improve token_pool memory handling</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Prototype support for Glossaries</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support ‘latexconfig’ metadata</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Use multiple cases in glossary tests</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Don't force glossary terms into lowercase</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix Makefile for new source file location</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix algorithm for creating TOC to properly handle ‘incorrect’ levels</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix linebreaks in LaTeX; ADDED: Add Linebreaks test file</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix new_source script for new directory structure</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix non-breaking space in ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix padding at end of document body in ODF</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix underscores in raw latex</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Potential bug</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">NOTE: Add shared library build option</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–02–17 – v 0.3.1.a:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: ‘finalize’ beamer support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add escaped newline as linebreak; start on beamer/memoir support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: CriticMarkup test for LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Custom LaTeX output for CriticMarkup comments</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support mmd export format</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Work on cpack installer – change project name for compatibility</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Adjust latex metadata configuration for consistency</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+CHANGED: Configure cmake to use C99</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Add custom implementation for cross–platform support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix German HTML tests</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix cpack destination directory issue</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix memory leaks etc</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix warning in custom vasprintf</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Modify CMakeLists.txt to test for use of clang compiler</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Work on memory leaks</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">NOTE: Adjust license width to improve display on smaller terminal windows</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–02–14 – v 0.3.0a:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add basic image support to LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add file transclusion</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for citation ‘locators’</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for manual labels on ATX Headers</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for manual labels on Setext Headers</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for tables in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: HTML Comments appear as raw LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Improved citation support in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support \autoref{} in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support combined options in LaTeX citations that use the ‘][’ syntax</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support language specifier in fenced code blocks</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support metadata in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Update Citations test suite</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Escaped LaTeX characters</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix bug in URL parsing</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix bug in citation links</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix bug when no closing divider or newline at end of last table cell</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix issue printing ‘–’</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Fix scan_url test suite</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Get Math working in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Improve reliability of link scanner</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Properly add id attribute to new instances of citation only</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Properly handle manual labels with TOC</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Properly print hash characters in LaTeX</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Separate LaTeX verbatim and texttt character handling</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+FIXED: Update Escapes test LaTeX result</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">FIXED: Work on escaping LaTeX characters</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–02–08 – v 0.1.4a:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="Standard">ADDED: Add smart quote support for other languages (resolves #15)</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–02–08 – v 0.1.3a:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for reference image id attributes</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Add support for table captions</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Metadata support for base header level</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support distinction between 3 and 5 backticks in fenced code blocks</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+ADDED: Support Setext headers</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">FIXED: Fix issue with metadata disrupting smart quotes</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–02–07 – v 0.1.2a:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="Standard">“pathologic” test suite – fix handling of nested brackets, e.g.
+<text:span text:style-name="Source_20_Text">[[[[foo]]]]</text:span> to avoid bogging down checking for reference links that
+don’t exist.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Table support – a single blank line separates sections of tables, so
+at least two blank lines are needed between adjacent tables.</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Definition list support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+“fuzz testing” – stress test the parser for unexpected failures</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Table of Contents support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Improved compatibility mode parsing</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">2017–01–28 – v 0.1.1a includes a few updates:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+Metadata support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Metadata variables support</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Extended ASCII range character checking</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Rudimentary language translations, including German</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Improved performance</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">Additional testing:</text:p>
+
+<text:list text:style-name="L1">
+<text:list-item>
+<text:p text:style-name="P1">
+CriticMarkup</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+HTML Blocks</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="P1">
+Metadata/Variables</text:p></text:list-item>
+
+<text:list-item>
+<text:p text:style-name="Standard">“pathologic” test cases from CommonMark</text:p></text:list-item>
+
+</text:list></text:list-item>
+
+</text:list></text:list-item>
+
+</text:list>
+</office:text>
+</office:body>
+</office:document>
--- /dev/null
+<!DOCTYPE html>
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
+<head>
+ <meta charset="utf-8"/>
+ <title>MultiMarkdown v6 Development Notes</title>
+ <meta name="author" content="Fletcher T. Penney"/>
+ <meta name="date" content="2017-03-14"/>
+</head>
+<body>
+
+<h3 id="introduction">Introduction </h3>
+
+<p>This document includes some notes on the development of MultiMarkdown (<abbr title="MultiMarkdown">MMD</abbr>) v6. Most of it
+will be interesting only to other developers or those needing to choose the
+absolute “best” Markdown (<abbr title="Markdown">MD</abbr>) implementation for their needs – it is not required
+reading to understand how the software works.</p>
+
+<h4 id="whyanewversion">Why a New Version? </h4>
+
+<p>MultiMarkdown version 5 was released in November of 2015, but the codebase was
+essentially the same as that of v4 – and that was released in beta in April
+of 2013. A few key things prompted work on a new version:</p>
+
+<ul>
+<li><p>Accuracy – <abbr title="MultiMarkdown">MMD</abbr> v4 and v5 were the most accurate versions yet, and a lot of
+effort went into finding and resolving various edge cases. However, it began
+to feel like a game of whack-a-mole where new bugs would creep in every time I
+fixed an old one. The <a href="#gn:1" id="gnref:1" title="see glossary" class="glossary">PEG</a> began to feel rather convoluted in spots, even
+though it did allow for a precise (if not always accurate) specification of
+the grammar.</p></li>
+<li><p>Performance – “Back in the day” <a href="https://github.com/jgm/peg-markdown">peg-markdown</a> was one of the fastest
+Markdown parsers around. <abbr title="MultiMarkdown">MMD</abbr> v3 was based on peg-markdown, and would
+leap-frog with it in terms of performance. Then <a href="http://commonmark.org/">CommonMark</a> was released, which
+was a bit faster. Then a couple of years went by and CommonMark became <em>much</em>
+faster – in one of my test suites, <abbr title="MultiMarkdown">MMD</abbr> v 5.4.0 takes about 25 times longer to
+process a long document than CommonMark 0.27.0.</p></li>
+</ul>
+
+<p>In the spring of 2016, I decided I wanted to rewrite MultiMarkdown from scratch,
+building the parser myself rather than relying on a pre-rolled solution. (I
+had been using <a href="https://github.com/ooc-lang/greg">greg</a> to compile the PEG
+into parser code. It worked well overall, but lacked some features I needed,
+requiring a lot of workarounds.)</p>
+
+<h3 id="firstattempt">First Attempt </h3>
+
+<p>My first attempt started by hand-crafting a parser that scanned through the
+document a line at a time, deciding what to do with each line as it was
+encountered. I used regex parsers made with <a href="http://re2c.org/index.html">re2c</a> to
+help classify each line, and then a separate parser layer to process groups of
+lines into blocks. Initially this approach worked well, and was really
+efficient. But I quickly began to code my way into a dead-end – the strategy
+was not elegant enough to handle things like nested lists, etc.</p>
+
+<p>One thing that did turn out well from the first attempt, however, was an
+approach for handling <code><emph></code> and <code><strong></code> parsing. I’ve learned over the
+years that this can be one of the hardest parts of coding accurately for
+Markdown. There are many examples that are obvious to a person, but difficult
+to properly “explain” how to parse to a computer.</p>
+
+<p>No solution is perfect, but I developed an approach that seems to accurately
+handle a wide range of situations without a great deal of complexity:</p>
+
+<ol>
+<li><p>Scan the document for asterisks (<code>*</code>). Each one will be handled one at a
+time.</p></li>
+<li><p>Unlike brackets (<code>[</code> and <code>]</code>), an asterisk is “ambidextrous”, in that it
+may be able to open a matched pair of asterisks, close a pair, or both. For
+example, in <code>foo *bar* foo</code>:</p>
+
+<ol>
+<li><p>The first asterisk can open a pair, but not close one.</p></li>
+<li><p>The second asterisk can close a pair, but not open one.</p></li>
+</ol></li>
+<li><p>So, once the asterisks have been identified, each has to be examined to
+determine whether it can open/close/both. The algorithm is not that complex,
+but I’ll describe it in general terms. Check the code for more specifics.
+This approach seems to work, but might still need some slight tweaking. In
+the future, I’ll codify this better in language rather than just in code.</p>
+
+<ol>
+<li><p>If there is whitespace to the left of an asterisk, it can’t close.</p></li>
+<li><p>If there is whitespace or punctuation to the right it can’t open.</p></li>
+<li><p>“Runs” of asterisks, e.g. <code>**bar</code> are treated as a unit in terms of
+looking left/right.</p></li>
+<li><p>Asterisks inside a word are a bit trickier – we look at the number of
+asterisks before the word, the number in the current run, and the number
+of asterisks after the word to determine which combinations, if any, are
+permitted.</p></li>
+</ol></li>
+<li><p>Once all asterisks have been tagged as able to open/close/both, we proceed
+through them in order:</p>
+
+<ol>
+<li><p>When we encounter a tag that can close, we look to see if there is a
+previous opener that has not been paired off. If so, pair the two and
+remove the opener from the list of available asterisks.</p></li>
+<li><p>When we encounter an opener, add it to the stack of available openers.</p></li>
+<li><p>When we encounter an asterisk that can do both, we see if it can close an
+existing opener. If not, then we add it to the stack.</p></li>
+</ol></li>
+<li><p>After all tokens in the block have been paired, then we look for nesting
+pairs of asterisks in order to create <code><emph></code> and <code><strong></code> sets. For
+example, assume we have six asterisks wrapped around a word, three in front,
+and three after. The asterisks are indicated with numbers: <code>123foo456</code>. We
+proceed in the following manner:</p>
+
+<ol>
+<li><p>Based on the pairing algorithm above, these asterisks would be paired as
+follows, with matching asterisks sharing numbers – <code>123foo321</code>.</p></li>
+<li><p>Moving forwards, we come to asterisk “1”. It is followed by an
+asterisk, so we check to see if they should be grouped as a <code><strong></code>.
+Since the “1” asterisks are wrapped immediately outside the “2” asterisks,
+they are joined together. More than two pairs can’t be joined, so we now
+get the following – <code>112foo211</code>, where the “11” represents the opening
+and closing of a <code><strong></code>, and the “2” represents a <code><emph></code>.</p></li>
+</ol></li>
+<li><p>When matching a pair, any unclosed openers that are on the stack are
+removed, preventing pairs from “crossing” or “intersecting”. Pairs can wrap
+around each other, e.g. <code>[(foo)]</code>, but not intersect like <code>[(foo])</code>. In the
+second case, the brackets would close, removing the <code>(</code> from the stack.</p></li>
+<li><p>This same approach is used for all tokens that are matched in pairs –
+<code>[foo]</code>, <code>(foo)</code>, <code>_foo_</code>, etc. There’s slightly more to it, but once you
+figure out how to assign opening/closing ability, the rest is easy. By using
+a stack to track available openers, it can be performed efficiently (a short
+sketch of this pairing pass follows the list below).</p></li>
+</ol>
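+
+<p>As a rough sketch of that pairing pass: the structure and function names
+below are invented for illustration and are not the actual <abbr title="MultiMarkdown">MMD</abbr> 6 token
+code, but the stack-based idea is the same.</p>
+
+<pre><code>#define MAX_TOKENS 1024
+
+/* Hypothetical token record: whether it can open and/or close a pair,
+   and the index of the token it ends up paired with (or -1). */
+typedef struct {
+    int can_open;
+    int can_close;
+    int mate;
+} pair_tok;
+
+/* Walk the tokens in order, keeping a stack of unpaired openers.
+   (The real pairing also discards openers pushed after the matched one,
+   which is what prevents pairs from crossing.) */
+void pair_tokens(pair_tok * t, int count) {
+    int openers[MAX_TOKENS];    /* indices of unpaired openers */
+    int top = 0;
+
+    for (int i = 0; i < count; i++) {
+        t[i].mate = -1;
+
+        /* A token that can close consumes the most recent unpaired opener. */
+        if (t[i].can_close && top > 0) {
+            int o = openers[--top];
+            t[o].mate = i;
+            t[i].mate = o;
+            continue;
+        }
+
+        /* Otherwise, a token that can open waits on the stack. */
+        if (t[i].can_open)
+            openers[top++] = i;
+    }
+}</code></pre>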
+
+<p>In my testing, this approach has worked quite well. It handles all the basic
+scenarios I’ve thrown at it, and all of the “basic” and “devious” edge cases I
+have thought of (some of these don’t necessarily have a “right” answer – but
+v6 gives consistent answers that seem as reasonable as any others to me).
+There are also three more edge cases I’ve come up with that can still stump it,
+and ironically they are handled correctly by most implementations. They just
+don’t follow the rules above. I’ll continue to work on this.</p>
+
+<p>In the end, I scrapped this effort, but kept the lessons learned in the token
+pairing algorithm.</p>
+
+<h3 id="secondattempt">Second Attempt </h3>
+
+<p>I tried again this past Fall. This time, I approached the problem with lots
+of reading. <em>Lots and lots</em> of reading – tons of websites, computer science
+journal articles, PhD theses, etc. Learned a lot about lexers, and a lot
+about parsers, including hand-crafting vs using parser generators. In brief:</p>
+
+<ol>
+<li><p>I learned about the <a href="https://en.wikipedia.org/wiki/Aho-Corasick_algorithm">Aho–Corasick algorithm</a>, which is a great way to
+efficiently search a string for multiple target strings at once. I used this
+to create a custom lexer to identify tokens in a MultiMarkdown text document
+(e.g. <code>*</code>, <code>[</code>, <code>{++</code>, etc.). I learned a lot, and had a good time working
+out the implementation. This code efficiently allowed me to break a string of
+text into the tokens that mattered for Markdown parsing.</p></li>
+<li><p>However, in a few instances I really needed some features of regular
+expressions to simplify more complex structures. After a quick bit of testing,
+using re2c to create a tokenizer was just as efficient, and allowed me to
+incorporate some regex functionality that simplified later parsing. I’ll keep
+the Aho-Corasick stuff around, and will probably experiment more with it
+later. But I didn’t need it for <abbr title="MultiMarkdown">MMD</abbr> now. <code>lexer.re</code> contains the source for
+the tokenizer (a simplified sketch of the idea follows this list).</p></li>
+</ol>
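+
+<p>To give a feel for what the tokenizer does, whether written with
+Aho-Corasick, re2c, or by hand, here is a simplified, hand-rolled sketch.
+The token names are made up for this example; the real token types and
+lexer live in <code>lexer.re</code>.</p>
+
+<pre><code>#include &lt;stdio.h&gt;
+#include &lt;string.h&gt;
+
+enum token_type { TOK_STAR, TOK_BRACKET_OPEN, TOK_BRACKET_CLOSE,
+                  TOK_CRITIC_ADD_OPEN, TOK_TEXT };
+
+/* Print a token; a real lexer would append it to a token chain instead. */
+static void emit(enum token_type type, size_t start, size_t len) {
+    printf("token %d at %zu (len %zu)\n", (int) type, start, len);
+}
+
+/* Walk the string and emit tokens for the characters Markdown cares about,
+   lumping everything in between together as plain text. */
+static void tokenize(const char * str) {
+    size_t len = strlen(str);
+    size_t text_start = 0;
+    size_t i = 0;
+
+    while (i < len) {
+        enum token_type t;
+        size_t tok_len = 1;
+
+        if (str[i] == '*') {
+            t = TOK_STAR;
+        } else if (str[i] == '[') {
+            t = TOK_BRACKET_OPEN;
+        } else if (str[i] == ']') {
+            t = TOK_BRACKET_CLOSE;
+        } else if (strncmp(str + i, "{++", 3) == 0) {
+            t = TOK_CRITIC_ADD_OPEN;
+            tok_len = 3;
+        } else {
+            i++;                 /* plain text, keep scanning */
+            continue;
+        }
+
+        /* Flush any plain text that preceded this token. */
+        if (i > text_start)
+            emit(TOK_TEXT, text_start, i - text_start);
+
+        emit(t, i, tok_len);
+        i += tok_len;
+        text_start = i;
+    }
+
+    if (len > text_start)
+        emit(TOK_TEXT, text_start, len - text_start);
+}
+
+int main(void) {
+    tokenize("foo *bar* and {++inserted text++}");
+    return 0;
+}</code></pre>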
+
+<p>I looked long and hard for a way to simplify the parsing algorithm to try and
+“touch” each token only once. Ideally, the program could step through each
+token, and decide when to create a new block, when to pair things together,
+etc. But I’m not convinced it’s possible. Since Markdown’s grammar varies
+based on context, it seems to work best when handled in distinct phases:</p>
+
+<ol>
+<li><p>Tokenize the string to identify key sections of text. This includes line
+breaks, allowing the text to be examined one line at a time.</p></li>
+<li><p>Join series of lines together into blocks, such as paragraphs, code blocks,
+lists, etc.</p></li>
+<li><p>The tokens inside each block can then be paired together to create more
+complex syntax such as links, strong, emphasis, etc.</p></li>
+</ol>
+
+<p>To handle the block parsing, I started off using the Aho-Corasick code from
+my first attempt. I had actually implemented some basic regex
+functionality, and used that to group lines together to create blocks. But
+this quickly fell apart in the face of more complex structures such as
+recursive lists. After a lot of searching, and <em>tons</em> more reading, I
+ultimately decided to use a parser generator to handle the task of grouping lines
+into blocks. <code>parser.y</code> has the source for this, and it is processed by the
+<a href="http://www.hwaci.com/sw/lemon/">lemon</a> parser generator to create the actual
+code.</p>
+
+<p>I chose to do this because hand-crafting the block parser would be complex.
+The end result would likely be difficult to read and understand, which would
+make it difficult to update later on. Using the parser generator allows me to
+write things out in a way that can more easily be understood by a person. In
+all likelihood, the performance is probably as good as anything I could do
+anyway, if not better.</p>
+
+<p>Because lemon generates an LALR(1) parser, it does require a bit of thinking ahead
+about how to create the grammar used. But so far, it has been able to handle
+everything I have thrown at it.</p>
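+
+<p>For a sense of what a lemon grammar looks like, here is a toy fragment in
+lemon’s syntax. It is not taken from <code>parser.y</code>; the token names and rules are
+invented for this illustration, but the shape is the same: each rule describes
+how a sequence of line types collapses into a block, and lemon turns the
+declarations into the C parser.</p>
+
+<pre><code>%token_prefix LINE_
+
+doc ::= blocks.
+
+blocks ::= blocks block.
+blocks ::= block.
+
+// A run of plain lines ended by a blank line becomes a paragraph.
+block ::= plain_lines BLANK.
+plain_lines ::= plain_lines PLAIN.
+plain_lines ::= PLAIN.
+
+// A run of indented lines ended by a blank line becomes indented code.
+block ::= indented_lines BLANK.
+indented_lines ::= indented_lines INDENT.
+indented_lines ::= INDENT.</code></pre>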
+
+<h3 id="optimization">Optimization </h3>
+
+<p>One of my goals for <abbr title="MultiMarkdown">MMD</abbr> 6 was performance. So I’ve paid attention to speed
+along the way, and have tried to use a few tricks to keep things fast. Here
+are some things I’ve learned along the way. In no particular order:</p>
+
+<h4 id="memoryallocation">Memory Allocation </h4>
+
+<p>When parsing a long document, a <em>lot</em> of token structures are created. Each
+one requires a small bit of memory to be allocated. In aggregate, that time
+added up and slowed down performance.</p>
+
+<p>After reading for a bit, I ended up coming up with an approach that uses
+larger chunks of memory. I allocate pools of memory in large slabs for
+smaller “objects”. For example, I allocate memory for 1024 tokens at a
+single time, and then dole that memory out as needed. When the slab is empty,
+a new one is allocated. This dramatically improved performance.</p>
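+
+<p>A minimal sketch of the idea follows. The names are hypothetical and the
+real token pool differs in its details (for one thing, it keeps every slab on
+a list so they can all be freed at once).</p>
+
+<pre><code>#include &lt;stdlib.h&gt;
+
+#define POOL_SLAB_SIZE 1024
+
+/* A tiny object pool: grab memory for 1024 objects at a time and hand it
+   out piece by piece. */
+typedef struct {
+    size_t object_size;
+    size_t used;          /* objects handed out from the current slab */
+    char * slab;          /* current slab */
+} pool;
+
+void pool_init(pool * p, size_t object_size) {
+    p->object_size = object_size;
+    p->used = POOL_SLAB_SIZE;     /* force a slab allocation on first use */
+    p->slab = NULL;
+}
+
+void * pool_allocate(pool * p) {
+    if (p->used == POOL_SLAB_SIZE) {
+        /* Current slab is exhausted, so grab a fresh one. */
+        p->slab = malloc(POOL_SLAB_SIZE * p->object_size);
+        p->used = 0;
+    }
+
+    return p->slab + (p->used++ * p->object_size);
+}</code></pre>
+
+<p>A pool is initialized once, and each <code>pool_allocate()</code> call then replaces an
+individual <code>malloc()</code> for a token.</p>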
+
+<p>When pairing tokens, I originally created a new stack for each block. I then
+realized that an empty stack didn’t have any “leftover” cruft to interfere with
+re-use, so I just used one for the entire document. Again, a sizeable improvement in
+performance from only allocating one object instead of many. When recursing
+to a deeper level, the stack just gets deeper, but earlier levels aren’t
+modified.</p>
+
+<p>Speaking of tokens, I realized that the average document contains a lot of
+single spaces (there’s one between every two words I have written, for
+example.) The vast majority of the time, these single spaces have no effect
+on the output of Markdown documents. I changed my whitespace token search to
+only flag runs of 2 or more spaces, dramatically reducing the number of
+tokens. This gives the benefit of needing fewer memory allocations, and also
+reduces the number of tokens that need to be processed later on. The only
+downside is having to remember to check for a single space character in a few
+instances where it matters.</p>
+
+<h4 id="properinputbuffering">Proper input buffering </h4>
+
+<p>When I first began last spring, I was amazed to see how much time was being
+spent by MultiMarkdown simply reading the input file. Then I discovered it
+was because I was reading it one character at a time. I switched to using a
+buffered read approach and the time to read the file went to almost nothing. I
+experimented with different buffer sizes, but they did not seem to make a
+measurable difference.</p>
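+
+<p>The change amounts to something like the following sketch; it is
+illustrative only and is not the actual file-reading code in <abbr title="MultiMarkdown">MMD</abbr> 6.</p>
+
+<pre><code>#include &lt;stdio.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;string.h&gt;
+
+#define READ_BUFFER_SIZE 4096
+
+/* Read an entire file into a NUL-terminated string using a fixed-size
+   buffer, instead of pulling it in one character at a time. */
+char * read_whole_file(const char * path) {
+    char chunk[READ_BUFFER_SIZE];
+    char * result = NULL;
+    size_t total = 0;
+    size_t n;
+
+    FILE * f = fopen(path, "rb");
+    if (!f)
+        return NULL;
+
+    while ((n = fread(chunk, 1, READ_BUFFER_SIZE, f)) > 0) {
+        result = realloc(result, total + n + 1);
+        memcpy(result + total, chunk, n);
+        total += n;
+    }
+
+    fclose(f);
+
+    if (result)
+        result[total] = '\0';
+
+    return result;
+}</code></pre>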
+
+<h4 id="outputbuffering">Output Buffering </h4>
+
+<p>I experimented with different approaches to creating the output after parsing.
+I tried printing directly to <code>stdout</code>, and even played with different
+buffering settings. None of those seemed to work well, and all were slower
+than using the <code>d_string</code> approach (formerly called <code>GString</code> in MMD 5).</p>
+
+<h4 id="fastsearches">Fast Searches </h4>
+
+<p>After getting basic Markdown functionality complete, I discovered during
+testing that the time required to parse a document grew exponentially as the
+document grew longer. Performance was on par with CommonMark for shorter
+documents, but fell increasingly behind in larger tests. Time profiling found
+that the culprit was searching for link definitions when they didn’t exist.
+My first approach was to keep a stack of used link definitions, and to iterate
+through them when necessary. In long documents, this performs very poorly.
+After more research, I ended up using
+<a href="http://troydhanson.github.io/uthash/">uthash</a>. This allows me to search for
+a link (or footnote, etc.) by “name” rather than searching through an array.
+This allowed me to get <abbr title="MultiMarkdown">MMD</abbr>’s performance back to O(n), taking roughly twice as
+much time to process a document that is twice as long.</p>
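+
+<p>The uthash pattern looks roughly like the following. The struct and
+function names are invented for illustration, but the macros are the standard
+uthash API.</p>
+
+<pre><code>#include &lt;stdlib.h&gt;
+#include &lt;string.h&gt;
+
+#include "uthash.h"
+
+/* Hypothetical link definition record, made hashable by uthash. */
+typedef struct link_def {
+    char * label;             /* key: the normalized link label */
+    char * url;
+    UT_hash_handle hh;        /* required by uthash */
+} link_def;
+
+static link_def * definitions = NULL;
+
+/* Store a definition under its label. */
+void store_link(const char * label, const char * url) {
+    link_def * d = malloc(sizeof(link_def));
+    d->label = strdup(label);
+    d->url = strdup(url);
+    HASH_ADD_KEYPTR(hh, definitions, d->label, strlen(d->label), d);
+}
+
+/* Constant-time lookup by label, instead of walking an array. */
+link_def * find_link(const char * label) {
+    link_def * d = NULL;
+    HASH_FIND_STR(definitions, label, d);
+    return d;
+}</code></pre>
+
+<p>With something like this in place, checking whether <code>[foo][bar]</code> refers to a
+real definition becomes a single <code>find_link()</code> call rather than a scan of every
+definition seen so far.</p>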
+
+<h4 id="efficientutilityfunctions">Efficient Utility Functions </h4>
+
+<p>It is frequently necessary when parsing Markdown to check what sort of
+character we are dealing with at a certain position – a letter, whitespace,
+punctuation, etc. I created a lookup table for this via <code>char_lookup.c</code> and
+hard-coded it in <code>char.c</code>. These routines allow me to quickly, and
+consistently, classify any byte within a document. This saved a lot of
+programming time, and saved time tracking down bugs from handling things
+slightly differently under different circumstances. I also suspect it
+improved performance, but don’t have the data to back it up.</p>
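+
+<p>The idea, in sketch form; the flag names and values here are invented, and
+the real table lives in <code>char.c</code>.</p>
+
+<pre><code>/* Classification flags for each of the 256 possible byte values. */
+enum char_class {
+    CHAR_WHITESPACE  = 1,
+    CHAR_PUNCTUATION = 2,
+    CHAR_ALPHA       = 4,
+    CHAR_DIGIT       = 8,
+    CHAR_LINE_ENDING = 16
+};
+
+/* The table is generated once and then hard-coded, so classifying a byte
+   is a single array lookup with no branching. */
+static const unsigned char char_class_table[256] = {
+    [' ']  = CHAR_WHITESPACE,
+    ['\t'] = CHAR_WHITESPACE,
+    ['\n'] = CHAR_LINE_ENDING,
+    ['*']  = CHAR_PUNCTUATION,
+    ['_']  = CHAR_PUNCTUATION,
+    ['a']  = CHAR_ALPHA,      /* ...and so on for the rest of the alphabet */
+    ['0']  = CHAR_DIGIT       /* ...and the other digits */
+};
+
+int char_is_whitespace(unsigned char c) {
+    return char_class_table[c] & CHAR_WHITESPACE;
+}
+
+int char_is_punctuation(unsigned char c) {
+    return char_class_table[c] & CHAR_PUNCTUATION;
+}</code></pre>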
+
+<h4 id="testingwhilewriting">Testing While Writing </h4>
+
+<p>I developed several chunks of code in parallel while creating <abbr title="MultiMarkdown">MMD</abbr> 6. The vast
+majority of that supporting code was developed with a <a href="https://en.wikipedia.org/wiki/Test-driven_development">test-driven development</a> approach,
+relying on extensive unit testing along the way.</p>
+
+<p><abbr title="MultiMarkdown">MMD</abbr> isn’t particularly amenable to this approach at the small level, but
+instead I relied more on integration testing with an ever-growing collection
+of text files and the corresponding HTML files in the <abbr title="MultiMarkdown">MMD</abbr> 6 test suite. This
+allowed me to ensure new features work properly and that old features aren’t
+broken. At this time, there are 29 text files in the test suite, and many
+more to come.</p>
+
+<h4 id="otherlessons">Other Lessons </h4>
+
+<p>Some things that didn’t do me any good….</p>
+
+<p>I considered differences between using <code>malloc</code> and <code>calloc</code> when initializing
+tokens. The time saved by using <code>malloc</code> was basically exactly offset by the
+initial time required to initialize the token to default null values as
+compared to using <code>calloc</code>. When trying <code>calloc</code> failed to help me out
+(I had thought that clearing a whole slab in the object pool at once would be
+faster), I stuck with <code>malloc</code>, as it makes more sense to me in my workflow.</p>
+
+<p>I read a bit about <a href="http://www.catb.org/esr/structure-packing/">struct padding</a> and reordered some of my structs. It wasn’t
+until later that I discovered the <code>-Wpadded</code> option, and it’s not clear
+whether my changes modified anything. Since the structs were being padded
+automatically, there was no noticeable performance change, and I didn’t have
+the tools to measure whether I could have improved memory usage at all. Not
+sure this would be worth the effort – much lower hanging fruit available.</p>
+
+<h3 id="performance">Performance </h3>
+
+<p>Basic tests show that currently <abbr title="MultiMarkdown">MMD</abbr> 6 takes about 20–25% longer than CommonMark
+0.27.0 to process long files (e.g. 0.2 MB). However, it is around 5% <em>faster</em>
+than CommonMark when parsing a shorter file (27 kB) (measured by parsing the
+same file 200 times over). This test is performed using the Markdown
+syntax page, modified to avoid the use of the Setext header at the top. The
+longer files tested are created by copying the same syntax page onto itself,
+thereby doubling the length of the file with each iteration.</p>
+
+<p>The largest file I test is approximately 108 MB (4096 copies of the syntax
+page). On my machine (2012 Mac mini with 2.3 GHz Intel Core i7, 16 GB RAM),
+it takes approximately 4.4 seconds to parse with <abbr title="MultiMarkdown">MMD</abbr> 6 and 3.7 seconds with
+CommonMark. <abbr title="MultiMarkdown">MMD</abbr> 6 processes approximately 25 MB/s on this test file.
+CommonMark 0.27.0 gets about 29 MB/s on the same machine.</p>
+
+<p>There are some slight variations with the smaller test files (8–32 copies),
+but overall the performance of both programs (MMD 6 and CommonMark) is
+roughly linear as the test file gets bigger (double the file size and it takes
+twice as long to parse, aka O(n)).</p>
+
+<p>Out of curiosity, I ran the same tests on the original Markdown.pl by Gruber
+(v 1.0.2b8). It took approximately 178 seconds to parse 128 copies of the
+file (3.4 MB) and was demonstrating quadratic performance characteristics
+(double the file size and it takes 2<sup>2</sup> or 4 times longer to process, aka
+O(n<sup>2</sup>)). I didn’t bother running it on larger versions of the test file. For
+comparison, <abbr title="MultiMarkdown">MMD</abbr> 6 can process 128 copies in approximately 140 msec.</p>
+
+<p>Of note, the throughput speed drops when testing more complicated files
+containing more advanced MultiMarkdown features, though it still seems to
+maintain linear performance characteristics. A second test file is created by
+concatenating all of the test suite files (including the Markdown syntax
+file). In this case, <abbr title="MultiMarkdown">MMD</abbr> gets about 13 MB/s. CommonMark doesn’t support
+these additional features, so testing it with that file is not relevant. I
+will work to see whether there are certain features in particular that are
+more challenging and see whether they can be reworked to improve performance.</p>
+
+<p>As above, I have done some high level optimization of the parse strategy, but
+I’m sure there’s still a lot of room for further improvement to be made.
+Suggestions welcome!</p>
+
+<h3 id="testing">Testing </h3>
+
+<h4 id="testsuite">Test Suite </h4>
+
+<p>The development of <abbr title="MultiMarkdown">MMD</abbr> v6 was heavily, but not absolutely, influenced by the
+philosophy of test-driven development. While coding, I made use of test
+suites to verify successful implementation of new features, to avoid
+regression problems when adding new features, and to identify known edge cases
+in need of proper handling.</p>
+
+<p>The test suite (located in <code>tests/MMD6Tests</code>) is a “living” collection of
+documents that will continue to be updated as new bugs and edge cases are
+identified. This helps ensure proper integration testing of the entire
+application with every release.</p>
+
+<h4 id="fuzztesting">Fuzz Testing </h4>
+
+<p>I was not familiar with the concept of <a href="https://en.wikipedia.org/wiki/Fuzzing">Fuzz Testing</a>
+until a user mentioned something about
+it to me a year or two ago. I had never used it before, but it seemed like a
+good idea. I implemented it in two ways.</p>
+
+<p>The first is that I created a simplified version of the line parser that
+simply accepts various combinations of line type identifiers to see if they
+would successfully parse. The line parser is responsible for taking a series
+of line types (e.g. plain text, indented line, etc.) and determining what sort
+of block they should become. The file <code>test/parser_text.y</code> is run through the
+<code>lemon</code> program, compiled (with or without the <code>-DNDEBUG</code> flag) and then run.
+It sequentially throws every combination of line types at the simplified line
+parser to make sure that it doesn’t choke. When I first did this, I found
+several combinations of lines that did not pass.</p>
+
+<p><strong>NOTE</strong>: This does not verify accurate parsing, simply that the parser does
+not crash on an unacceptable combination of lines.</p>
+
+<p>The second form of fuzz testing I have started using more recently. This is
+using the <a href="http://lcamtuf.coredump.cx/afl/">American fuzzy lop</a> program to try
+to find text input that crashes <abbr title="MultiMarkdown">MMD</abbr>. This works by taking sample input (e.g.
+files from the test suite), modifying them slightly, and trying the modified
+versions. Do this over and over and over, and some interesting edge cases are
+sometimes identified. I have already found several such cases this way.
+Definitely a very useful tool!</p>
+
+<h4 id="unittesting">Unit Testing </h4>
+
+<p>Some of the original development was done with unit testing in some other
+tools I developed. This code formed the basis of a few parts of <abbr title="MultiMarkdown">MMD</abbr>.
+Otherwise, it was hard to see how to really create very good unit tests for
+the development of <abbr title="MultiMarkdown">MMD</abbr>. So there is really not much unit testing built into
+the code or used during the development.</p>
+
+<h3 id="changelog">Changelog </h3>
+
+<ul>
+<li><p>2017–03–13 – v 6.0.0-b2:</p>
+
+<ul>
+<li>ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup</li>
+<li>ADDED: Add back the mmd6 latex title file</li>
+<li>ADDED: Basic EPUB 3 support – uses ‘miniz’ library for zip creation</li>
+<li>ADDED: Update QuickStart and EPUB code</li>
+<li>CHANGED: Update QuickStart guide</li>
+<li>CHANGED: Update test suite</li>
+<li>FIXED: Don't duplicate LaTeX glossary definitions</li>
+<li>FIXED: Fix abbreviations in ODF; Improve test suite</li>
+<li>FIXED: Improve glossaries and abbreviations; Update QuickStart</li>
+<li>FIXED: Tidy up some compiler warnings in code</li>
+<li>FIXED: Use custom UUID code to minimize external dependencies</li>
+</ul></li>
+<li><p>2017–03–09 – v 6.0.0-b1:</p>
+
+<ul>
+<li>ADDED: Add French translations; fix typo in German</li>
+<li>ADDED: Add Quick Start guide</li>
+<li>ADDED: Add functionality to automatically identify abbreviations and glossary terms in source</li>
+<li>ADDED: Improve LaTeX configuration files</li>
+<li>ADDED: Update German translations</li>
+<li>ADDED: Use native ODF table of contents instead of a manual list</li>
+<li>ADDED: Use native command for table of contents in LaTeX</li>
+<li>CHANGED: Bring HTML and ODF into line with LaTeX as to output of abbreviations on first and subsequent uses</li>
+<li>CHANGED: Slight performance tweak</li>
+<li>CHANGED: Update German test suite</li>
+<li>FIXED: Allow <code>{{TOC}}</code> in latex verbatim</li>
+<li>FIXED: Don't free token_pool if never initialized</li>
+<li>FIXED: Fix German typo</li>
+<li>FIXED: Fix missing token type</li>
+<li>FIXED: Improve performance of checking document for metadata, which improves performance when checking for possible transclusion</li>
+<li>FIXED: Update test suite for abbreviation changes</li>
+</ul></li>
+<li><p>2017–03–05 – v 0.4.2-b:</p>
+
+<ul>
+<li>ADDED: Add and utility functions; fix memory leak</li>
+<li>ADDED: Initial abbreviation support</li>
+<li>ADDED: Keep working on Abbreviations/Glossaries</li>
+<li>ADDED: Refactor abbreviation code; Add inline abbreviations; Fix abbreviations in ODF</li>
+<li>ADDED: Update Inline Footnote test</li>
+<li>CHANGED: Add comments to i18n.h</li>
+<li>CHANGED: Finish refactoring note–related code</li>
+<li>CHANGED: Refactor footnotes</li>
+<li>CHANGED: Refactor glossary code</li>
+<li>CHANGED: Remove offset from html export functions</li>
+<li>FIXED: latex list items need to block optional argument to allow ‘[’ as first character</li>
+<li>Merge branch ‘release/0.4.1-b’ into develop</li>
+</ul></li>
+<li><p>2017–03–04 – v 0.4.1-b:</p>
+
+<ul>
+<li>FIXED: Add glossary localization</li>
+</ul></li>
+<li><p>2017–03–04 – v 0.4.0-b:</p>
+
+<ul>
+<li>ADDED: Add TOC support to ODF</li>
+<li>ADDED: Add glossary support to ODF</li>
+<li>ADDED: Add prelim code for handling abbreviations</li>
+<li>ADDED: Add support for Swift Package Manager; CHANGED: Restructure source directory</li>
+<li>ADDED: Added LaTeX support for escaped characters, fenced code blocks, images, links</li>
+<li>ADDED: Basic ODF Support</li>
+<li>ADDED: Better document strong/emph algorithm</li>
+<li>ADDED: Continue ODF progress</li>
+<li>ADDED: Continue to work on ODF export</li>
+<li>ADDED: Continue work on ODF</li>
+<li>ADDED: Finish ODF support for lists</li>
+<li>ADDED: Improve performance when exporting</li>
+<li>ADDED: Improve token_pool memory handling</li>
+<li>ADDED: Prototype support for Glossaries</li>
+<li>ADDED: Support ‘latexconfig’ metadata</li>
+<li>CHANGED: Use multiple cases in glossary tests</li>
+<li>FIXED: Don't force glossary terms into lowercase</li>
+<li>FIXED: Fix Makefile for new source file location</li>
+<li>FIXED: Fix algorithm for creating TOC to properly handle ‘incorrect’ levels</li>
+<li>FIXED: Fix linebreaks in LaTeX; ADDED: Add Linebreaks test file</li>
+<li>FIXED: Fix new_source script for new directory structure</li>
+<li>FIXED: Fix non–breaking space in ODF</li>
+<li>FIXED: Fix padding at end of document body in ODF</li>
+<li>FIXED: Fix underscores in raw latex</li>
+<li>FIXED: Potential bug</li>
+<li>NOTE: Add shared library build option</li>
+</ul></li>
+<li><p>2017–02–17 – v 0.3.1.a:</p>
+
+<ul>
+<li>ADDED: ‘finalize’ beamer support</li>
+<li>ADDED: Add escaped newline as linebreak; start on beamer/memoir support</li>
+<li>ADDED: CriticMarkup test for LaTeX</li>
+<li>ADDED: Custom LaTeX output for CriticMarkup comments</li>
+<li>ADDED: Support mmd export format</li>
+<li>ADDED: Work on cpack installer – change project name for compatibility</li>
+<li>CHANGED: Adjust latex metadata configuration for consistency</li>
+<li>CHANGED: Configure cmake to use C99</li>
+<li>FIXED: Add custom implementation for cross–platform support</li>
+<li>FIXED: Fix German HTML tests</li>
+<li>FIXED: Fix cpack destination directory issue</li>
+<li>FIXED: Fix memory leaks etc</li>
+<li>FIXED: Fix warning in custom vasprintf</li>
+<li>FIXED: Modify CMakeLists.txt to test for use of clang compiler</li>
+<li>FIXED: Work on memory leaks</li>
+<li>NOTE: Adjust license width to improve display on smaller terminal windows</li>
+</ul></li>
+<li><p>2017–02–14 – v 0.3.0a:</p>
+
+<ul>
+<li>ADDED: Add basic image support to LaTeX</li>
+<li>ADDED: Add file transclusion</li>
+<li>ADDED: Add support for citation ‘locators’</li>
+<li>ADDED: Add support for manual labels on ATX Headers</li>
+<li>ADDED: Add support for manual labels on Setext Headers</li>
+<li>ADDED: Add support for tables in LaTeX</li>
+<li>ADDED: HTML Comments appear as raw LaTeX</li>
+<li>ADDED: Improved citation support in LaTeX</li>
+<li>ADDED: Support \autoref{} in LaTeX</li>
+<li>ADDED: Support combined options in LaTeX citations that use the ‘][’ syntax</li>
+<li>ADDED: Support language specifier in fenced code blocks</li>
+<li>ADDED: Support metadata in LaTeX</li>
+<li>ADDED: Update Citations test suite</li>
+<li>FIXED: Escaped LaTeX characters</li>
+<li>FIXED: Fix bug in URL parsing</li>
+<li>FIXED: Fix bug in citation links</li>
+<li>FIXED: Fix bug when no closing divider or newline at end of last table cell</li>
+<li>FIXED: Fix issue printing ‘–’</li>
+<li>FIXED: Fix scan_url test suite</li>
+<li>FIXED: Get Math working in LaTeX</li>
+<li>FIXED: Improve reliability of link scanner</li>
+<li>FIXED: Properly add id attribute to new instances of citation only</li>
+<li>FIXED: Properly handle manual labels with TOC</li>
+<li>FIXED: Properly print hash characters in LaTeX</li>
+<li>FIXED: Separate LaTeX verbatim and texttt character handling</li>
+<li>FIXED: Update Escapes test LaTeX result</li>
+<li>FIXED: Work on escaping LaTeX characters</li>
+</ul></li>
+<li><p>2017–02–08 – v 0.1.4a:</p>
+
+<ul>
+<li>ADDED: Add smart quote support for other languages (resolves #15)</li>
+</ul></li>
+<li><p>2017–02–08 – v 0.1.3a:</p>
+
+<ul>
+<li>ADDED: Add support for reference image id attributes</li>
+<li>ADDED: Add support for table captions</li>
+<li>ADDED: Metadata support for base header level</li>
+<li>ADDED: Support distinction between 3 and 5 backticks in fenced code blocks</li>
+<li>ADDED: Support Setext headers</li>
+<li>FIXED: Fix issue with metadata disrupting smart quotes</li>
+</ul></li>
+<li><p>2017–02–07 – v 0.1.2a:</p>
+
+<ul>
+<li>“pathologic” test suite – fix handling of nested brackets, e.g.
+<code>[[[[foo]]]]</code> to avoid bogging down checking for reference links that
+don’t exist.</li>
+<li>Table support – a single blank line separates sections of tables, so
+at least two blank lines are needed between adjacent tables.</li>
+<li>Definition list support</li>
+<li>“fuzz testing” – stress test the parser for unexpected failures</li>
+<li>Table of Contents support</li>
+<li>Improved compatibility mode parsing</li>
+</ul></li>
+<li><p>2017–01–28 – v 0.1.1a includes a few updates:</p>
+
+<ul>
+<li>Metadata support</li>
+<li>Metadata variables support</li>
+<li>Extended ASCII range character checking</li>
+<li>Rudimentary language translations, including German</li>
+<li>Improved performance</li>
+<li>Additional testing:
+
+<ul>
+<li>CriticMarkup</li>
+<li>HTML Blocks</li>
+<li>Metadata/Variables</li>
+<li>“pathologic” test cases from CommonMark</li>
+</ul></li>
+</ul></li>
+</ul>
+
+<div class="glossary">
+<hr />
+<ol>
+
+<li id="gn:1">
+PEG: <p>Parsing Expression Grammar <a href="https://en.wikipedia.org/wiki/Parsing_expression_grammar">https://en.wikipedia.org/wiki/Parsing_expression_grammar</a> <a href="#gnref:1" title="return to body" class="reverseglossary"> ↩</a></p>
+</li>
+
+</ol>
+</div>
+
+</body>
+</html>
+
later. But I didn't need it for MMD now. `lexer.re` contains the source for
the tokenizer.
-[Aho–Corasick algorithm]: https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm
+[Aho–Corasick algorithm]: https://en.wikipedia.org/wiki/Aho-Corasick_algorithm
I looked long and hard for a way to simplify the parsing algorithm to try and
"touch" each token only once. Ideally, the program could step through each
3. The tokens inside each block can then be paired together to create more
complex syntax such as links, strong, emphasis, etc.
-To handle the block parsing, I started off using the [Aho-Corasick] code to
+To handle the block parsing, I started off using the Aho-Corasick code to
handle my first attempt. I had actually implemented some basic regex
functionality, and used that to group lines together to create blocks. But
this quickly fell apart in the face of more complex structures such as
[?AST]: Abstract Syntax Tree <https://en.wikipedia.org/wiki/Abstract_syntax_tree>
+
+# Changelog #
+
+
+* 2017-03-13 -- v 6.0.0-b2:
+
+ * ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup
+ * ADDED: Add back the mmd6 latex title file
+	* ADDED: Basic EPUB 3 support -- uses 'miniz' library for zip creation
+ * ADDED: Update QuickStart and EPUB code
+ * CHANGED: Update QuickStart guide
+ * CHANGED: Update test suite
+ * FIXED: Don't duplicate LaTeX glossary definitions
+ * FIXED: Fix abbreviations in ODF; Improve test suite
+ * FIXED: Improve glossaries and abbreviations; Update QuickStart
+ * FIXED: Tidy up some compiler warnings in code
+ * FIXED: Use custom UUID code to minimize external dependencies
+
+
+* 2017-03-09 -- v 6.0.0-b1:
+
+ * ADDED: Add French translations; fix typo in German
+ * ADDED: Add Quick Start guide
+ * ADDED: Add functionality to automatically identify abbreviations and glossary terms in source
+ * ADDED: Improve LaTeX configuration files
+ * ADDED: Update German translations
+ * ADDED: Use native ODF table of contents instead of a manual list
+ * ADDED: Use native command for table of contents in LaTeX
+	* CHANGED: Bring HTML and ODF into line with LaTeX as to output of abbreviations on first and subsequent uses
+ * CHANGED: Slight performance tweak
+ * CHANGED: Update German test suite
+ * FIXED: Allow `{{TOC}}` in latex verbatim
+ * FIXED: Don't free token_pool if never initialized
+ * FIXED: Fix German typo
+ * FIXED: Fix missing token type
+ * FIXED: Improve performance of checking document for metadata, which improves performance when checking for possible transclusion
+ * FIXED: Update test suite for abbreviation changes
+
+
+* 2017-03-05 -- v 0.4.2-b:
+
+ * ADDED: Add and utility functions; fix memory leak
+ * ADDED: Initial abbreviation support
+ * ADDED: Keep working on Abbreviations/Glossaries
+ * ADDED: Refactor abbreviation code; Add inline abbreviations; Fix abbreviations in ODF
+ * ADDED: Update Inline Footnote test
+ * CHANGED: Add comments to i18n.h
+ * CHANGED: Finish refactoring note-related code
+ * CHANGED: Refactor footnotes
+ * CHANGED: Refactor glossary code
+ * CHANGED: Remove offset from html export functions
+ * FIXED: latex list items need to block optional argument to allow '[' as first character
+ * Merge branch 'release/0.4.1-b' into develop
+
+
+* 2017-03-04 -- v 0.4.1-b:
+
+ * FIXED: Add glossary localization
+
+
+* 2017-03-04 -- v 0.4.0-b:
+
+ * ADDED: Add TOC support to ODF
+ * ADDED: Add glossary support to ODF
+ * ADDED: Add prelim code for handling abbreviations
+	* ADDED: Add support for Swift Package Manager; CHANGED: Restructure source directory
+ * ADDED: Added LaTeX support for escaped characters, fenced code blocks, images, links
+ * ADDED: Basic ODF Support
+ * ADDED: Better document strong/emph algorithm
+ * ADDED: Continue ODF progress
+ * ADDED: Continue to work on ODF export
+ * ADDED: Continue work on ODF
+ * ADDED: Finish ODF support for lists
+ * ADDED: Improve performance when exporting
+ * ADDED: Improve token_pool memory handling
+ * ADDED: Prototype support for Glossaries
+ * ADDED: Support 'latexconfig' metadata
+ * CHANGED: Use multiple cases in glossary tests
+ * FIXED: Don't force glossary terms into lowercase
+ * FIXED: Fix Makefile for new source file location
+ * FIXED: Fix algorithm for creating TOC to properly handle 'incorrect' levels
+ * FIXED: Fix linebreaks in LaTeX; ADDED: Add Linebreaks test file
+ * FIXED: Fix new_source script for new directory structure
+ * FIXED: Fix non-breaking space in ODF
+ * FIXED: Fix padding at end of document body in ODF
+ * FIXED: Fix underscores in raw latex
+ * FIXED: Potential bug
+ * NOTE: Add shared library build option
+
+
+* 2017-02-17 -- v 0.3.1.a:
+
+ * ADDED: 'finalize' beamer support
+ * ADDED: Add escaped newline as linebreak; start on beamer/memoir support
+ * ADDED: CriticMarkup test for LaTeX
+ * ADDED: Custom LaTeX output for CriticMarkup comments
+ * ADDED: Support mmd export format
+ * ADDED: Work on cpack installer -- change project name for compatibility
+ * CHANGED: Adjust latex metadata configuration for consistency
+ * CHANGED: Configure cmake to use C99
+ * FIXED: Add custom implementation for cross-platform support
+ * FIXED: Fix German HTML tests
+ * FIXED: Fix cpack destination directory issue
+ * FIXED: Fix memory leaks etc
+ * FIXED: Fix warning in custom vasprintf
+ * FIXED: Modify CMakeLists.txt to test for use of clang compiler
+ * FIXED: Work on memory leaks
+ * NOTE: Adjust license width to improve display on smaller terminal windows
+
+
+* 2017-02-14 -- v 0.3.0a:
+
+ * ADDED: Add basic image support to LaTeX
+ * ADDED: Add file transclusion
+ * ADDED: Add support for citation 'locators'
+ * ADDED: Add support for manual labels on ATX Headers
+ * ADDED: Add support for manual labels on Setext Headers
+ * ADDED: Add support for tables in LaTeX
+ * ADDED: HTML Comments appear as raw LaTeX
+ * ADDED: Improved citation support in LaTeX
+ * ADDED: Support \autoref{} in LaTeX
+ * ADDED: Support combined options in LaTeX citations that use the '\]\[' syntax
+ * ADDED: Support language specifier in fenced code blocks
+ * ADDED: Support metadata in LaTeX
+ * ADDED: Update Citations test suite
+ * FIXED: Escaped LaTeX characters
+ * FIXED: Fix bug in URL parsing
+ * FIXED: Fix bug in citation links
+ * FIXED: Fix bug when no closing divider or newline at end of last table cell
+ * FIXED: Fix issue printing '-'
+ * FIXED: Fix scan_url test suite
+ * FIXED: Get Math working in LaTeX
+	* FIXED: Improve reliability of link scanner
+ * FIXED: Properly add id attribute to new instances of citation only
+ * FIXED: Properly handle manual labels with TOC
+ * FIXED: Properly print hash characters in LaTeX
+ * FIXED: Separate LaTeX verbatim and texttt character handling
+ * FIXED: Update Escapes test LaTeX result
+ * FIXED: Work on escaping LaTeX characters
+
+
+* 2017-02-08 -- v 0.1.4a:
+
+ * ADDED: Add smart quote support for other languages (resolves #15)
+
+
+* 2017-02-08 -- v 0.1.3a:
+
+ * ADDED: Add support for reference image id attributes
+ * ADDED: Add support for table captions
+ * ADDED: Metadata support for base header level
+ * ADDED: Support distinction between 3 and 5 backticks in fenced code blocks
+ * ADDED: Support Setext headers
+ * FIXED: Fix issue with metadata disrupting smart quotes
+
+* 2017-02-07 -- v 0.1.2a:
+
+ * "pathologic" test suite -- fix handling of nested brackets, e.g.
+ `[[[[foo]]]]` to avoid bogging down checking for reference links that
+ don't exist.
+ * Table support -- a single blank line separates sections of tables, so
+ at least two blank lines are needed between adjacent tables.
+ * Definition list support
+ * "fuzz testing" -- stress test the parser for unexpected failures
+ * Table of Contents support
+ * Improved compatibility mode parsing
+
+* 2017-01-28 -- v 0.1.1a includes a few updates:
+
+ * Metadata support
+ * Metadata variables support
+ * Extended ASCII range character checking
+ * Rudimentary language translations, including German
+ * Improved performance
+ * Additional testing:
+ * CriticMarkup
+		* HTML Blocks
+ * Metadata/Variables
+ * "pathologic" test cases from CommonMark
+
| Version: | 6.0.0-b2 |
-## Updates ##
-
-* 2017-03-13 -- v 6.0.0-b2:
-
- * ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup
- * ADDED: Add back the mmd6 latex title file
- * ADDED: Basic EPUB 3 support -- uses 'miniz' library to zip creation
- * ADDED: Update QuickStart and EPUB code
- * CHANGED: Update QuickStart guide
- * CHANGED: Update test suite
- * FIXED: Don't duplicate LaTeX glossary definitions
- * FIXED: Fix abbreviations in ODF; Improve test suite
- * FIXED: Improve glossaries and abbreviations; Update QuickStart
- * FIXED: Tidy up some compiler warnings in code
- * FIXED: Use custom UUID code to minimize external dependencies
-
-
-* 2017-03-09 -- v 6.0.0-b1:
-
- * ADDED: Add French translations; fix typo in German
- * ADDED: Add Quick Start guide
- * ADDED: Add functionality to automatically identify abbreviations and glossary terms in source
- * ADDED: Improve LaTeX configuration files
- * ADDED: Update German translations
- * ADDED: Use native ODF table of contents instead of a manual list
- * ADDED: Use native command for table of contents in LaTeX
- * CHANGED: Bring HTML and ODF into line with LaTeX as to output of abbreviatinos on first and subsequent uses
- * CHANGED: Slight performance tweak
- * CHANGED: Update German test suite
- * FIXED: Allow {{TOC}} in latex verbatim
- * FIXED: Don't free token_pool if never initialized
- * FIXED: Fix German typo
- * FIXED: Fix missing token type
- * FIXED: Improve performance of checking document for metadata, which improves performance when checking for possible transclusion
- * FIXED: Update test suite for abbreviation changes
-
-
-* 2017-03-05 -- v 0.4.2-b:
-
- * ADDED: Add and utility functions; fix memory leak
- * ADDED: Initial abbreviation support
- * ADDED: Keep working on Abbreviations/Glossaries
- * ADDED: Refactor abbreviation code; Add inline abbreviations; Fix abbreviations in ODF
- * ADDED: Update Inline Footnote test
- * CHANGED: Add comments to i18n.h
- * CHANGED: Finish refactoring note-related code
- * CHANGED: Refactor footnotes
- * CHANGED: Refactor glossary code
- * CHANGED: Remove offset from html export functions
- * FIXED: latex list items need to block optional argument to allow '[' as first character
- * Merge branch 'release/0.4.1-b' into develop
-
-
-* 2017-03-04 -- v 0.4.1-b:
-
- * FIXED: Add glossary localization
-
-
-* 2017-03-04 -- v 0.4.0-b:
-
- * ADDED: Add TOC support to ODF
- * ADDED: Add glossary support to ODF
- * ADDED: Add prelim code for handling abbreviations
- * ADDED: Add support for Swift Package Maker; CHANGED: Restructure source directory
- * ADDED: Added LaTeX support for escaped characters, fenced code blocks, images, links
- * ADDED: Basic ODF Support
- * ADDED: Better document strong/emph algorithm
- * ADDED: Continue ODF progress
- * ADDED: Continue to work on ODF export
- * ADDED: Continue work on ODF
- * ADDED: Finish ODF support for lists
- * ADDED: Improve performance when exporting
- * ADDED: Improve token_pool memory handling
- * ADDED: Prototype support for Glossaries
- * ADDED: Support 'latexconfig' metadata
- * CHANGED: Use multiple cases in glossary tests
- * FIXED: Don't force glossary terms into lowercase
- * FIXED: Fix Makefile for new source file location
- * FIXED: Fix algorithm for creating TOC to properly handle 'incorrect' levels
- * FIXED: Fix linebreaks in LaTeX; ADDED: Add Linebreaks test file
- * FIXED: Fix new_source script for new directory structure
- * FIXED: Fix non-breaking space in ODF
- * FIXED: Fix padding at end of document body in ODF
- * FIXED: Fix underscores in raw latex
- * FIXED: Potential bug
- * NOTE: Add shared library build option
-
-
-* 2017-02-17 -- v 0.3.1.a:
-
- * ADDED: 'finalize' beamer support
- * ADDED: Add escaped newline as linebreak; start on beamer/memoir support
- * ADDED: CriticMarkup test for LaTeX
- * ADDED: Custom LaTeX output for CriticMarkup comments
- * ADDED: Support mmd export format
- * ADDED: Work on cpack installer -- change project name for compatibility
- * CHANGED: Adjust latex metadata configuration for consistency
- * CHANGED: Configure cmake to use C99
- * FIXED: Add custom implementation for cross-platform support
- * FIXED: Fix German HTML tests
- * FIXED: Fix cpack destination directory issue
- * FIXED: Fix memory leaks etc
- * FIXED: Fix warning in custom vasprintf
- * FIXED: Modify CMakeLists.txt to test for use of clang compiler
- * FIXED: Work on memory leaks
- * NOTE: Adjust license width to improve display on smaller terminal windows
-
-
-* 2017-02-14 -- v 0.3.0a:
-
- * ADDED: Add basic image support to LaTeX
- * ADDED: Add file transclusion
- * ADDED: Add support for citation 'locators'
- * ADDED: Add support for manual labels on ATX Headers
- * ADDED: Add support for manual labels on Setext Headers
- * ADDED: Add support for tables in LaTeX
- * ADDED: HTML Comments appear as raw LaTeX
- * ADDED: Improved citation support in LaTeX
- * ADDED: Support \autoref{} in LaTeX
- * ADDED: Support combined options in LaTeX citations that use the '\]\[' syntax
- * ADDED: Support language specifier in fenced code blocks
- * ADDED: Support metadata in LaTeX
- * ADDED: Update Citations test suite
- * FIXED: Escaped LaTeX characters
- * FIXED: Fix bug in URL parsing
- * FIXED: Fix bug in citation links
- * FIXED: Fix bug when no closing divider or newline at end of last table cell
- * FIXED: Fix issue printing '-'
- * FIXED: Fix scan_url test suite
- * FIXED: Get Math working in LaTeX
- * FIXED: Improve reliability or link scanner
- * FIXED: Properly add id attribute to new instances of citation only
- * FIXED: Properly handle manual labels with TOC
- * FIXED: Properly print hash characters in LaTeX
- * FIXED: Separate LaTeX verbatim and texttt character handling
- * FIXED: Update Escapes test LaTeX result
- * FIXED: Work on escaping LaTeX characters
-
-
-* 2017-02-08 -- v 0.1.4a:
-
- * ADDED: Add smart quote support for other languages (resolves #15)
-
-
-* 2017-02-08 -- v 0.1.3a:
-
- * ADDED: Add support for reference image id attributes
- * ADDED: Add support for table captions
- * ADDED: Metadata support for base header level
- * ADDED: Support distinction between 3 and 5 backticks in fenced code blocks
- * ADDED: Support Setext headers
- * FIXED: Fix issue with metadata disrupting smart quotes
-
-* 2017-02-07 -- v 0.1.2a:
-
- * "pathologic" test suite -- fix handling of nested brackets, e.g.
- `[[[[foo]]]]` to avoid bogging down checking for reference links that
- don't exist.
- * Table support -- a single blank line separates sections of tables, so
- at least two blank lines are needed between adjacent tables.
- * Definition list support
- * "fuzz testing" -- stress test the parser for unexpected failures
- * Table of Contents support
- * Improved compatibility mode parsing
-
-* 2017-01-28 -- v 0.1.1a includes a few updates:
-
- * Metadata support
- * Metadata variables support
- * Extended ASCII range character checking
- * Rudimentary language translations, including German
- * Improved performance
- * Additional testing:
- * CriticMarkup
- * HTML Blokcs
- * Metadata/Variables
- * "pathologic" test cases from CommonMark
+## An Announcement! ##
+MultiMarkdown v6 is finally here! It's technically still in "beta" as I would
+like to hear back from a few more users to make sure I'm not missing anything,
+but it has been subjected to much more rigorous testing than any previous
+version of MultiMarkdown. If you want more information about testing, see
+`DevelopmentNotes`. It's basically feature complete as a replacement for MMD
+v5, and it includes additional features beyond that.
-## An Announcement! ##
-I would like to officially announce that MultiMarkdown version 6 is in public
-alpha. It's finally at a point where it is usable, but there are quite a few
-caveats.
+## Obtaining MultiMarkdown ##
+
+You can download the latest installer for macOS or Windows from GitHub:
+
+<https://github.com/fletcher/MultiMarkdown-6/releases>
-This post is a way for me to organize some of my thoughts, provide some
-history for those who are interested, and to provide some tips and tricks from
-my experiences for those who are working on their own products.
+To build from source, download or clone the repository from GitHub. Then:
-But first, some background...
+ make release
+ (OR)
+ make debug
+ cd build
+ make
+
+You can optionally test using the test suite:
+
+ ctest
## Differences in MultiMarkdown Itself ##
-MultiMarkdown v6 is mostly about making a better MMD parser, but it will
-likely involve a few changes to the MultiMarkdown language itself.
+MultiMarkdown v6 is mostly about making a better MMD parser, but it involves a
+few changes to the MultiMarkdown syntax itself.
+1. Setext headers can now span more than one line; every line above the
+underline is included in the header:
-1. {--I am thinking about removing Setext headers from the language. I almost
-never use them, much preferring to use ATX style headers (`# foo #`).
-Additionally, I have never liked the fact that Setext headers allow the
-meaning of a line to be completely changed by the following line. It makes
-the parsing slightly more difficult on a technical level (requiring some
-backtracking at times). I'm not 100% certain on this, but right now I believe
-it's the only Markdown feature that doesn't exist in MMD 6 yet.--}{++I decided
-to go ahead and implement Setext headers, as it can be done with the new
-parser without backtracking. One difference with older versions of MMD, as
-well as Markdown itself, is that a setext header can consist of more than one
-line to be included in the header.++}
+ This is
+ a header
+ ========
2. Whitespace is not allowed between the text brackets and label brackets in
reference links, images, footnotes, etc. For example `[foo] [bar]` will no
longer be the same as `[foo][bar]`.
3. Link and image titles can be quoted with `'foo'`, `"foo"`, or `(foo)`.
+Link attributes can be used in both reference and inline links/images (see
+the sketch after this list).
4. HTML elements are handled slightly differently. There is no longer a
`markdown="1"` feature. Instead, HTML elements that are on a line by themselves
are treated as HTML such that Markdown will not be parsed inside of it.
HTML block-level tags are even "stronger" at starting an HTML block. It is
not quite as complex as the approach used in CommonMark, but is similar under
-most circumstances.
+most circumstances. Leaving a blank line after the opening tag will allow
+MultiMarkdown parsing inside of the HTML block (a sketch follows this list).
For example, this would not be parsed:
</div>
5. "Malformed" reference link definitions are handled slightly differently.
-For example, `Reference Footnotes.text` is parsed differently in compatibility
-mode than MMD-5. This started as a side-effect of the parsing algorithm, but
-I actually think it makes sense. This may or may not change in the future.
+For example, the test suite file `Reference Footnotes.text` is parsed
+differently in compatibility mode than it was in MMD-5. This started as a
+side-effect of the parsing algorithm, but I actually think it makes sense.
+This may or may not change in the future.
6. Table captions in MMD-6 must come immediately *after* the table, not
before it.
feature in MMD, but I don't see a problem with just making it default
behavior.
+8. Escaped spaces (`\ `) will be interpreted as a non-breaking space, if the
+output format supports it.
-## Where Does MultiMarkdown 6 Stand? ##
-
-
-### Features ###
-
-I *think* that all basic Markdown features have been implemented.
-Additionally, the following MultiMarkdown features have been implemented:
-
-* Automatic cross-reference targets
-* Basic Citation support
-* CriticMarkup support
-* Definition lists
-* Figures
-* Footnotes
-* Inline and reference footnotes
-* Image and Link attributes (attributes can now be used with inline links as
- well as reference links)
-* Math support
-* Smart quotes (support for languages other than english is not fully
- implemented yet)
-* Superscripts/subscripts
-* Table of Contents
-* Tables
-
-
-Things that are partially completed:
-
-* Citations -- still need:
- * Syntax for "not cited" entries
- * Output format
- * HTML --> separate footnotes and citations?
- * Locators required?
-* CriticMarkup -- need to decide:
- * How to handle CM stretches that include blank lines
-* Fenced code blocks
-* Headers -- need support for manual labels
-* Metadata
-* Full/Snippet modes
-
-
-Things yet to be completed:
-
-* Abbreviations
-* Glossaries
-* File Transclusion
+9. CriticMarkup, Abbreviations, Glossary Terms, and Citations are handled
+slightly differently. See the QuickStart guide for more information.
+10. Fenced code blocks can use leading/trailing "fences" of 3, 4, or 5
+backticks in length. That should be sufficient for complex documents without
+requiring a more complex parser. If there is no trailing fence, then the
+fenced block is considered to go through the end of the document.
-### Accuracy ###
+11. Emph and Strong parsing is conceptually the same, but the implementation
+is different. It is designed for speed, accuracy, and consistency. In
+general, it seems to handle edge cases much more reliably, but there are still
+a couple of situations that I would like to take into account, if possible.
+These are not situations that should occur often in "real life."
-MultiMarkdown v6 successfully parses the Markdown [syntax page], except for
-the Setext header at the top. It passes the 29 test files currently in place.
-There are a few at
+12. EPUB 3 output is supported without the need for any external tools.
-[syntax page]: https://daringfireball.net/projects/markdown/syntax
+13. There is internationalization support for HTML phrases, such as "see
+footnote". See [GitHub](https://github.com/fletcher/MultiMarkdown-6/issues/37)
+for more information.
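+
+To make item 3 a bit more concrete, here is a small sketch of the three title
+quoting styles and of attributes on inline and reference links/images. The
+URLs, the `mmd` label, and the attribute names and values are placeholders for
+illustration only:
+
+    [one](http://example.com/ 'Single-quoted title')
+    [two](http://example.com/ "Double-quoted title")
+    [three](http://example.com/ (Parenthesized title))
+
+    An inline image with attributes:
+    ![MultiMarkdown](http://example.com/logo.png "Logo" width=40px height=40px)
+
+    A reference link with attributes: [MultiMarkdown][mmd]
+
+    [mmd]: http://example.com/ "Title" class=external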
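+
+Here is a sketch of the HTML block behavior described in item 4; the `<div>`
+contents are purely illustrative:
+
+    <div>
+    *Stays raw HTML* -- no blank line follows the opening tag, so the block
+    is not parsed as MultiMarkdown.
+    </div>
+
+    <div>
+
+    *Parsed as MultiMarkdown* -- the blank line after the opening tag allows
+    MultiMarkdown parsing inside of the HTML block.
+
+    </div>
+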
| Version: | @My_Project_Version@ |
-## Updates ##
-
-* 2017-03-13 -- v 6.0.0-b2:
-
- * ADDED: Add CriticMarkup preprocessor that works across empty lines when accepting/rejecting markup
- * ADDED: Add back the mmd6 latex title file
- * ADDED: Basic EPUB 3 support -- uses 'miniz' library to zip creation
- * ADDED: Update QuickStart and EPUB code
- * CHANGED: Update QuickStart guide
- * CHANGED: Update test suite
- * FIXED: Don't duplicate LaTeX glossary definitions
- * FIXED: Fix abbreviations in ODF; Improve test suite
- * FIXED: Improve glossaries and abbreviations; Update QuickStart
- * FIXED: Tidy up some compiler warnings in code
- * FIXED: Use custom UUID code to minimize external dependencies
-
-
-* 2017-03-09 -- v 6.0.0-b1:
-
- * ADDED: Add French translations; fix typo in German
- * ADDED: Add Quick Start guide
- * ADDED: Add functionality to automatically identify abbreviations and glossary terms in source
- * ADDED: Improve LaTeX configuration files
- * ADDED: Update German translations
- * ADDED: Use native ODF table of contents instead of a manual list
- * ADDED: Use native command for table of contents in LaTeX
- * CHANGED: Bring HTML and ODF into line with LaTeX as to output of abbreviatinos on first and subsequent uses
- * CHANGED: Slight performance tweak
- * CHANGED: Update German test suite
- * FIXED: Allow {{TOC}} in latex verbatim
- * FIXED: Don't free token_pool if never initialized
- * FIXED: Fix German typo
- * FIXED: Fix missing token type
- * FIXED: Improve performance of checking document for metadata, which improves performance when checking for possible transclusion
- * FIXED: Update test suite for abbreviation changes
-
-
-* 2017-03-05 -- v 0.4.2-b:
-
- * ADDED: Add and utility functions; fix memory leak
- * ADDED: Initial abbreviation support
- * ADDED: Keep working on Abbreviations/Glossaries
- * ADDED: Refactor abbreviation code; Add inline abbreviations; Fix abbreviations in ODF
- * ADDED: Update Inline Footnote test
- * CHANGED: Add comments to i18n.h
- * CHANGED: Finish refactoring note-related code
- * CHANGED: Refactor footnotes
- * CHANGED: Refactor glossary code
- * CHANGED: Remove offset from html export functions
- * FIXED: latex list items need to block optional argument to allow '[' as first character
- * Merge branch 'release/0.4.1-b' into develop
-
-
-* 2017-03-04 -- v 0.4.1-b:
-
- * FIXED: Add glossary localization
-
-
-* 2017-03-04 -- v 0.4.0-b:
-
- * ADDED: Add TOC support to ODF
- * ADDED: Add glossary support to ODF
- * ADDED: Add prelim code for handling abbreviations
- * ADDED: Add support for Swift Package Maker; CHANGED: Restructure source directory
- * ADDED: Added LaTeX support for escaped characters, fenced code blocks, images, links
- * ADDED: Basic ODF Support
- * ADDED: Better document strong/emph algorithm
- * ADDED: Continue ODF progress
- * ADDED: Continue to work on ODF export
- * ADDED: Continue work on ODF
- * ADDED: Finish ODF support for lists
- * ADDED: Improve performance when exporting
- * ADDED: Improve token_pool memory handling
- * ADDED: Prototype support for Glossaries
- * ADDED: Support 'latexconfig' metadata
- * CHANGED: Use multiple cases in glossary tests
- * FIXED: Don't force glossary terms into lowercase
- * FIXED: Fix Makefile for new source file location
- * FIXED: Fix algorithm for creating TOC to properly handle 'incorrect' levels
- * FIXED: Fix linebreaks in LaTeX; ADDED: Add Linebreaks test file
- * FIXED: Fix new_source script for new directory structure
- * FIXED: Fix non-breaking space in ODF
- * FIXED: Fix padding at end of document body in ODF
- * FIXED: Fix underscores in raw latex
- * FIXED: Potential bug
- * NOTE: Add shared library build option
-
-
-* 2017-02-17 -- v 0.3.1.a:
-
- * ADDED: 'finalize' beamer support
- * ADDED: Add escaped newline as linebreak; start on beamer/memoir support
- * ADDED: CriticMarkup test for LaTeX
- * ADDED: Custom LaTeX output for CriticMarkup comments
- * ADDED: Support mmd export format
- * ADDED: Work on cpack installer -- change project name for compatibility
- * CHANGED: Adjust latex metadata configuration for consistency
- * CHANGED: Configure cmake to use C99
- * FIXED: Add custom implementation for cross-platform support
- * FIXED: Fix German HTML tests
- * FIXED: Fix cpack destination directory issue
- * FIXED: Fix memory leaks etc
- * FIXED: Fix warning in custom vasprintf
- * FIXED: Modify CMakeLists.txt to test for use of clang compiler
- * FIXED: Work on memory leaks
- * NOTE: Adjust license width to improve display on smaller terminal windows
-
-
-* 2017-02-14 -- v 0.3.0a:
-
- * ADDED: Add basic image support to LaTeX
- * ADDED: Add file transclusion
- * ADDED: Add support for citation 'locators'
- * ADDED: Add support for manual labels on ATX Headers
- * ADDED: Add support for manual labels on Setext Headers
- * ADDED: Add support for tables in LaTeX
- * ADDED: HTML Comments appear as raw LaTeX
- * ADDED: Improved citation support in LaTeX
- * ADDED: Support \autoref{} in LaTeX
- * ADDED: Support combined options in LaTeX citations that use the '\]\[' syntax
- * ADDED: Support language specifier in fenced code blocks
- * ADDED: Support metadata in LaTeX
- * ADDED: Update Citations test suite
- * FIXED: Escaped LaTeX characters
- * FIXED: Fix bug in URL parsing
- * FIXED: Fix bug in citation links
- * FIXED: Fix bug when no closing divider or newline at end of last table cell
- * FIXED: Fix issue printing '-'
- * FIXED: Fix scan_url test suite
- * FIXED: Get Math working in LaTeX
- * FIXED: Improve reliability or link scanner
- * FIXED: Properly add id attribute to new instances of citation only
- * FIXED: Properly handle manual labels with TOC
- * FIXED: Properly print hash characters in LaTeX
- * FIXED: Separate LaTeX verbatim and texttt character handling
- * FIXED: Update Escapes test LaTeX result
- * FIXED: Work on escaping LaTeX characters
-
-
-* 2017-02-08 -- v 0.1.4a:
-
- * ADDED: Add smart quote support for other languages (resolves #15)
-
-
-* 2017-02-08 -- v 0.1.3a:
-
- * ADDED: Add support for reference image id attributes
- * ADDED: Add support for table captions
- * ADDED: Metadata support for base header level
- * ADDED: Support distinction between 3 and 5 backticks in fenced code blocks
- * ADDED: Support Setext headers
- * FIXED: Fix issue with metadata disrupting smart quotes
-
-* 2017-02-07 -- v 0.1.2a:
-
- * "pathologic" test suite -- fix handling of nested brackets, e.g.
- `[[[[foo]]]]` to avoid bogging down checking for reference links that
- don't exist.
- * Table support -- a single blank line separates sections of tables, so
- at least two blank lines are needed between adjacent tables.
- * Definition list support
- * "fuzz testing" -- stress test the parser for unexpected failures
- * Table of Contents support
- * Improved compatibility mode parsing
-
-* 2017-01-28 -- v 0.1.1a includes a few updates:
-
- * Metadata support
- * Metadata variables support
- * Extended ASCII range character checking
- * Rudimentary language translations, including German
- * Improved performance
- * Additional testing:
- * CriticMarkup
- * HTML Blokcs
- * Metadata/Variables
- * "pathologic" test cases from CommonMark
+## An Announcement! ##
+MultiMarkdown v6 is finally here! It's technically still in "beta" as I would
+like to hear back from a few more users to make sure I'm not missing anything,
+but it has been subjected to much more rigorous testing than any previous
+version of MultiMarkdown. If you want more information about testing, see
+`DevelopmentNotes`. It's basically feature complete as a replacement for MMD
+v5, and it includes additional features beyond that.
-## An Announcement! ##
-I would like to officially announce that MultiMarkdown version 6 is in public
-alpha. It's finally at a point where it is usable, but there are quite a few
-caveats.
+## Obtaining MultiMarkdown ##
+
+You can download the latest installer for macOS or Windows from GitHub:
+
+<https://github.com/fletcher/MultiMarkdown-6/releases>
-This post is a way for me to organize some of my thoughts, provide some
-history for those who are interested, and to provide some tips and tricks from
-my experiences for those who are working on their own products.
+To build from source, download or clone the repository from GitHub. Then:
-But first, some background...
+ make release
+ (OR)
+ make debug
+ cd build
+ make
+
+You can optionally test using the test suite:
+
+ ctest
## Differences in MultiMarkdown Itself ##
-MultiMarkdown v6 is mostly about making a better MMD parser, but it will
-likely involve a few changes to the MultiMarkdown language itself.
+MultiMarkdown v6 is mostly about making a better MMD parser, but it involves a
+few changes to the MultiMarkdown syntax itself.
+1. Setext headers can now span more than one line; every line above the
+underline is included in the header:
-1. {--I am thinking about removing Setext headers from the language. I almost
-never use them, much preferring to use ATX style headers (`# foo #`).
-Additionally, I have never liked the fact that Setext headers allow the
-meaning of a line to be completely changed by the following line. It makes
-the parsing slightly more difficult on a technical level (requiring some
-backtracking at times). I'm not 100% certain on this, but right now I believe
-it's the only Markdown feature that doesn't exist in MMD 6 yet.--}{++I decided
-to go ahead and implement Setext headers, as it can be done with the new
-parser without backtracking. One difference with older versions of MMD, as
-well as Markdown itself, is that a setext header can consist of more than one
-line to be included in the header.++}
+ This is
+ a header
+ ========
2. Whitespace is not allowed between the text brackets and label brackets in
reference links, images, footnotes, etc. For example `[foo] [bar]` will no
longer be the same as `[foo][bar]`.
3. Link and image titles can be quoted with `'foo'`, `"foo"`, or `(foo)`.
+Link attributes can be used in both reference and inline links/images (see
+the sketch after this list).
4. HTML elements are handled slightly differently. There is no longer a
`markdown="1"` feature. Instead, HTML elements that are on a line by themselves
are treated as HTML such that Markdown will not be parsed inside of it.
HTML block-level tags are even "stronger" at starting an HTML block. It is
not quite as complex as the approach used in CommonMark, but is similar under
-most circumstances.
+most circumstances. Leaving a blank line after the opening tag will allow
+MultiMarkdown parsing inside of the HTML block (a sketch follows this list).
For example, this would not be parsed:
</div>
5. "Malformed" reference link definitions are handled slightly differently.
-For example, `Reference Footnotes.text` is parsed differently in compatibility
-mode than MMD-5. This started as a side-effect of the parsing algorithm, but
-I actually think it makes sense. This may or may not change in the future.
+For example, the test suite file `Reference Footnotes.text` is parsed
+differently in compatibility mode than it was in MMD-5. This started as a
+side-effect of the parsing algorithm, but I actually think it makes sense.
+This may or may not change in the future.
6. Table captions in MMD-6 must come immediately *after* the table, not
before it.
feature in MMD, but I don't see a problem with just making it default
behavior.
+8. Escaped spaces (`\ `) will be interpreted as a non-breaking space, if the
+output format supports it.
-## Where Does MultiMarkdown 6 Stand? ##
-
-
-### Features ###
-
-I *think* that all basic Markdown features have been implemented.
-Additionally, the following MultiMarkdown features have been implemented:
-
-* Automatic cross-reference targets
-* Basic Citation support
-* CriticMarkup support
-* Definition lists
-* Figures
-* Footnotes
-* Inline and reference footnotes
-* Image and Link attributes (attributes can now be used with inline links as
- well as reference links)
-* Math support
-* Smart quotes (support for languages other than english is not fully
- implemented yet)
-* Superscripts/subscripts
-* Table of Contents
-* Tables
-
-
-Things that are partially completed:
-
-* Citations -- still need:
- * Syntax for "not cited" entries
- * Output format
- * HTML --> separate footnotes and citations?
- * Locators required?
-* CriticMarkup -- need to decide:
- * How to handle CM stretches that include blank lines
-* Fenced code blocks
-* Headers -- need support for manual labels
-* Metadata
-* Full/Snippet modes
-
-
-Things yet to be completed:
-
-* Abbreviations
-* Glossaries
-* File Transclusion
+9. CriticMarkup, Abbreviations, Glossary Terms, and Citations are handled
+slightly differently. See the QuickStart guide for more information.
+10. Fenced code blocks can use leading/trailing "fences" of 3, 4, or 5
+backticks in length. That should be sufficient for complex documents without
+requiring a more complex parser. If there is no trailing fence, then the
+fenced block is considered to go through the end of the document.
-### Accuracy ###
+11. Emph and Strong parsing is conceptually the same, but the implementation
+is different. It is designed for speed, accuracy, and consistency. In
+general, it seems to handle edge cases much more reliably, but there are still
+a couple of situations that I would like to take into account, if possible.
+These are not situations that should occur often in "real life."
-MultiMarkdown v6 successfully parses the Markdown [syntax page], except for
-the Setext header at the top. It passes the 29 test files currently in place.
-There are a few at
+12. EPUB 3 output is supported without the need for any external tools.
-[syntax page]: https://daringfireball.net/projects/markdown/syntax
+13. There is internationalization support for HTML phrases, such as "see
+footnote". See [GitHub](https://github.com/fletcher/MultiMarkdown-6/issues/37)
+for more information.
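+
+To make item 3 a bit more concrete, here is a small sketch of the three title
+quoting styles and of attributes on inline and reference links/images. The
+URLs, the `mmd` label, and the attribute names and values are placeholders for
+illustration only:
+
+    [one](http://example.com/ 'Single-quoted title')
+    [two](http://example.com/ "Double-quoted title")
+    [three](http://example.com/ (Parenthesized title))
+
+    An inline image with attributes:
+    ![MultiMarkdown](http://example.com/logo.png "Logo" width=40px height=40px)
+
+    A reference link with attributes: [MultiMarkdown][mmd]
+
+    [mmd]: http://example.com/ "Title" class=external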
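+
+Here is a sketch of the HTML block behavior described in item 4; the `<div>`
+contents are purely illustrative:
+
+    <div>
+    *Stays raw HTML* -- no blank line follows the opening tag, so the block
+    is not parsed as MultiMarkdown.
+    </div>
+
+    <div>
+
+    *Parsed as MultiMarkdown* -- the blank line after the opening tag allows
+    MultiMarkdown parsing inside of the HTML block.
+
+    </div>
+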