From: Fletcher T. Penney Date: Sun, 29 Jan 2017 16:46:42 +0000 (-0500) Subject: FIXED: Prevent stack overflows with pathologic input X-Git-Tag: 0.1.2a^2~17 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f74a9f0da4b66779b74215fd0f96d3dd6c4fd1a4;p=multimarkdown FIXED: Prevent stack overflows with pathologic input --- diff --git a/src/html.c b/src/html.c index 081f5e4..b551a1e 100644 --- a/src/html.c +++ b/src/html.c @@ -59,8 +59,8 @@ #include "char.h" #include "d_string.h" #include "html.h" -#include "libMultiMarkdown.h" #include "i18n.h" +#include "libMultiMarkdown.h" #include "parser.h" #include "token.h" #include "scanners.h" @@ -1026,6 +1026,14 @@ void mmd_export_token_html(DString * out, const char * source, token * t, size_t void mmd_export_token_tree_html(DString * out, const char * source, token * t, size_t offset, scratch_pad * scratch) { + + // Prevent stack overflow with "dangerous" input causing extreme recursion + if (scratch->recurse_depth == kMaxExportRecursiveDepth) { + return; + } + + scratch->recurse_depth++; + while (t != NULL) { if (scratch->skip_token) { scratch->skip_token--; @@ -1035,6 +1043,8 @@ void mmd_export_token_tree_html(DString * out, const char * source, token * t, s t = t->next; } + + scratch->recurse_depth--; } diff --git a/src/mmd.c b/src/mmd.c index 4831872..c7a21c0 100644 --- a/src/mmd.c +++ b/src/mmd.c @@ -91,6 +91,8 @@ mmd_engine * mmd_engine_create(DString * d, unsigned long extensions) { e->extensions = extensions; + e->recurse_depth = 0; + e->allow_meta = (extensions & EXT_COMPATIBILITY) ? false : true; e->language = LC_EN; @@ -748,6 +750,11 @@ token * mmd_tokenize_string(mmd_engine * e, const char * str, size_t len) { /// Parse token tree void mmd_parse_token_chain(mmd_engine * e, token * chain) { + if (e->recurse_depth == kMaxParseRecursiveDepth) + return; + + e->recurse_depth++; + void* pParser = ParseAlloc (malloc); // Create a parser (for lemon) token * walker = chain->child; // Walk the existing tree token * remainder; // Hold unparsed tail of chain @@ -779,6 +786,8 @@ void mmd_parse_token_chain(mmd_engine * e, token * chain) { e->root = NULL; ParseFree(pParser, free); + + e->recurse_depth--; } @@ -809,7 +818,7 @@ void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s) { case BLOCK_H5: case BLOCK_H6: case BLOCK_PARA: - token_pairs_match_pairs_inside_token(block, e, s); + token_pairs_match_pairs_inside_token(block, e, s, 0); break; case DOC_START_TOKEN: case BLOCK_LIST_BULLETED: @@ -820,13 +829,13 @@ void mmd_pair_tokens_in_block(token * block, token_pair_engine * e, stack * s) { break; case BLOCK_LIST_ITEM: case BLOCK_LIST_ITEM_TIGHT: - token_pairs_match_pairs_inside_token(block, e, s); + token_pairs_match_pairs_inside_token(block, e, s, 0); mmd_pair_tokens_in_chain(block->child, e, s); break; case LINE_TABLE: case BLOCK_TABLE: // TODO: Need to parse into cells first - token_pairs_match_pairs_inside_token(block, e, s); + token_pairs_match_pairs_inside_token(block, e, s, 0); mmd_pair_tokens_in_chain(block->child, e, s); break; case BLOCK_EMPTY: @@ -1487,6 +1496,7 @@ token * mmd_engine_parse_substring(mmd_engine * e, size_t byte_start, size_t byt // This avoids allocating/freeing one for each iteration. stack * pair_stack = stack_new(0); + mmd_pair_tokens_in_block(doc, e->pairings1, pair_stack); mmd_pair_tokens_in_block(doc, e->pairings2, pair_stack); mmd_pair_tokens_in_block(doc, e->pairings3, pair_stack); diff --git a/src/mmd.h b/src/mmd.h index b99bb6e..8109ecc 100644 --- a/src/mmd.h +++ b/src/mmd.h @@ -62,10 +62,14 @@ #include "token.h" #include "token_pairs.h" +#define kMaxParseRecursiveDepth 1000 //!< Maximum recursion depth when parsing -- to prevent stack overflow with "pathologic" input + + struct mmd_engine { DString * dstr; token * root; unsigned long extensions; + unsigned short recurse_depth; bool allow_meta; diff --git a/src/token_pairs.c b/src/token_pairs.c index 5b70eaa..fd3fc7c 100644 --- a/src/token_pairs.c +++ b/src/token_pairs.c @@ -128,7 +128,11 @@ void token_pair_mate(token * a, token * b) { /// Search a token's childen for matching pairs -void token_pairs_match_pairs_inside_token(token * parent, token_pair_engine * e, stack * s) { +void token_pairs_match_pairs_inside_token(token * parent, token_pair_engine * e, stack * s, unsigned short depth) { + + // Avoid stack overflow in "pathologic" input + if (depth == kMaxPairRecursiveDepth) + return; // Walk the child chain token * walker = parent->child; @@ -145,7 +149,7 @@ void token_pairs_match_pairs_inside_token(token * parent, token_pair_engine * e, while (walker != NULL) { if (walker->child) { - token_pairs_match_pairs_inside_token(walker, e, s); + token_pairs_match_pairs_inside_token(walker, e, s, depth + 1); } // Is this a closer? diff --git a/src/token_pairs.h b/src/token_pairs.h index 742accb..67fc76a 100644 --- a/src/token_pairs.h +++ b/src/token_pairs.h @@ -65,8 +65,9 @@ #include "CuTest.h" #endif -#define kMaxTokenTypes 200 // This needs to be larger than the largest token type being used -#define kLargeStackThreshold 1000 // Avoid unnecessary searches of large stacks +#define kMaxTokenTypes 200 //!< This needs to be larger than the largest token type being used +#define kLargeStackThreshold 1000 //!< Avoid unnecessary searches of large stacks +#define kMaxPairRecursiveDepth 1000 //!< Maximum recursion depth to traverse when pairing tokens -- to prevent stack overflow with "pathologic" input /// Store information about which tokens can be paired, and what actions to take when @@ -114,7 +115,8 @@ void token_pair_engine_add_pairing( void token_pairs_match_pairs_inside_token( token * parent, //!< Which tokens should we search for pairs token_pair_engine * e, //!< Token pair engine to be used for matching - stack * s //!< Pointer to a stack to use for pairing tokens + stack * s, //!< Pointer to a stack to use for pairing tokens + unsigned short depth //!< Keep track of recursion depth ); diff --git a/src/writer.c b/src/writer.c index 9b60658..3adaa6c 100644 --- a/src/writer.c +++ b/src/writer.c @@ -92,6 +92,8 @@ scratch_pad * scratch_pad_new(mmd_engine * e) { p->header_stack = e->header_stack; + p->recurse_depth = 0; + // Store links in a hash for rapid retrieval when exporting p->link_hash = NULL; link * l; diff --git a/src/writer.h b/src/writer.h index 512999c..d94c676 100644 --- a/src/writer.h +++ b/src/writer.h @@ -67,6 +67,9 @@ #include "uthash.h" +#define kMaxExportRecursiveDepth 1000 //!< Maximum recursion depth when exporting token tree -- to prevent stack overflow with "pathologic" input + + typedef struct { struct link * link_hash; struct meta * meta_hash; @@ -91,6 +94,8 @@ typedef struct { short quotes_lang; stack * header_stack; + + short recurse_depth; char _PADDING[4]; //!< pad struct for alignment