From 0ae2f95b8b7afc92a707d9272a484f15a0a75f74 Mon Sep 17 00:00:00 2001 From: Remi Collet Date: Tue, 30 May 2017 14:14:57 +0200 Subject: [PATCH] Update Oniguruma to latest upstream version 6.3.0 Windows specific changes need to be applied again. --- ext/mbstring/oniguruma/HISTORY | 30 +++ ext/mbstring/oniguruma/README.md | 6 + ext/mbstring/oniguruma/doc/API | 14 ++ ext/mbstring/oniguruma/doc/API.ja | 14 ++ ext/mbstring/oniguruma/doc/RE | 3 +- ext/mbstring/oniguruma/doc/RE.ja | 33 +-- ext/mbstring/oniguruma/index.html | 9 +- ext/mbstring/oniguruma/index_ja.html | 9 +- ext/mbstring/oniguruma/src/ascii.c | 2 +- ext/mbstring/oniguruma/src/big5.c | 2 +- ext/mbstring/oniguruma/src/config.h.win32 | 1 - ext/mbstring/oniguruma/src/config.h.win64 | 1 - ext/mbstring/oniguruma/src/cp1251.c | 2 +- ext/mbstring/oniguruma/src/euc_jp.c | 2 +- ext/mbstring/oniguruma/src/euc_kr.c | 2 +- ext/mbstring/oniguruma/src/euc_tw.c | 2 +- ext/mbstring/oniguruma/src/gb18030.c | 2 +- ext/mbstring/oniguruma/src/iso8859_1.c | 2 +- ext/mbstring/oniguruma/src/iso8859_10.c | 2 +- ext/mbstring/oniguruma/src/iso8859_11.c | 2 +- ext/mbstring/oniguruma/src/iso8859_13.c | 2 +- ext/mbstring/oniguruma/src/iso8859_14.c | 2 +- ext/mbstring/oniguruma/src/iso8859_15.c | 2 +- ext/mbstring/oniguruma/src/iso8859_16.c | 2 +- ext/mbstring/oniguruma/src/iso8859_2.c | 2 +- ext/mbstring/oniguruma/src/iso8859_3.c | 2 +- ext/mbstring/oniguruma/src/iso8859_4.c | 2 +- ext/mbstring/oniguruma/src/iso8859_5.c | 2 +- ext/mbstring/oniguruma/src/iso8859_6.c | 2 +- ext/mbstring/oniguruma/src/iso8859_7.c | 2 +- ext/mbstring/oniguruma/src/iso8859_8.c | 2 +- ext/mbstring/oniguruma/src/iso8859_9.c | 2 +- ext/mbstring/oniguruma/src/koi8.c | 2 +- ext/mbstring/oniguruma/src/koi8_r.c | 2 +- ext/mbstring/oniguruma/src/mktable.c | 42 +++- ext/mbstring/oniguruma/src/onig_init.c | 2 +- ext/mbstring/oniguruma/src/oniguruma.h | 14 +- ext/mbstring/oniguruma/src/regcomp.c | 129 ++++++---- ext/mbstring/oniguruma/src/regenc.c | 2 +- 
ext/mbstring/oniguruma/src/regenc.h | 2 +- ext/mbstring/oniguruma/src/regerror.c | 24 +- ext/mbstring/oniguruma/src/regexec.c | 85 ++++--- ext/mbstring/oniguruma/src/regint.h | 15 +- ext/mbstring/oniguruma/src/regparse.c | 229 ++++++++++++------ ext/mbstring/oniguruma/src/regparse.h | 1 + ext/mbstring/oniguruma/src/regsyntax.c | 6 +- ext/mbstring/oniguruma/src/regversion.c | 2 +- ext/mbstring/oniguruma/src/sjis.c | 2 +- ext/mbstring/oniguruma/src/st.c | 19 +- ext/mbstring/oniguruma/src/unicode.c | 2 +- .../oniguruma/src/unicode_fold1_key.c | 12 +- .../oniguruma/src/unicode_fold2_key.c | 12 +- .../oniguruma/src/unicode_fold3_key.c | 12 +- .../oniguruma/src/unicode_unfold_key.c | 14 +- ext/mbstring/oniguruma/src/utf16_be.c | 2 +- ext/mbstring/oniguruma/src/utf16_le.c | 2 +- ext/mbstring/oniguruma/src/utf32_be.c | 2 +- ext/mbstring/oniguruma/src/utf32_le.c | 2 +- ext/mbstring/oniguruma/src/utf8.c | 2 +- 59 files changed, 534 insertions(+), 270 deletions(-) diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index c59fe4b6f3..74004e9743 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,5 +1,35 @@ History +2017/05/29: Version 6.3.0 + +2017/05/24: fix #60 : invalid state(CCS_VALUE) in parse_char_class() +2017/05/24: fix #59 : access to invalid address by reg->dmax value +2017/05/23: fix invalid increment of start position in onig_scan() +2017/05/23: fix #58 : access to invalid address by reg->dmin value +2017/05/23: fix #57 : DATA_ENSURE() check must be before data access +2017/05/22: fix #56 : return invalid result for codepoint 0xFFFFFFFF +2017/05/19: [new] add \o{17777777777} syntax. 
+2017/05/19: fix #55 : Byte value expressed in octal must be smaller than 256 + +2017/04/08: Version 6.2.0 + +2017/03/15: fix: size in xmemcpy in stack_double (PR #51) +2017/02/21: Initialize return value +2017/01/03: NEW API: add onig_set_capture_num_limit() +2017/01/03: change MemNumType from short int to int +2016/12/13: fix: [0-9-a] was not allowed as [0-9\-a] +2016/12/13: fix: illegal capture after recursive call +2016/12/13: fix: problem with optimization of \z +2016/12/13: fix: .* optimization +2016/12/13: Set a limit of parser recursion +2016/12/12: fix; that warnings are not shown properly +2016/12/12: fix: /[a-c#]+\W/ =~ "def#" fails when encoding is UTF-16/32 +2016/12/12: fix: /[\x{0}-X]/i doesn't match properly when UTF-16/32 is used. + +2016/12/11: Version 6.1.3 + +2016/12/11: fix: Syntax error: redirection unexpected (expecting word) #35 + 2016/11/07: Version 6.1.2 2016/10/25: allow word bound, word begin and word end in look-behind. diff --git a/ext/mbstring/oniguruma/README.md b/ext/mbstring/oniguruma/README.md index bfb41c74b4..a3abc750ac 100644 --- a/ext/mbstring/oniguruma/README.md +++ b/ext/mbstring/oniguruma/README.md @@ -20,6 +20,12 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.3.0 +-------------------------- + +* NEW SYNTAX: escape-o-brace for octal codepoint. + + New feature of version 6.1.2 -------------------------- diff --git a/ext/mbstring/oniguruma/doc/API b/ext/mbstring/oniguruma/doc/API index c639432e15..d12a46b997 100644 --- a/ext/mbstring/oniguruma/doc/API +++ b/ext/mbstring/oniguruma/doc/API @@ -629,6 +629,20 @@ Oniguruma API Version 6.1.0 2016/08/22 normal return: ONIG_NORMAL +# unsigned int onig_get_parse_depth_limit(void) + + Return the maximum depth of parser recursion. + (default: DEFAULT_PARSE_DEPTH_LIMIT defined in regint.h. Currently 4096.) + + +# int onig_set_parse_depth_limit(unsigned int depth) + + Set the maximum depth of parser recursion. 
+ (depth = 0: Set to the default value defined in regint.h.) + + normal return: ONIG_NORMAL + + # int onig_end(void) The use of this library is finished. diff --git a/ext/mbstring/oniguruma/doc/API.ja b/ext/mbstring/oniguruma/doc/API.ja index 25975100aa..dcc7317b0a 100644 --- a/ext/mbstring/oniguruma/doc/API.ja +++ b/ext/mbstring/oniguruma/doc/API.ja @@ -636,6 +636,20 @@ 正常終了戻り値: ONIG_NORMAL +# unsigned int onig_get_parse_depth_limit(void) + + 再帰パース処理の最大深さを返す。 + (デフォルト: regint.h で定義されている DEFAULT_PARSE_DEPTH_LIMIT。現在は 4096) + + +# int onig_set_parse_depth_limit(unsigned int depth) + + 再帰パース処理の最大深さを指定する。 + (depth = 0: regint.h で定義されたデフォルト値に設定する。) + + 正常終了戻り値: ONIG_NORMAL + + # int onig_end(void) ライブラリの使用を終了する。 diff --git a/ext/mbstring/oniguruma/doc/RE b/ext/mbstring/oniguruma/doc/RE index e8a6aa4aae..168541597c 100644 --- a/ext/mbstring/oniguruma/doc/RE +++ b/ext/mbstring/oniguruma/doc/RE @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.0.0 2016/08/18 +Oniguruma Regular Expressions Version 6.3.0 2017/05/19 syntax: ONIG_SYNTAX_RUBY (default) @@ -22,6 +22,7 @@ syntax: ONIG_SYNTAX_RUBY (default) \a bell (0x07) \e escape (0x1B) \nnn octal char (encoded byte value) + \o{17777777777} wide octal char (character code point value) \xHH hexadecimal char (encoded byte value) \x{7HHHHHHH} wide hexadecimal char (character code point value) \cx control char (character code point value) diff --git a/ext/mbstring/oniguruma/doc/RE.ja b/ext/mbstring/oniguruma/doc/RE.ja index cf89c8fcba..5c09100400 100644 --- a/ext/mbstring/oniguruma/doc/RE.ja +++ b/ext/mbstring/oniguruma/doc/RE.ja @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.0.0 2016/05/02 +鬼車 正規表現 Version 6.3.0 2017/05/19 使用文法: ONIG_SYNTAX_RUBY (既定値) @@ -13,21 +13,22 @@ 2. 
文字 - \t 水平タブ (0x09) - \v 垂直タブ (0x0B) - \n 改行 (0x0A) - \r 復帰 (0x0D) - \b 後退空白 (0x08) - \f 改頁 (0x0C) - \a 鐘 (0x07) - \e 退避修飾 (0x1B) - \nnn 八進数表現 符号化バイト値(の一部) - \xHH 十六進数表現 符号化バイト値(の一部) - \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 - \cx 制御文字表現 コードポイント値 - \C-x 制御文字表現 コードポイント値 - \M-x 超 (x|0x80) コードポイント値 - \M-\C-x 超 + 制御文字表現 コードポイント値 + \t 水平タブ (0x09) + \v 垂直タブ (0x0B) + \n 改行 (0x0A) + \r 復帰 (0x0D) + \b 後退空白 (0x08) + \f 改頁 (0x0C) + \a 鐘 (0x07) + \e 退避修飾 (0x1B) + \nnn 八進数表現 符号化バイト値(の一部) + \o{17777777777} 拡張八進数表現 コードポイント値 + \xHH 十六進数表現 符号化バイト値(の一部) + \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 + \cx 制御文字表現 コードポイント値 + \C-x 制御文字表現 コードポイント値 + \M-x 超 (x|0x80) コードポイント値 + \M-\C-x 超 + 制御文字表現 コードポイント値 ※ \bは、文字集合内でのみ有効 diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index cf9177c068..79e4de0e0f 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -8,7 +8,7 @@

Oniguruma

(Japanese)

-(c) K.Kosako, updated at: 2016/11/07 +(c) K.Kosako, updated at: 2017/05/26

@@ -16,10 +16,9 @@
What's new
    -
  • 2016/11/07: Version 6.1.2 released.
  • -
  • 2016/09/02: Version 6.1.1 released.
  • -
  • 2016/08/29: Version 6.1.0 released.
  • -
  • 2014/12/12: Version 5.9.6 released.
  • +
  • 2017/05/29: Version 6.3.0 released.
  • +
  • 2017/04/08: Version 6.2.0 released.
  • +
  • 2016/12/11: Version 6.1.3 released.

diff --git a/ext/mbstring/oniguruma/index_ja.html b/ext/mbstring/oniguruma/index_ja.html index e11e0f56d3..7070dfe243 100644 --- a/ext/mbstring/oniguruma/index_ja.html +++ b/ext/mbstring/oniguruma/index_ja.html @@ -8,7 +8,7 @@

鬼車

-(c) K.Kosako, 最終更新: 2016/11/07 +(c) K.Kosako, 最終更新: 2017/05/26

@@ -16,10 +16,9 @@
更新情報
    -
  • 2016/11/07: Version 6.1.2 リリース
  • -
  • 2016/09/02: Version 6.1.1 リリース
  • -
  • 2016/08/29: Version 6.1.0 リリース
  • -
  • 2014/12/12: Version 5.9.6 リリース
  • +
  • 2017/05/29: Version 6.3.0 リリース
  • +
  • 2017/04/08: Version 6.2.0 リリース
  • +
  • 2016/12/11: Version 6.1.3 リリース

diff --git a/ext/mbstring/oniguruma/src/ascii.c b/ext/mbstring/oniguruma/src/ascii.c index 9983d82467..b21878d275 100644 --- a/ext/mbstring/oniguruma/src/ascii.c +++ b/ext/mbstring/oniguruma/src/ascii.c @@ -2,7 +2,7 @@ ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/big5.c b/ext/mbstring/oniguruma/src/big5.c index 745f71b340..bc713abdd3 100644 --- a/ext/mbstring/oniguruma/src/big5.c +++ b/ext/mbstring/oniguruma/src/big5.c @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/config.h.win32 b/ext/mbstring/oniguruma/src/config.h.win32 index 9a9c43f26d..bdbdaf25c1 100644 --- a/ext/mbstring/oniguruma/src/config.h.win32 +++ b/ext/mbstring/oniguruma/src/config.h.win32 @@ -15,7 +15,6 @@ #define SIZEOF_VOIDP 4 #define SIZEOF_FLOAT 4 #define SIZEOF_DOUBLE 8 -#define SIZEOF_SIZE_T 4 #define HAVE_PROTOTYPES 1 #define TOKEN_PASTE(x,y) x##y #define HAVE_STDARG_PROTOTYPES 1 diff --git a/ext/mbstring/oniguruma/src/config.h.win64 b/ext/mbstring/oniguruma/src/config.h.win64 index dec7b75773..01a86c7bb9 100644 --- a/ext/mbstring/oniguruma/src/config.h.win64 +++ b/ext/mbstring/oniguruma/src/config.h.win64 @@ -15,7 +15,6 @@ #define SIZEOF_VOIDP 8 #define SIZEOF_FLOAT 4 #define SIZEOF_DOUBLE 8 -#define SIZEOF_SIZE_T 8 #define HAVE_PROTOTYPES 1 #define TOKEN_PASTE(x,y) x##y #define HAVE_STDARG_PROTOTYPES 1 diff --git a/ext/mbstring/oniguruma/src/cp1251.c b/ext/mbstring/oniguruma/src/cp1251.c index 
8ed570646f..4d655bb459 100644 --- a/ext/mbstring/oniguruma/src/cp1251.c +++ b/ext/mbstring/oniguruma/src/cp1251.c @@ -2,7 +2,7 @@ cp1251.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2006-2017 Byte + * Copyright (c) 2006-2016 Byte * K.Kosako * All rights reserved. * diff --git a/ext/mbstring/oniguruma/src/euc_jp.c b/ext/mbstring/oniguruma/src/euc_jp.c index bd545a8b98..3b54e958d9 100644 --- a/ext/mbstring/oniguruma/src/euc_jp.c +++ b/ext/mbstring/oniguruma/src/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/euc_kr.c b/ext/mbstring/oniguruma/src/euc_kr.c index 3383b0e567..450caf146f 100644 --- a/ext/mbstring/oniguruma/src/euc_kr.c +++ b/ext/mbstring/oniguruma/src/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/euc_tw.c b/ext/mbstring/oniguruma/src/euc_tw.c index 743a555a48..b3ee62866c 100644 --- a/ext/mbstring/oniguruma/src/euc_tw.c +++ b/ext/mbstring/oniguruma/src/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/gb18030.c b/ext/mbstring/oniguruma/src/gb18030.c index 7ab709bf22..c8b586505c 100644 --- a/ext/mbstring/oniguruma/src/gb18030.c +++ b/ext/mbstring/oniguruma/src/gb18030.c @@ -2,7 +2,7 @@ gb18030.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2005-2017 KUBO Takehiro + * Copyright (c) 2005-2016 KUBO Takehiro * K.Kosako * All rights reserved. * diff --git a/ext/mbstring/oniguruma/src/iso8859_1.c b/ext/mbstring/oniguruma/src/iso8859_1.c index 2a2f346077..573931fdaf 100644 --- a/ext/mbstring/oniguruma/src/iso8859_1.c +++ b/ext/mbstring/oniguruma/src/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_10.c b/ext/mbstring/oniguruma/src/iso8859_10.c index beae57c95a..91b18d4274 100644 --- a/ext/mbstring/oniguruma/src/iso8859_10.c +++ b/ext/mbstring/oniguruma/src/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_11.c b/ext/mbstring/oniguruma/src/iso8859_11.c index d753b32db4..518be25412 100644 --- a/ext/mbstring/oniguruma/src/iso8859_11.c +++ b/ext/mbstring/oniguruma/src/iso8859_11.c @@ -2,7 +2,7 @@ iso8859_11.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_13.c b/ext/mbstring/oniguruma/src/iso8859_13.c index 25444ad45b..d1f39a24d6 100644 --- a/ext/mbstring/oniguruma/src/iso8859_13.c +++ b/ext/mbstring/oniguruma/src/iso8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_14.c b/ext/mbstring/oniguruma/src/iso8859_14.c index 4c9018ee7b..3361b0db40 100644 --- a/ext/mbstring/oniguruma/src/iso8859_14.c +++ b/ext/mbstring/oniguruma/src/iso8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_15.c b/ext/mbstring/oniguruma/src/iso8859_15.c index ae0f4e91e1..b09e876171 100644 --- a/ext/mbstring/oniguruma/src/iso8859_15.c +++ b/ext/mbstring/oniguruma/src/iso8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_16.c b/ext/mbstring/oniguruma/src/iso8859_16.c index 3f0981df9b..29a350ddbe 100644 --- a/ext/mbstring/oniguruma/src/iso8859_16.c +++ b/ext/mbstring/oniguruma/src/iso8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_2.c b/ext/mbstring/oniguruma/src/iso8859_2.c index 5b84915511..9eb35365fc 100644 --- a/ext/mbstring/oniguruma/src/iso8859_2.c +++ b/ext/mbstring/oniguruma/src/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_3.c b/ext/mbstring/oniguruma/src/iso8859_3.c index 2491efc24a..862823a219 100644 --- a/ext/mbstring/oniguruma/src/iso8859_3.c +++ b/ext/mbstring/oniguruma/src/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_4.c b/ext/mbstring/oniguruma/src/iso8859_4.c index 530fe36fb7..db706da23c 100644 --- a/ext/mbstring/oniguruma/src/iso8859_4.c +++ b/ext/mbstring/oniguruma/src/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_5.c b/ext/mbstring/oniguruma/src/iso8859_5.c index 24b8b059bc..0e03e9cb81 100644 --- a/ext/mbstring/oniguruma/src/iso8859_5.c +++ b/ext/mbstring/oniguruma/src/iso8859_5.c @@ -2,7 +2,7 @@ iso8859_5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_6.c b/ext/mbstring/oniguruma/src/iso8859_6.c index c8643de434..6289af5cfd 100644 --- a/ext/mbstring/oniguruma/src/iso8859_6.c +++ b/ext/mbstring/oniguruma/src/iso8859_6.c @@ -2,7 +2,7 @@ iso8859_6.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_7.c b/ext/mbstring/oniguruma/src/iso8859_7.c index 2cd0aacbe6..75b520f552 100644 --- a/ext/mbstring/oniguruma/src/iso8859_7.c +++ b/ext/mbstring/oniguruma/src/iso8859_7.c @@ -2,7 +2,7 @@ iso8859_7.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_8.c b/ext/mbstring/oniguruma/src/iso8859_8.c index 6d46fb0495..5f18345e61 100644 --- a/ext/mbstring/oniguruma/src/iso8859_8.c +++ b/ext/mbstring/oniguruma/src/iso8859_8.c @@ -2,7 +2,7 @@ iso8859_8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/iso8859_9.c b/ext/mbstring/oniguruma/src/iso8859_9.c index cdad59a82d..d0c06bb9aa 100644 --- a/ext/mbstring/oniguruma/src/iso8859_9.c +++ b/ext/mbstring/oniguruma/src/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/koi8.c b/ext/mbstring/oniguruma/src/koi8.c index 9c1675bd95..80f89e93a5 100644 --- a/ext/mbstring/oniguruma/src/koi8.c +++ b/ext/mbstring/oniguruma/src/koi8.c @@ -2,7 +2,7 @@ koi8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/koi8_r.c b/ext/mbstring/oniguruma/src/koi8_r.c index 07eae34222..f8ef34fd46 100644 --- a/ext/mbstring/oniguruma/src/koi8_r.c +++ b/ext/mbstring/oniguruma/src/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/mktable.c b/ext/mbstring/oniguruma/src/mktable.c index 285216ebda..a9cac2c40a 100644 --- a/ext/mbstring/oniguruma/src/mktable.c +++ b/ext/mbstring/oniguruma/src/mktable.c @@ -2,7 +2,7 @@ mktable.c **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,10 @@ #include #include +#ifndef __USE_ISOC99 #define __USE_ISOC99 +#endif + #include #include "regenc.h" @@ -1108,11 +1111,13 @@ static int exec(FILE* fp, ENC_INFO* einfo) #define NCOL 8 int c, val, enc; + int r; enc = einfo->num; - fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", - einfo->name); + r = fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", + einfo->name); + if (r < 0) return -1; for (c = 0; c < 256; c++) { val = 0; @@ -1131,20 +1136,33 @@ static int exec(FILE* fp, ENC_INFO* einfo) if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; - if (c % NCOL == 0) fputs(" ", fp); - fprintf(fp, "0x%04x", val); - if (c != 255) fputs(",", fp); + if (c % NCOL == 0) { + r = fputs(" ", fp); + if (r < 0) return -1; + } + r = fprintf(fp, "0x%04x", val); + if (r < 0) return -1; + + if (c != 255) { + r = fputs(",", fp); + if (r < 0) return -1; + } if (c != 0 && c % NCOL == (NCOL-1)) - fputs("\n", fp); + r = fputs("\n", fp); else - fputs(" ", fp); + r = fputs(" ", fp); + + if (r < 0) return -1; } - fprintf(fp, "};\n"); + r = fprintf(fp, "};\n"); + if (r < 0) return -1; + return 0; } extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) { + int r; int i; FILE* fp = stdout; @@ -1155,7 +1173,11 @@ extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) /* setlocale(LC_ALL, "fr_FR.iso88591"); */ for (i = 0; i < (int 
)(sizeof(Info)/sizeof(ENC_INFO)); i++) { - exec(fp, &Info[i]); + r = exec(fp, &Info[i]); + if (r < 0) { + fprintf(stderr, "FAIL exec(): %d\n", r); + return -1; + } } return 0; diff --git a/ext/mbstring/oniguruma/src/onig_init.c b/ext/mbstring/oniguruma/src/onig_init.c index 18af14fd58..9f535685cd 100644 --- a/ext/mbstring/oniguruma/src/onig_init.c +++ b/ext/mbstring/oniguruma/src/onig_init.c @@ -2,7 +2,7 @@ onig_init.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2016-2017 K.Kosako + * Copyright (c) 2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/oniguruma.h b/ext/mbstring/oniguruma/src/oniguruma.h index 2d5e93eff8..02d425421e 100644 --- a/ext/mbstring/oniguruma/src/oniguruma.h +++ b/ext/mbstring/oniguruma/src/oniguruma.h @@ -35,8 +35,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 1 -#define ONIGURUMA_VERSION_TEENY 2 +#define ONIGURUMA_VERSION_MINOR 3 +#define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -372,7 +372,7 @@ int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const /* config parameters */ #define ONIG_NREGION 10 -#define ONIG_MAX_CAPTURE_NUM 32767 +#define ONIG_MAX_CAPTURE_NUM 2147483647 /* 2**31 - 1 */ #define ONIG_MAX_BACKREF_NUM 1000 #define ONIG_MAX_REPEAT_NUM 100000 #define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 @@ -473,6 +473,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ #define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ #define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ +#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{1OOOOOOOOOO} */ #define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ #define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) 
*/ @@ -543,6 +544,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_UNDEFINED_BYTECODE -13 #define ONIGERR_UNEXPECTED_BYTECODE -14 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -821,6 +823,12 @@ unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN int onig_set_match_stack_limit_size P_((unsigned int size)); ONIG_EXTERN +unsigned int onig_get_parse_depth_limit P_((void)); +ONIG_EXTERN +int onig_set_capture_num_limit P_((int num)); +ONIG_EXTERN +int onig_set_parse_depth_limit P_((unsigned int depth)); +ONIG_EXTERN int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges)); ONIG_EXTERN int onig_end P_((void)); diff --git a/ext/mbstring/oniguruma/src/regcomp.c b/ext/mbstring/oniguruma/src/regcomp.c index 06e74bac67..0e9a9ab38c 100644 --- a/ext/mbstring/oniguruma/src/regcomp.c +++ b/ext/mbstring/oniguruma/src/regcomp.c @@ -761,17 +761,17 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) if (infinite && qn->lower <= 1) { if (qn->greedy) { if (qn->lower == 1) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; } else { if (qn->lower == 0) - len = SIZE_OP_JUMP; + len = SIZE_OP_JUMP; else - len = 0; + len = 0; len += mod_tlen + SIZE_OP_PUSH + cklen; } @@ -785,10 +785,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) else if (qn->upper == 1 && qn->greedy) { if (qn->lower == 0) { if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; + len = SIZE_OP_STATE_CHECK_PUSH + tlen; } else { - len = SIZE_OP_PUSH + tlen; + len = SIZE_OP_PUSH + tlen; } } else { @@ -1230,6 +1230,11 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) len += (IS_ENCLOSE_RECURSION(node) ? 
SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } + else if (IS_ENCLOSE_RECURSION(node)) { + len = SIZE_OP_MEMORY_START_PUSH; + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); + } else #endif { @@ -1250,7 +1255,7 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) if (tlen < 0) return tlen; len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; @@ -1321,6 +1326,14 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) if (r) return r; r = add_opcode(reg, OP_RETURN); } + else if (IS_ENCLOSE_RECURSION(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); + else + r = add_opcode(reg, OP_MEMORY_END_REC); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } else #endif { @@ -1349,7 +1362,7 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) r = add_opcode(reg, OP_POP); if (r) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_PUSH_STOP_BT); @@ -2132,16 +2145,16 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) switch (en->type) { case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) - *len = en->char_len; - else { - r = get_char_length_tree1(en->target, reg, len, level); - if (r == 0) { - en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); - } - } - break; + if (IS_ENCLOSE_CLEN_FIXED(en)) + *len = en->char_len; + else { + r = get_char_length_tree1(en->target, reg, len, level); + if (r == 0) { + en->char_len = *len; + SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + } + } + break; #endif case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: @@ -2231,6 +2244,7 @@ 
is_not_included(Node* x, Node* y, regex_t* reg) return 0; } else { + if (IS_NOT_NULL(xc->mbuf)) return 0; for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! IS_CODE_SB_WORD(reg->enc, i)) { if (!IS_NCCLASS_NOT(xc)) { @@ -2580,17 +2594,17 @@ get_min_len(Node* node, OnigLen *min, ScanEnv* env) if (IS_ENCLOSE_MIN_FIXED(en)) *min = en->min_len; else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *min = 0; // recursive - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_min_len(en->target, min, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } - } + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *min = 0; // recursive + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = get_min_len(en->target, min, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->min_len = *min; + SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); + } + } } break; @@ -2699,22 +2713,22 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env) EncloseNode* en = NENCLOSE(node); switch (en->type) { case ENCLOSE_MEMORY: - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; - else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *max = ONIG_INFINITE_DISTANCE; - else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_max_len(en->target, max, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } - } - } - break; + if (IS_ENCLOSE_MAX_FIXED(en)) + *max = en->max_len; + else { + if (IS_ENCLOSE_MARK1(NENCLOSE(node))) + *max = ONIG_INFINITE_DISTANCE; + else { + SET_ENCLOSE_STATUS(node, NST_MARK1); + r = get_max_len(en->target, max, env); + CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + if (r == 0) { + en->max_len = *max; + SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); + } + } + } + break; case ENCLOSE_OPTION: case ENCLOSE_STOP_BACKTRACK: @@ -3673,6 +3687,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) #define IN_NOT (1<<1) #define IN_REPEAT (1<<2) #define 
IN_VAR_REPEAT (1<<3) +#define IN_CALL (1<<4) +#define IN_RECCALL (1<<5) /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) @@ -3843,10 +3859,16 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) break; case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } + if (IS_ENCLOSE_CALLED(en)) + state |= IN_CALL; + if (IS_ENCLOSE_RECURSION(en)) + state |= IN_RECCALL; + else if ((state & IN_RECCALL) != 0) + SET_CALL_RECURSION(node); r = setup_tree(en->target, reg, state, env); break; @@ -4160,6 +4182,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, if (right_len == 0) { to->right_anchor |= left->right_anchor; } + else { + to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); + } } static int @@ -4534,7 +4559,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (to->expr.len > 0) { if (add->len.max > 0) { if (to->expr.len > (int )add->len.max) - to->expr.len = add->len.max; + to->expr.len = add->len.max; if (to->expr.mmd.max == 0) select_opt_exact_info(enc, &to->exb, &to->expr); @@ -4932,7 +4957,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->exact_end = reg->exact + e->len; allow_reverse = - ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); + ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, @@ -5003,12 +5028,14 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r) return r; reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | 
ANCHOR_ANYCHAR_STAR_ML | + ANCHOR_LOOK_BEHIND); if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | + ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; @@ -5018,7 +5045,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (opt.exb.len > 0 || opt.exm.len > 0) { select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { goto set_map; } else { diff --git a/ext/mbstring/oniguruma/src/regenc.c b/ext/mbstring/oniguruma/src/regenc.c index 7b223250f7..554a622382 100644 --- a/ext/mbstring/oniguruma/src/regenc.c +++ b/ext/mbstring/oniguruma/src/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/regenc.h b/ext/mbstring/oniguruma/src/regenc.h index aa2616404d..e119dab5a6 100644 --- a/ext/mbstring/oniguruma/src/regenc.h +++ b/ext/mbstring/oniguruma/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/regerror.c b/ext/mbstring/oniguruma/src/regerror.c index 05fc9d8be8..ee35b36358 100644 --- a/ext/mbstring/oniguruma/src/regerror.c +++ b/ext/mbstring/oniguruma/src/regerror.c @@ -54,6 +54,8 @@ onig_error_code_to_format(int code) p = "fail to memory allocation"; break; case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; + case ONIGERR_PARSE_DEPTH_LIMIT_OVER: + p = "parse depth limit over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: @@ -348,21 +350,12 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) p = pat; while (p < pat_end) { - if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p); - while (len-- > 0) *s++ = *p++; - } - else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; - } - else if (ONIGENC_IS_MBC_HEAD(enc, p)) { + if (ONIGENC_IS_MBC_HEAD(enc, p)) { len = enclen(enc, p); if (ONIGENC_MBC_MINLEN(enc) == 1) { while (len-- > 0) *s++ = *p++; } - else { /* for UTF16 */ + else { /* for UTF16/32 */ int blen; while (len-- > 0) { @@ -373,6 +366,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } } } + else if (*p == '\\') { + *s++ = *p++; + len = enclen(enc, p); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )'\\'; + *s++ = *p++; + } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); diff --git a/ext/mbstring/oniguruma/src/regexec.c b/ext/mbstring/oniguruma/src/regexec.c index c7a705e960..c0626efd43 100644 --- a/ext/mbstring/oniguruma/src/regexec.c +++ b/ext/mbstring/oniguruma/src/regexec.c @@ -462,6 +462,7 @@ stack_double(int is_alloca, char** arg_alloc_base, unsigned int n; int used; size_t size; + size_t new_size; char* alloc_base; char* new_alloc_base; OnigStackType *stk_base, *stk_end, *stk; 
@@ -472,10 +473,11 @@ stack_double(int is_alloca, char** arg_alloc_base, stk = *arg_stk; n = stk_end - stk_base; - n *= 2; size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; + n *= 2; + new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; if (is_alloca != 0) { - new_alloc_base = (char* )xmalloc(size); + new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -489,7 +491,7 @@ stack_double(int is_alloca, char** arg_alloc_base, else n = MatchStackLimitSize; } - new_alloc_base = (char* )xrealloc(alloc_base, size); + new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -1242,16 +1244,24 @@ onig_statistics_init(void) MaxStackDepth = 0; } -extern void +extern int onig_print_statistics(FILE* f) { + int r; int i; - fprintf(f, " count prev time\n"); + + r = fprintf(f, " count prev time\n"); + if (r < 0) return -1; + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + r = fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + if (r < 0) return -1; } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + if (r < 0) return -1; + + return 0; } #define STACK_INC do {\ @@ -1336,8 +1346,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; + len = enclen(encode, q); + while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } else { xmemcpy(bp, "\"", 1); bp += 1; } @@ -1463,14 +1473,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; case OP_EXACT1: MOP_IN(OP_EXACT1); -#if 0 
DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; -#endif - if (*p != *s++) goto fail; - DATA_ENSURE(0); - p++; MOP_OUT; break; @@ -3149,6 +3154,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; + + if (q >= end) return 0; /* fail */ while (p < q) p += enclen(reg->enc, p); } } @@ -3228,18 +3235,25 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { if (reg->dmax != ONIG_INFINITE_DISTANCE) { - *low = p - reg->dmax; - if (*low > s) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, - *low, (const UChar** )low_prev); - if (low_prev && IS_NULL(*low_prev)) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : s), *low); + if (p - str < reg->dmax) { + *low = (UChar* )str; + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); } else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); + *low = p - reg->dmax; + if (*low > s) { + *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, + *low, (const UChar** )low_prev); + if (low_prev && IS_NULL(*low_prev)) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : s), *low); + } + else { + if (low_prev) + *low_prev = onigenc_get_prev_char_head(reg->enc, + (pprev ? pprev : str), *low); + } } } } @@ -3493,15 +3507,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, start = min_semi_end - reg->anchor_dmax; if (start < end) start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } } if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) { range = max_semi_end - reg->anchor_dmin + 1; } - if (start >= range) goto mismatch_no_msa; + if (start > range) goto mismatch_no_msa; + /* If start == range, match with empty at end. + Backward search is used. 
*/ } else { if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { @@ -3626,9 +3639,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s); - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); + } } } while (s < range); goto mismatch; @@ -3779,8 +3794,10 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end, if (rs != 0) return rs; - if (region->end[0] == start - str) - start++; + if (region->end[0] == start - str) { + if (start >= end) break; + start += enclen(reg->enc, start); + } else start = str + region->end[0]; diff --git a/ext/mbstring/oniguruma/src/regint.h b/ext/mbstring/oniguruma/src/regint.h index 7f2076b4ef..9835143fdc 100644 --- a/ext/mbstring/oniguruma/src/regint.h +++ b/ext/mbstring/oniguruma/src/regint.h @@ -71,6 +71,7 @@ #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#define DEFAULT_PARSE_DEPTH_LIMIT 4096 #if defined(__GNUC__) # define ARG_UNUSED __attribute__ ((unused)) @@ -200,17 +201,17 @@ } while(0) /* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ (pad_size) = WORD_ALIGNMENT_SIZE \ - - ((size_t)(addr) % WORD_ALIGNMENT_SIZE);\ + - ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ } while (0) #define ALIGNMENT_RIGHT(addr) do {\ (addr) += (WORD_ALIGNMENT_SIZE - 1);\ - (addr) -= ((size_t)(addr) % WORD_ALIGNMENT_SIZE);\ + (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\ } while (0) #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ @@ -522,7 +523,7 @@ typedef int RelAddrType; typedef int AbsAddrType; typedef int LengthType; typedef int 
RepeatNumType; -typedef short int MemNumType; +typedef int MemNumType; typedef short int StateCheckNumType; typedef void* PointerType; @@ -661,11 +662,7 @@ typedef struct { BBuf* mbuf; /* multi-byte info or NULL */ } CClassNode; -#ifdef _WIN64 -typedef __int64 OnigStackIndex; -#else typedef long OnigStackIndex; -#endif typedef struct _OnigStackType { unsigned int type; @@ -751,7 +748,7 @@ extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); -extern void onig_print_statistics P_((FILE* f)); +extern int onig_print_statistics P_((FILE* f)); #endif #endif diff --git a/ext/mbstring/oniguruma/src/regparse.c b/ext/mbstring/oniguruma/src/regparse.c index c9f63cc3fe..8153513202 100644 --- a/ext/mbstring/oniguruma/src/regparse.c +++ b/ext/mbstring/oniguruma/src/regparse.c @@ -41,7 +41,8 @@ OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | @@ -108,6 +109,38 @@ onig_warning(const char* s) (*onig_warn)(s); } +#define DEFAULT_MAX_CAPTURE_NUM 32767 + +static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM; + +extern int +onig_set_capture_num_limit(int num) +{ + if (num < 0) return -1; + + MaxCaptureNum = num; + return 0; +} + +static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; + +extern unsigned int +onig_get_parse_depth_limit(void) +{ + return ParseDepthLimit; +} + +extern int +onig_set_parse_depth_limit(unsigned int depth) +{ + if (depth == 0) + ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; + else + ParseDepthLimit = depth; + return 0; +} + + static void bbuf_free(BBuf* bbuf) { @@ -521,8 +554,8 @@ i_names(UChar* key 
ARG_UNUSED, NameEntry* e, INamesArg* arg) int r = (*(arg->func))(e->name, e->name + e->name_len, e->back_num, - (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), - arg->reg, arg->arg); + (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), + arg->reg, arg->arg); if (r != 0) { arg->ret = r; return ST_STOP; @@ -959,6 +992,7 @@ scan_env_clear(ScanEnv* env) env->curr_max_regnum = 0; env->has_recursion = 0; #endif + env->parse_depth = 0; } static int @@ -968,7 +1002,7 @@ scan_env_add_mem_entry(ScanEnv* env) Node** p; need = env->num_mem + 1; - if (need > ONIG_MAX_CAPTURE_NUM) + if (need > MaxCaptureNum && MaxCaptureNum != 0) return ONIGERR_TOO_MANY_CAPTURES; if (need >= SCANENV_MEMNODES_SIZE) { @@ -1020,7 +1054,7 @@ onig_node_free(Node* node) switch (NTYPE(node)) { case NT_STR: if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { + IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { xfree(NSTR(node)->s); } break; @@ -1639,9 +1673,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) bound = x; } - for (high = low, bound = n; high < bound; ) { + high = (to == ~((OnigCodePoint )0)) ? n : low; + for (bound = n; high < bound; ) { x = (high + bound) >> 1; - if (to >= data[x*2] - 1) + if (to + 1 >= data[x*2]) high = x + 1; else bound = x; @@ -2485,8 +2520,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, int flag = (c == '-' ? -1 : 1); if (PEND) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - goto end; + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + goto end; } PFETCH(c); if (! 
ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; @@ -2497,9 +2532,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, exist_level = 1; if (!PEND) { - PFETCH(c); - if (c == end_code) - goto end; + PFETCH(c); + if (c == end_code) + goto end; } } @@ -2911,19 +2946,46 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) c2 = PPEEK; if (c2 == '{' && - IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { - PINC; - tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); - - if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { - PFETCH(c2); - if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); - } - else - PUNFETCH; - } + IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { + PINC; + tok->type = TK_CHAR_PROPERTY; + tok->u.prop.not = (c == 'P' ? 1 : 0); + + if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { + PFETCH(c2); + if (c2 == '^') { + tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + } + else + PUNFETCH; + } + } + break; + + case 'o': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + c2 = PPEEK; + if (ONIGENC_IS_CODE_DIGIT(enc, c2)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) { + PINC; + tok->type = TK_CODE_POINT; + tok->base = 8; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } } break; @@ -2986,7 +3048,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) PUNFETCH; prev = p; num = scan_unsigned_octal_number(&p, end, 3, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -3098,7 +3160,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.repeat.upper = 1; greedy_check: if (!PEND && PPEEK_IS('?') && - IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { + IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) { PFETCH(c); tok->u.repeat.greedy = 0; tok->u.repeat.possessive = 0; @@ -3268,6 +3330,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) goto end_buf; break; + case 'o': + if (PEND) break; + + prev = p; + if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { + PINC; + num = scan_unsigned_octal_number(&p, end, 11, enc); + if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; + if (!PEND) { + if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK)) + return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; + } + + if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) { + PINC; + tok->type = TK_CODE_POINT; + tok->u.code = (OnigCodePoint )num; + } + else { + /* can't read nothing or invalid format */ + p = prev; + } + } + break; + case 'x': if (PEND) break; @@ -3358,7 +3445,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { prev = p; num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; if (p == prev) { /* can't read nothing. 
*/ num = 0; /* but, it's not error */ } @@ -3507,7 +3594,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } else { /* string */ p = tok->backp + enclen(enc, tok->backp); - } + } } break; } @@ -3719,8 +3806,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, OnigCodePoint prev = 0; for (i = 0; i < n; i++) { - for (j = prev; - j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { + for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) { if (j >= sb_out) { goto sb_end2; } @@ -3994,14 +4080,16 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } } - *state = CCS_VALUE; + if (*state != CCS_START) + *state = CCS_VALUE; + *type = CCV_CLASS; return 0; } static int -next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, - int* vs_israw, int v_israw, +next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, + int* from_israw, int to_israw, enum CCVALTYPE intype, enum CCVALTYPE* type, enum CCSTATE* state, ScanEnv* env) { @@ -4010,10 +4098,13 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, switch (*state) { case CCS_VALUE: if (*type == CCV_SB) { - BITSET_SET_BIT(cc->bs, (int )(*vs)); + if (*from > 0xff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + + BITSET_SET_BIT(cc->bs, (int )(*from)); } else if (*type == CCV_CODE_POINT) { - r = add_code_range(&(cc->mbuf), env, *vs, *vs); + r = add_code_range(&(cc->mbuf), env, *from, *from); if (r < 0) return r; } break; @@ -4021,40 +4112,32 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, case CCS_RANGE: if (intype == *type) { if (intype == CCV_SB) { - if (*vs > 0xff || v > 0xff) + if (*from > 0xff || to > 0xff) return ONIGERR_INVALID_CODE_POINT_VALUE; - if (*vs > v) { + if (*from > to) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) goto ccs_range_end; else return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - bitset_set_range(cc->bs, (int )*vs, (int )v); + bitset_set_range(cc->bs, (int )*from, 
(int )to); } else { - r = add_code_range(&(cc->mbuf), env, *vs, v); + r = add_code_range(&(cc->mbuf), env, *from, to); if (r < 0) return r; } } else { -#if 0 - if (intype == CCV_CODE_POINT && *type == CCV_SB) { -#endif - if (*vs > v) { - if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) - goto ccs_range_end; - else - return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; - } - bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); - if (r < 0) return r; -#if 0 + if (*from > to) { + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) + goto ccs_range_end; + else + return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; } - else - return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; -#endif + bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); + r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to); + if (r < 0) return r; } ccs_range_end: *state = CCS_COMPLETE; @@ -4069,9 +4152,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, break; } - *vs_israw = v_israw; - *vs = v; - *type = intype; + *from_israw = to_israw; + *from = to; + *type = intype; return 0; } @@ -4113,8 +4196,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, enum CCVALTYPE val_type, in_type; int val_israw, in_israw; - prev_cc = (CClassNode* )NULL; *np = NULL_NODE; + env->parse_depth++; + if (env->parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; + prev_cc = (CClassNode* )NULL; r = fetch_token_in_cc(tok, src, end, env); if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { neg = 1; @@ -4315,7 +4401,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, (UChar* )"-"); - goto any_char_in; /* [0-9-a] is allowed as [0-9\-a] */ + goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ } r = 
ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; @@ -4329,9 +4415,9 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_char_class(&anode, tok, &p, end, env); if (r != 0) { - onig_node_free(anode); - goto cc_open_err; - } + onig_node_free(anode); + goto cc_open_err; + } acc = NCCLASS(anode); r = or_cclass(cc, acc, env->enc); @@ -4420,6 +4506,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } } *src = p; + env->parse_depth--; return 0; err: @@ -4625,9 +4712,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = parse_subexp(&target, tok, term, &p, end, env); env->option = prev; if (r < 0) { - onig_node_free(target); - return r; - } + onig_node_free(target); + return r; + } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); NENCLOSE(*np)->target = target; @@ -5253,8 +5340,8 @@ parse_branch(Node** top, OnigToken* tok, int term, while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) { - onig_node_free(node); - return r; + onig_node_free(node); + return r; } if (NTYPE(node) == NT_LIST) { @@ -5281,6 +5368,9 @@ parse_subexp(Node** top, OnigToken* tok, int term, Node *node, **headp; *top = NULL; + env->parse_depth++; + if (env->parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; r = parse_branch(&node, tok, term, src, end, env); if (r < 0) { onig_node_free(node); @@ -5317,6 +5407,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, return ONIGERR_PARSER_BUG; } + env->parse_depth--; return r; } diff --git a/ext/mbstring/oniguruma/src/regparse.h b/ext/mbstring/oniguruma/src/regparse.h index 9e366fed7c..c9d1fe8a6e 100644 --- a/ext/mbstring/oniguruma/src/regparse.h +++ b/ext/mbstring/oniguruma/src/regparse.h @@ -306,6 +306,7 @@ typedef struct { int curr_max_regnum; int has_recursion; #endif + unsigned int parse_depth; } ScanEnv; diff --git a/ext/mbstring/oniguruma/src/regsyntax.c 
b/ext/mbstring/oniguruma/src/regsyntax.c index ade5b55f77..e751e24f2e 100644 --- a/ext/mbstring/oniguruma/src/regsyntax.c +++ b/ext/mbstring/oniguruma/src/regsyntax.c @@ -168,7 +168,8 @@ OnigSyntaxType OnigSyntaxJava = { OnigSyntaxType OnigSyntaxPerl = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | @@ -192,7 +193,8 @@ OnigSyntaxType OnigSyntaxPerl = { OnigSyntaxType OnigSyntaxPerl_NG = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | - ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL | + ONIG_SYN_OP_ESC_CONTROL_CHARS | ONIG_SYN_OP_ESC_C_CONTROL ) & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | diff --git a/ext/mbstring/oniguruma/src/regversion.c b/ext/mbstring/oniguruma/src/regversion.c index 0df82e1c67..245a001b57 100644 --- a/ext/mbstring/oniguruma/src/regversion.c +++ b/ext/mbstring/oniguruma/src/regversion.c @@ -49,7 +49,7 @@ onig_copyright(void) static char s[58]; xsnprintf(s, sizeof(s), - "Oniguruma %d.%d.%d : Copyright (C) 2002-2017 K.Kosako", + "Oniguruma %d.%d.%d : Copyright (C) 2002-2016 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/ext/mbstring/oniguruma/src/sjis.c b/ext/mbstring/oniguruma/src/sjis.c index dd41897df5..3378474827 100644 --- a/ext/mbstring/oniguruma/src/sjis.c +++ b/ext/mbstring/oniguruma/src/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 
K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/st.c b/ext/mbstring/oniguruma/src/st.c index 022880ae36..d4fe867f9e 100644 --- a/ext/mbstring/oniguruma/src/st.c +++ b/ext/mbstring/oniguruma/src/st.c @@ -130,11 +130,13 @@ static int collision = 0; static int init_st = 0; static void -stat_col() +stat_col(void) { - FILE *f = fopen("/tmp/col", "w"); - fprintf(f, "collision: %d\n", collision); - fclose(f); + FILE *f = fopen("/tmp/col", "w"); + if (f == 0) return ; + + (void) fprintf(f, "collision: %d\n", collision); + (void) fclose(f); } #endif @@ -155,10 +157,16 @@ st_init_table_with_size(type, size) size = new_size(size); /* round up to prime number */ tbl = alloc(st_table); + if (tbl == 0) return 0; + tbl->type = type; tbl->num_entries = 0; tbl->num_bins = size; tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + if (tbl->bins == 0) { + free(tbl); + return 0; + } return tbl; } @@ -320,6 +328,9 @@ rehash(table) new_num_bins = new_size(old_num_bins+1); new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); + if (new_bins == 0) { + return ; + } for(i = 0; i < old_num_bins; i++) { ptr = table->bins[i]; diff --git a/ext/mbstring/oniguruma/src/unicode.c b/ext/mbstring/oniguruma/src/unicode.c index ca7f90bf80..8812ca2fd0 100644 --- a/ext/mbstring/oniguruma/src/unicode.c +++ b/ext/mbstring/oniguruma/src/unicode.c @@ -2,7 +2,7 @@ unicode.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/unicode_fold1_key.c b/ext/mbstring/oniguruma/src/unicode_fold1_key.c index 6b390fc40e..21512118e6 100644 --- a/ext/mbstring/oniguruma/src/unicode_fold1_key.c +++ b/ext/mbstring/oniguruma/src/unicode_fold1_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. */ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */ /* Computed positions: -k'1-3' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+3] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold1_key(OnigCodePoint codes[]) { @@ -2534,7 +2540,7 @@ unicode_fold1_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/ext/mbstring/oniguruma/src/unicode_fold2_key.c b/ext/mbstring/oniguruma/src/unicode_fold2_key.c index 74e9876e75..07cfa4ec5e 100644 --- a/ext/mbstring/oniguruma/src/unicode_fold2_key.c +++ b/ext/mbstring/oniguruma/src/unicode_fold2_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */ /* Computed positions: -k'3,6' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold2_key(OnigCodePoint codes[]) { @@ -189,7 +195,7 @@ unicode_fold2_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/ext/mbstring/oniguruma/src/unicode_fold3_key.c b/ext/mbstring/oniguruma/src/unicode_fold3_key.c index 8095b1c497..1b4d9d41b7 100644 --- a/ext/mbstring/oniguruma/src/unicode_fold3_key.c +++ b/ext/mbstring/oniguruma/src/unicode_fold3_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_fold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */ /* Computed positions: -k'3,6,9' */ @@ -60,6 +60,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 8)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif int unicode_fold3_key(OnigCodePoint codes[]) { @@ -99,7 +105,7 @@ unicode_fold3_key(OnigCodePoint codes[]) { int key = hash(codes); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { int index = wordlist[key]; diff --git a/ext/mbstring/oniguruma/src/unicode_unfold_key.c b/ext/mbstring/oniguruma/src/unicode_unfold_key.c index c6261d2723..15302caada 100644 --- a/ext/mbstring/oniguruma/src/unicode_unfold_key.c +++ b/ext/mbstring/oniguruma/src/unicode_unfold_key.c @@ -1,7 +1,7 @@ /* This file was converted by gperf_unfold_key_conv.py from gperf output file. 
*/ -/* ANSI-C code produced by gperf version 3.0.3 */ -/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */ /* Computed positions: -k'1-3' */ @@ -64,6 +64,12 @@ hash(OnigCodePoint codes[]) return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+35] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)+1] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)]; } +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif const struct ByUnfoldKey * unicode_unfold_key(OnigCodePoint code) { @@ -2840,11 +2846,11 @@ unicode_unfold_key(OnigCodePoint code) { int key = hash(&code); - if (key <= MAX_HASH_VALUE) + if (key <= MAX_HASH_VALUE && key >= 0) { OnigCodePoint gcode = wordlist[key].code; - if (code == gcode) + if (code == gcode && wordlist[key].index >= 0) return &wordlist[key]; } } diff --git a/ext/mbstring/oniguruma/src/utf16_be.c b/ext/mbstring/oniguruma/src/utf16_be.c index 74c65cdaf4..f220cca228 100644 --- a/ext/mbstring/oniguruma/src/utf16_be.c +++ b/ext/mbstring/oniguruma/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/utf16_le.c b/ext/mbstring/oniguruma/src/utf16_le.c index 4a0162f6b1..89bc72f05d 100644 --- a/ext/mbstring/oniguruma/src/utf16_le.c +++ b/ext/mbstring/oniguruma/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/utf32_be.c b/ext/mbstring/oniguruma/src/utf32_be.c index 911b8eae7f..d0c7f39703 100644 --- a/ext/mbstring/oniguruma/src/utf32_be.c +++ b/ext/mbstring/oniguruma/src/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/utf32_le.c b/ext/mbstring/oniguruma/src/utf32_le.c index 4b31063cd7..33200d104e 100644 --- a/ext/mbstring/oniguruma/src/utf32_le.c +++ b/ext/mbstring/oniguruma/src/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/src/utf8.c b/ext/mbstring/oniguruma/src/utf8.c index c8cfad33d7..219b7eacea 100644 --- a/ext/mbstring/oniguruma/src/utf8.c +++ b/ext/mbstring/oniguruma/src/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2016 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without -- 2.50.1