From: K.Kosako <kosako@sofnec.co.jp>
Date: Mon, 25 Mar 2019 05:15:41 +0000 (+0900)
Subject: update doc/RE for text segment mode
X-Git-Tag: v6.9.2_rc1~43
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=109e68caceba225cce7abab0df35d4c9284d5ae6;p=onig

update doc/RE for text segment mode
---

diff --git a/doc/RE b/doc/RE
index 8cbfb48..24b704b 100644
--- a/doc/RE
+++ b/doc/RE
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 6.9.2    2019/03/13
+Oniguruma Regular Expressions Version 6.9.2    2019/03/25
 
 syntax: ONIG_SYNTAX_ONIGURUMA (default)
 
@@ -81,15 +81,23 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
 
   \O       true anychar      (?m:.)    (* original function)
 
-  \X       Extended Grapheme Cluster    (?>\O(?:\Y\O)*)
+  \X       Text Segment    (?>\O(?:\Y\O)*)
 
-           \X doesn't check whether matching start position is boundary.
-           Write as \y\X if you want to ensure it.
+           The meaning of this operator changes depending on the setting of
+           the option (?y{..}).
 
-           Unicode case:
-             See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+           \X doesn't check whether the match start position is a boundary.
+           Write \y\X if you want to ensure that it is.
+
+           [Extended Grapheme Cluster mode] (default)
+             Unicode case:
+               See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
 
-           Not Unicode:   (?>\r\n|\O)
+             Not Unicode:   (?>\r\n|\O)
+
+           [Word mode]
+             Currently, this mode is supported in Unicode only.
+             See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
 
 
   Character Property
@@ -126,7 +134,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
 
   reluctant
 
-    ??      1 or 0 times
+    ??      0 or 1 times
     *?      0 or more times
     +?      1 or more times
     {n,m}?  at least n but not more than m times
@@ -151,8 +159,6 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
   $       end of the line
   \b      word boundary
   \B      non-word boundary
-  \y      Extended Grapheme Cluster boundary
-  \Y      Extended Grapheme Cluster non-boundary
 
   \A      beginning of string
   \Z      end of string, or before newline at the end
@@ -161,6 +167,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
   \K      keep (keep start position of the result string)
 
 
+  \y      Text Segment boundary
+  \Y      Text Segment non-boundary
+
+          The meaning of these operators (\y, \Y) changes depending on the setting
+          of the option (?y{..}).
+
+          [Extended Grapheme Cluster mode] (default)
+            Unicode case:
+              See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+            Not Unicode:
+              All positions except between \r and \n.
+
+          [Word mode]
+            Currently, this mode is supported in Unicode only.
+            See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+
 
 6. Character class
 
@@ -222,7 +246,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
 
   (?#...)            comment
 
-  (?imxWDSP-imxWDSP:subexp)  option on/off for subexp
+  (?imxWDSPy-imxWDSP:subexp)  option on/off for subexp
 
                                i: ignore case
                                m: multi-line (dot (.) also matches newline)
@@ -235,7 +259,12 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
                                   (alnum, alpha, blank, cntrl, digit, graph,
                                    lower, print, punct, space, upper, xdigit, word)
 
-  (?imxWDSP-imxWDSP)  isolated option
+                               y{?}: Text Segment mode
+                                  y{g}: Extended Grapheme Cluster mode (default)
+                                  y{w}: Word mode
+                                  See [Unicode Standard Annex #29]
+
+  (?imxWDSPy-imxWDSP)  isolated option
 
                       * It makes a group to the next ')' or end of the pattern.
                         /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
diff --git a/doc/RE.ja b/doc/RE.ja
index 67dc150..6d765a6 100644
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -1,4 +1,4 @@
-鬼車 正規表現 Version 6.9.2    2019/03/13
+鬼車 正規表現 Version 6.9.2    2019/03/25
 
 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値)
 
@@ -81,15 +81,22 @@
 
   \O       真任意文字  (?m:.)      (* 原作)
 
-  \X       拡張書記素房     (?>\O(?:\Y\O)*)
+  \X       文章区分    (?>\O(?:\Y\O)*)
 
-           \Xは照合の開始位置が拡張書記素房の境界かどうかを確認しない。
+           この演算子の意味は、オプション (?y{..})の設定によって変化する。
+
+           \Xは照合の開始位置が区分の境界かどうかを確認しない。
            それを確実にしたければ、\y\Xと書けば良い。
 
-           Unicodeã®å ´å:
-             åç§ [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+           [æ¡å¼µæ¸è¨ç´ æ¿-ç¶æã®ã¨ã] (ããã©ã«ã)
+             Unicodeã®å ´å:
+               åç§ [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
 
-           Unicodeä»¥å¤ã®å ´å:   (?>\r\n|\O)
+             Unicodeä»¥å¤ã®å ´å:   (?>\r\n|\O)
+
+           [åèª-ç¶æã®ã¨ã]
+             ç¾å¨ãUnicodeãããµãã¼ããã¦ããªãã
+             åç§ [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
 
 
   Character Property
@@ -126,7 +133,7 @@
 
   ç¡æ¬²
 
-    ??      ä¸åã¾ãã¯é¶å
+    ??      é¶åã¾ãã¯ä¸å
     *?      é¶åä»¥ä¸
     +?      ä¸åä»¥ä¸
     {n,m}?  nåä»¥ä¸måä»¥ä¸
@@ -150,8 +157,6 @@
   $       行末
   \b      単語境界
   \B      非単語境界
-  \y      拡張書記素房 境界
-  \Y      拡張書記素房 非境界
 
   \A      文字列先頭
   \Z      文字列末尾、または文字列末尾の改行の直前
@@ -160,6 +165,23 @@
   \K      保持 (結果の開始位置をこの位置に保つ)
 
 
+  \y      æç« åºå å¢ç
+  \Y      æç«  éå¢ç
+
+          ãã®æ¼ç®å­ã®æå³ã¯ããªãã·ã§ã³ (?y{..})ã®è¨­å®ã«ãã£ã¦å¤åããã
+
+           [æ¡å¼µæ¸è¨ç´ æ¿-ç¶æã®ã¨ã] (ããã©ã«ã)
+             Unicodeã®å ´å:
+               åç§ [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+             Unicodeä»¥å¤ã®å ´å:
+               \rã¨\nã®éãé¤ãå¨ã¦ã®ä½ç½®
+
+           [åèª-ç¶æã®ã¨ã]
+             ç¾å¨ãUnicodeãããµãã¼ããã¦ããªãã
+             åç§ [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+
 
 6. 文字集合
 
@@ -220,7 +242,7 @@
 
   (?#...)           注釈
 
-  (?imxWDSP-imxWDSP:式)   式オプション
+  (?imxWDSPy-imxWDSP:式)   式オプション
 
                             i: 大文字小文字照合
                             m: 複数行
@@ -233,7 +255,12 @@
                                (alnum, alpha, blank, cntrl, digit, graph,
                                 lower, print, punct, space, upper, xdigit, word)
 
-  (?imxWDSP-imxWDSP)  孤立オプション
+                            y{?}: 文章区分状態
+                               y{g}: 拡張書記素房-状態 (デフォルト)
+                               y{w}: 単語-状態
+                               参照 [Unicode Standard Annex #29]
+
+  (?imxWDSPy-imxWDSP)  孤立オプション
 
                       * これは次の')'またはパターンの終わりまでのグループを形成する
                         /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/