update doc/RE for text segment mode

author K.Kosako <kosako@sofnec.co.jp>

Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)

committer K.Kosako <kosako@sofnec.co.jp>

Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)
author K.Kosako <kosako@sofnec.co.jp>
Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)
committer K.Kosako <kosako@sofnec.co.jp>
Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)
diff --git a/doc/RE b/doc/RE

index 8cbfb483f2b521b15f3df848aecccd05330619d0..24b704beeeaca750ea7c2ca3007f262e39ae8109 100644 (file)
--- a/doc/RE
+++ b/doc/RE
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 6.9.2    2019/03/13
+Oniguruma Regular Expressions Version 6.9.2    2019/03/25
  
  syntax: ONIG_SYNTAX_ONIGURUMA (default)
  
@@ -81,15 +81,23 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
  
    \O       true anychar      (?m:.)    (* original function)
  
-  \X       Extended Grapheme Cluster    (?>\O(?:\Y\O)*)
+  \X       Text Segment    (?>\O(?:\Y\O)*)
  
-           \X doesn't check whether matching start position is boundary.
-           Write as \y\X if you want to ensure it.
+           The meaning of this operator changes depending on the setting of
+           the option (?y{..}).
  
-           Unicode case:
-             See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+           \X doesn't check whether the matching start position is a boundary or not.
+           Write \y\X if you want to ensure it.
+
+           [Extended Grapheme Cluster mode] (default)
+             Unicode case:
+               See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
  
-           Not Unicode:   (?>\r\n|\O)
+             Not Unicode:   (?>\r\n|\O)
+
+           [Word mode]
+             Currently, this mode is supported in Unicode only.
+             See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
  
  
    Character Property
@@ -126,7 +134,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
  
    reluctant
  
-    ??      1 or 0 times
+    ??      0 or 1 times
      *?      0 or more times
      +?      1 or more times
      {n,m}?  at least n but not more than m times
@@ -151,8 +159,6 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
    $       end of the line
    \b      word boundary
    \B      non-word boundary
-  \y      Extended Grapheme Cluster boundary
-  \Y      Extended Grapheme Cluster non-boundary
  
    \A      beginning of string
    \Z      end of string, or before newline at the end
@@ -161,6 +167,24 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
    \K      keep (keep start position of the result string)
  
  
+  \y      Text Segment boundary
+  \Y      Text Segment non-boundary
+
+          The meaning of these operators (\y, \Y) changes depending on the setting
+          of the option (?y{..}).
+
+          [Extended Grapheme Cluster mode] (default)
+            Unicode case:
+              See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+            Not Unicode:
+              All positions except between \r and \n.
+
+          [Word mode]
+            Currently, this mode is supported in Unicode only.
+            See [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+
  
  6. Character class
  
@@ -222,7 +246,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
  
    (?#...)            comment
  
-  (?imxWDSP-imxWDSP:subexp)  option on/off for subexp
+  (?imxWDSPy-imxWDSP:subexp)  option on/off for subexp
  
                                 i: ignore case
                                 m: multi-line (dot (.) also matches newline)
@@ -235,7 +259,12 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
                                    (alnum, alpha, blank, cntrl, digit, graph,
                                     lower, print, punct, space, upper, xdigit, word)
  
-  (?imxWDSP-imxWDSP)  isolated option
+                               y{?}: Text Segment mode
+                                  y{g}: Extended Grapheme Cluster mode (default)
+                                  y{w}: Word mode
+                                  See [Unicode Standard Annex #29]
+
+  (?imxWDSPy-imxWDSP)  isolated option
  
                        * It makes a group to the next ')' or end of the pattern.
                          /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
diff --git a/doc/RE.ja b/doc/RE.ja

index 67dc150fa7be13cdfab44343492a87686a96c0e8..6d765a6c4a89109179fe7316a519de868132407d 100644 (file)
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -1,4 +1,4 @@
-鬼車 正規表現 Version 6.9.2    2019/03/13
+鬼車 正規表現 Version 6.9.2    2019/03/25
  
  使用文法: ONIG_SYNTAX_ONIGURUMA (既定値)
  
@@ -81,15 +81,22 @@
  
    \O       真任意文字  (?m:.)      (* 原作)
  
-  \X       拡張書記素房     (?>\O(?:\Y\O)*)
+  \X       文章区分    (?>\O(?:\Y\O)*)
  
-           \Xは照合の開始位置が拡張書記素房の境界かどうかを確認しない。
+           この演算子の意味は、オプション (?y{..})の設定によって変化する。
+
+           \Xは照合の開始位置が区分の境界かどうかを確認しない。
             それを確実にしたければ、\y\Xと書けば良い。
  
-           Unicodeの場合:
-             参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+           [拡張書記素房-状態のとき] (デフォルト)
+             Unicodeの場合:
+               参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
  
-           Unicode以外の場合:   (?>\r\n|\O)
+             Unicode以外の場合:   (?>\r\n|\O)
+
+           [単語-状態のとき]
+             現在、Unicodeしかサポートしていない。
+             参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
  
  
    Character Property
@@ -126,7 +133,7 @@
  
    無欲
  
-    ??      一回または零回
+    ??      零回または一回
      *?      零回以上
      +?      一回以上
      {n,m}?  n回以上m回以下
@@ -150,8 +157,6 @@
    $       行末
    \b      単語境界
    \B      非単語境界
-  \y      拡張書記素房 境界
-  \Y      拡張書記素房 非境界
  
    \A      文字列先頭
    \Z      文字列末尾、または文字列末尾の改行の直前
@@ -160,6 +165,23 @@
    \K      保持 (結果の開始位置をこの位置に保つ)
  
  
+  \y      文章区分 境界
+  \Y      文章区分 非境界
+
+          この演算子の意味は、オプション (?y{..})の設定によって変化する。
+
+           [拡張書記素房-状態のとき] (デフォルト)
+             Unicodeの場合:
+               参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+             Unicode以外の場合:
+               \rと\nの間を除く全ての位置
+
+           [単語-状態のとき]
+             現在、Unicodeしかサポートしていない。
+             参照 [Unicode Standard Annex #29: http://unicode.org/reports/tr29/]
+
+
  
  6. 文字集合
  
@@ -220,7 +242,7 @@
  
    (?#...)           注釈
  
-  (?imxWDSP-imxWDSP:式)   式オプション
+  (?imxWDSPy-imxWDSP:式)   式オプション
  
                              i: 大文字小文字照合
                              m: 複数行
@@ -233,7 +255,12 @@
                                 (alnum, alpha, blank, cntrl, digit, graph,
                                  lower, print, punct, space, upper, xdigit, word)
  
-  (?imxWDSP-imxWDSP)  孤立オプション
+                            y{?}: 文章区分状態
+                               y{g}: 拡張書記素房-状態 (デフォルト)
+                               y{w}: 単語-状態
+                               参照 [Unicode Standard Annex #29]
+
+  (?imxWDSPy-imxWDSP)  孤立オプション
  
                        * これは次の')'またはパターンの終わりまでのグループを形成する
                          /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
author	K.Kosako <kosako@sofnec.co.jp>
	Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)
committer	K.Kosako <kosako@sofnec.co.jp>
	Mon, 25 Mar 2019 05:15:41 +0000 (14:15 +0900)