Repair bug in regexp split performance improvements.

author Andrew Gierth <rhodiumtoad@postgresql.org>

Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)

committer Andrew Gierth <rhodiumtoad@postgresql.org>

Wed, 12 Sep 2018 18:45:13 +0000 (19:45 +0100)
author Andrew Gierth <rhodiumtoad@postgresql.org>
Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer Andrew Gierth <rhodiumtoad@postgresql.org>
Wed, 12 Sep 2018 18:45:13 +0000 (19:45 +0100)
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c

index e3a852769f6b9643fdfad45679f59098e097dca5..7463c0dd2fff8f87efb8635f0eddc83b2706ba5d 100644 (file)
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -936,6 +936,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
         int                     array_len;
         int                     array_idx;
         int                     prev_match_end;
+       int                     prev_valid_match_end;
         int                     start_search;
         int                     maxlen = 0;             /* largest fetch length in characters */
  
@@ -991,6 +992,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
  
         /* search for the pattern, perhaps repeatedly */
         prev_match_end = 0;
+       prev_valid_match_end = 0;
         start_search = 0;
         while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
                                                         pmatch_len, pmatch))
@@ -1043,13 +1045,15 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
                         matchctx->nmatches++;
  
                         /*
-                        * check length of unmatched portion between end of previous match
-                        * and start of current one
+                        * check length of unmatched portion between end of previous valid
+                        * (nondegenerate, or degenerate but not ignored) match and start
+                        * of current one
                          */
                         if (fetching_unmatched &&
                                 pmatch[0].rm_so >= 0 &&
-                               (pmatch[0].rm_so - prev_match_end) > maxlen)
-                               maxlen = (pmatch[0].rm_so - prev_match_end);
+                               (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
+                               maxlen = (pmatch[0].rm_so - prev_valid_match_end);
+                       prev_valid_match_end = pmatch[0].rm_eo;
                 }
                 prev_match_end = pmatch[0].rm_eo;
  
@@ -1075,8 +1079,8 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
          * input string
          */
         if (fetching_unmatched &&
-               (wide_len - prev_match_end) > maxlen)
-               maxlen = (wide_len - prev_match_end);
+               (wide_len - prev_valid_match_end) > maxlen)
+               maxlen = (wide_len - prev_valid_match_end);
  
         /*
          * Keep a note of the end position of the string for the benefit of
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out

index 19708c32fdd30b9566050b55ef82843b72989923..8e7470d1cbbfd9173e3303ec6f06a81b01e2ef69 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -674,6 +674,24 @@ SELECT regexp_split_to_array('123456','.');
   {"","","","","","",""}
  (1 row)
  
+SELECT regexp_split_to_array('123456','');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('123456','(?:)');
+ regexp_split_to_array 
+-----------------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT regexp_split_to_array('1','');
+ regexp_split_to_array 
+-----------------------
+ {1}
+(1 row)
+
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  ERROR:  invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql

index f9cfaeb44ac2f55d8f5c859a0951aaba9e1dd381..5e39458bd2234d5e4b13016115bca137f01831db 100644 (file)
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -188,6 +188,9 @@ SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nom
  SELECT regexp_split_to_array('123456','1');
  SELECT regexp_split_to_array('123456','6');
  SELECT regexp_split_to_array('123456','.');
+SELECT regexp_split_to_array('123456','');
+SELECT regexp_split_to_array('123456','(?:)');
+SELECT regexp_split_to_array('1','');
  -- errors
  SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
  SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
author	Andrew Gierth <rhodiumtoad@postgresql.org>
	Wed, 12 Sep 2018 18:31:06 +0000 (19:31 +0100)
committer	Andrew Gierth <rhodiumtoad@postgresql.org>
	Wed, 12 Sep 2018 18:45:13 +0000 (19:45 +0100)
src/backend/utils/adt/regexp.c		patch | blob | history
src/test/regress/expected/strings.out		patch | blob | history
src/test/regress/sql/strings.sql		patch | blob | history