granicus.if.org Git - postgresql/commitdiff
Fix quoted-substring handling in format parsing for to_char/to_number/etc.
author: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 18 Nov 2017 17:16:37 +0000 (12:16 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 18 Nov 2017 17:16:37 +0000 (12:16 -0500)
This code evidently intended to treat backslash as an escape character
within double-quoted substrings, but it was sufficiently confused that
cases like ..."foo\\"... did not work right: the second backslash
managed to quote the double-quote after it, despite being quoted itself.
Rewrite to get that right, while preserving the existing behavior
outside double-quoted substrings, which is that backslash isn't special
except in the combination \".

Comparing to Oracle, it seems that their version of to_char() for
timestamps allows literal alphanumerics only within double quotes, while
non-alphanumerics are allowed outside quotes; backslashes aren't special
anywhere; there is no way at all to emit a literal double quote.
(Bizarrely, their to_char() for numbers is different; it doesn't allow
literal text at all AFAICT.)  The fact that they don't treat backslash
as special justifies our existing behavior for backslash outside double
quotes.  I considered making backslash inside double quotes act the same
way (ie, special only if before "), which in a green field would be a
more consistent behavior.  But that would likely break more existing SQL
code than what this patch does.

Add some test cases illustrating this behavior.  (Only the last new
case actually changes behavior in this commit.)

Little of this behavior was documented, either, so fix that.

Discussion: https://postgr.es/m/3626.1510949486@sss.pgh.pa.us

doc/src/sgml/func.sgml
src/backend/utils/adt/formatting.c
src/test/regress/expected/numeric.out
src/test/regress/sql/numeric.sql

index 35a845c4001cfd4de0da381b44e8408f6e4693c2..698daf69ea6c7d17f21ff027aaca938c717c65d2 100644 (file)
@@ -6196,6 +6196,11 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
        If you want to have a double quote in the output you must
        precede it with a backslash, for example <literal>'\"YYYY
        Month\"'</literal>. <!-- "" font-lock sanity :-) -->
+       Backslashes are not otherwise special outside of double-quoted
+       strings.  Within a double-quoted string, a backslash causes the
+       next character to be taken literally, whatever it is (but this
+       has no special effect unless the next character is a double quote
+       or another backslash).
       </para>
      </listitem>
 
index 5afc293a5a0d93e91fb2d5a9600e51944b5ad025..cb0dbf748e501f0ba002efe37d48e9e9c4df12a8 100644 (file)
@@ -1227,11 +1227,7 @@ static void
 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
                         const KeySuffix *suf, const int *index, int ver, NUMDesc *Num)
 {
-       const KeySuffix *s;
        FormatNode *n;
-       int                     node_set = 0,
-                               suffix,
-                               last = 0;
 
 #ifdef DEBUG_TO_FROM_CHAR
        elog(DEBUG_elog_output, "to_char/number(): run parser");
@@ -1241,12 +1237,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
 
        while (*str)
        {
-               suffix = 0;
+               int                     suffix = 0;
+               const KeySuffix *s;
 
                /*
                 * Prefix
                 */
-               if (ver == DCH_TYPE && (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
+               if (ver == DCH_TYPE &&
+                       (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
                {
                        suffix |= s->id;
                        if (s->len)
@@ -1259,8 +1257,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
                if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
                {
                        n->type = NODE_TYPE_ACTION;
-                       n->suffix = 0;
-                       node_set = 1;
+                       n->suffix = suffix;
                        if (n->key->len)
                                str += n->key->len;
 
@@ -1273,71 +1270,56 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
                        /*
                         * Postfix
                         */
-                       if (ver == DCH_TYPE && *str && (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
+                       if (ver == DCH_TYPE && *str &&
+                               (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
                        {
-                               suffix |= s->id;
+                               n->suffix |= s->id;
                                if (s->len)
                                        str += s->len;
                        }
+
+                       n++;
                }
                else if (*str)
                {
                        /*
-                        * Special characters '\' and '"'
+                        * Process double-quoted literal string, if any
                         */
-                       if (*str == '"' && last != '\\')
+                       if (*str == '"')
                        {
-                               int                     x = 0;
-
                                while (*(++str))
                                {
-                                       if (*str == '"' && x != '\\')
+                                       if (*str == '"')
                                        {
                                                str++;
                                                break;
                                        }
-                                       else if (*str == '\\' && x != '\\')
-                                       {
-                                               x = '\\';
-                                               continue;
-                                       }
+                                       /* backslash quotes the next character, if any */
+                                       if (*str == '\\' && *(str + 1))
+                                               str++;
                                        n->type = NODE_TYPE_CHAR;
                                        n->character = *str;
                                        n->key = NULL;
                                        n->suffix = 0;
-                                       ++n;
-                                       x = *str;
+                                       n++;
                                }
-                               node_set = 0;
-                               suffix = 0;
-                               last = 0;
                        }
-                       else if (*str && *str == '\\' && last != '\\' && *(str + 1) == '"')
-                       {
-                               last = *str;
-                               str++;
-                       }
-                       else if (*str)
+                       else
                        {
+                               /*
+                                * Outside double-quoted strings, backslash is only special if
+                                * it immediately precedes a double quote.
+                                */
+                               if (*str == '\\' && *(str + 1) == '"')
+                                       str++;
                                n->type = NODE_TYPE_CHAR;
                                n->character = *str;
                                n->key = NULL;
-                               node_set = 1;
-                               last = 0;
+                               n->suffix = 0;
+                               n++;
                                str++;
                        }
                }
-
-               /* end */
-               if (node_set)
-               {
-                       if (n->type == NODE_TYPE_ACTION)
-                               n->suffix = suffix;
-                       ++n;
-
-                       n->suffix = 0;
-                       node_set = 0;
-               }
        }
 
        n->type = NODE_TYPE_END;
index a96bfc0eb04952eac7b19d9ee1a5bcec992999aa..17985e8540124700122496a7b07cd2fe8a0a8114 100644 (file)
@@ -1217,6 +1217,67 @@ SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999');
             | 100
 (1 row)
 
+-- Check parsing of literal text in a format string
+SELECT '' AS to_char_27, to_char('100'::numeric, 'foo999');
+ to_char_27 | to_char 
+------------+---------
+            | foo 100
+(1 row)
+
+SELECT '' AS to_char_28, to_char('100'::numeric, 'f\oo999');
+ to_char_28 | to_char  
+------------+----------
+            | f\oo 100
+(1 row)
+
+SELECT '' AS to_char_29, to_char('100'::numeric, 'f\\oo999');
+ to_char_29 |  to_char  
+------------+-----------
+            | f\\oo 100
+(1 row)
+
+SELECT '' AS to_char_30, to_char('100'::numeric, 'f\"oo999');
+ to_char_30 | to_char  
+------------+----------
+            | f"oo 100
+(1 row)
+
+SELECT '' AS to_char_31, to_char('100'::numeric, 'f\\"oo999');
+ to_char_31 |  to_char  
+------------+-----------
+            | f\"oo 100
+(1 row)
+
+SELECT '' AS to_char_32, to_char('100'::numeric, 'f"ool"999');
+ to_char_32 | to_char  
+------------+----------
+            | fool 100
+(1 row)
+
+SELECT '' AS to_char_33, to_char('100'::numeric, 'f"\ool"999');
+ to_char_33 | to_char  
+------------+----------
+            | fool 100
+(1 row)
+
+SELECT '' AS to_char_34, to_char('100'::numeric, 'f"\\ool"999');
+ to_char_34 |  to_char  
+------------+-----------
+            | f\ool 100
+(1 row)
+
+SELECT '' AS to_char_35, to_char('100'::numeric, 'f"ool\"999');
+ to_char_35 | to_char  
+------------+----------
+            | fool"999
+(1 row)
+
+SELECT '' AS to_char_36, to_char('100'::numeric, 'f"ool\\"999');
+ to_char_36 |  to_char  
+------------+-----------
+            | fool\ 100
+(1 row)
+
 -- TO_NUMBER()
 --
 SET lc_numeric = 'C';
index 321c7bdf7c55a6635854bdf7124c5e6b2df97155..d77504e6246332b747792dd22128c6b5e6279564 100644 (file)
@@ -786,6 +786,18 @@ SELECT '' AS to_char_24, to_char('100'::numeric, 'FM999.9');
 SELECT '' AS to_char_25, to_char('100'::numeric, 'FM999.');
 SELECT '' AS to_char_26, to_char('100'::numeric, 'FM999');
 
+-- Check parsing of literal text in a format string
+SELECT '' AS to_char_27, to_char('100'::numeric, 'foo999');
+SELECT '' AS to_char_28, to_char('100'::numeric, 'f\oo999');
+SELECT '' AS to_char_29, to_char('100'::numeric, 'f\\oo999');
+SELECT '' AS to_char_30, to_char('100'::numeric, 'f\"oo999');
+SELECT '' AS to_char_31, to_char('100'::numeric, 'f\\"oo999');
+SELECT '' AS to_char_32, to_char('100'::numeric, 'f"ool"999');
+SELECT '' AS to_char_33, to_char('100'::numeric, 'f"\ool"999');
+SELECT '' AS to_char_34, to_char('100'::numeric, 'f"\\ool"999');
+SELECT '' AS to_char_35, to_char('100'::numeric, 'f"ool\"999');
+SELECT '' AS to_char_36, to_char('100'::numeric, 'f"ool\\"999');
+
 -- TO_NUMBER()
 --
 SET lc_numeric = 'C';