data based on the given value. Any text that is not a template pattern is
simply copied verbatim. Similarly, in an input template string (for the
other functions), template patterns identify the values to be supplied by
- the input data string.
+ the input data string. If there are characters in the template string
+ that are not template patterns, the corresponding characters in the input
+ data string are simply skipped over (whether or not they are equal to the
+ template string characters).
</para>
<para>
Ordinary text is allowed in <function>to_char</function>
templates and will be output literally. You can put a substring
in double quotes to force it to be interpreted as literal text
- even if it contains pattern key words. For example, in
+ even if it contains template patterns. For example, in
<literal>'"Hello Year "YYYY'</literal>, the <literal>YYYY</literal>
will be replaced by the year data, but the single <literal>Y</literal> in <literal>Year</literal>
- will not be. In <function>to_date</function>, <function>to_number</function>,
- and <function>to_timestamp</function>, double-quoted strings skip the number of
- input characters contained in the string, e.g. <literal>"XX"</literal>
- skips two input characters.
+ will not be.
+ In <function>to_date</function>, <function>to_number</function>,
+ and <function>to_timestamp</function>, literal text and double-quoted
+ strings result in skipping the number of characters contained in the
+ string; for example <literal>"XX"</literal> skips two input characters
+ (whether or not they are <literal>XX</literal>).
</para>
</listitem>
</para>
</listitem>
+ <listitem>
+ <para>
+ In <function>to_number</function>, if non-data template patterns such
+ as <literal>L</literal> or <literal>TH</literal> are used, the
+ corresponding number of input characters are skipped, whether or not
+ they match the template pattern, unless they are data characters
+ (that is, digits, sign, decimal point, or comma). For
+ example, <literal>TH</literal> would skip two non-data characters.
+ </para>
+ </listitem>
+
<listitem>
<para>
<literal>V</literal> with <function>to_char</function>
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
- char *number, int from_char_input_len, int to_char_out_pre_spaces,
+ char *number, int input_len, int to_char_out_pre_spaces,
int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str);
static DCHCacheEntry *DCH_cache_search(const char *str);
return result;
}
+/*
+ * These macros are used in NUM_processor() and its subsidiary routines.
+ * OVERLOAD_TEST: true if we've reached end of input string
+ * AMOUNT_TEST(s): true if at least s characters remain in string
+ */
+#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
+#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
+
/* ----------
* Number extraction for TO_NUMBER()
* ----------
(id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif
-#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
-#define AMOUNT_TEST(_s) (input_len-(Np->inout_p-Np->inout) >= _s)
-
if (OVERLOAD_TEST)
return;
++Np->num_curr;
}
+/*
+ * Skip over "n" input characters, but only if they aren't numeric data
+ */
+static void
+NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
+{
+ while (n-- > 0)
+ {
+ if (OVERLOAD_TEST)
+ break; /* end of input */
+ if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
+ break; /* it's a data character */
+ Np->inout_p += pg_mblen(Np->inout_p);
+ }
+}
+
static char *
NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
- char *number, int from_char_input_len, int to_char_out_pre_spaces,
+ char *number, int input_len, int to_char_out_pre_spaces,
int sign, bool is_to_char, Oid collid)
{
FormatNode *n;
NUMProc _Np,
*Np = &_Np;
+ const char *pattern;
+ int pattern_len;
MemSet(Np, 0, sizeof(NUMProc));
if (!Np->is_to_char)
{
/*
- * Check non-string inout end
+ * Check at least one character remains to be scanned. (In
+ * actions below, must use AMOUNT_TEST if we want to read more
+ * characters than that.)
*/
- if (Np->inout_p >= Np->inout + from_char_input_len)
+ if (OVERLOAD_TEST)
break;
}
if (n->type == NODE_TYPE_ACTION)
{
/*
- * Create/reading digit/zero/blank/sing
+ * Create/read digit/zero/blank/sign/special-case
*
* 'NUM_S' note: The locale sign is anchored to number and we
* read/write it when we work with first or last number
- * (NUM_0/NUM_9). This is reason why NUM_S missing in follow
- * switch().
+ * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
+ *
+ * Notice the "Np->inout_p++" at the bottom of the loop. This is
+ * why most of the actions advance inout_p one less than you might
+ * expect. In cases where we don't want that increment to happen,
+ * a switch case ends with "continue" not "break".
*/
switch (n->key->id)
{
}
else
{
- NUM_numpart_from_char(Np, n->key->id, from_char_input_len);
+ NUM_numpart_from_char(Np, n->key->id, input_len);
break; /* switch() case: */
}
if (IS_FILLMODE(Np->Num))
continue;
}
+ if (*Np->inout_p != ',')
+ continue;
}
break;
case NUM_G:
+ pattern = Np->L_thousands_sep;
+ pattern_len = strlen(pattern);
if (Np->is_to_char)
{
if (!Np->num_in)
continue;
else
{
- int x = strlen(Np->L_thousands_sep);
-
- memset(Np->inout_p, ' ', x);
- Np->inout_p += x - 1;
+ /* just in case there are MB chars */
+ pattern_len = pg_mbstrlen(pattern);
+ memset(Np->inout_p, ' ', pattern_len);
+ Np->inout_p += pattern_len - 1;
}
}
else
{
- strcpy(Np->inout_p, Np->L_thousands_sep);
- Np->inout_p += strlen(Np->inout_p) - 1;
+ strcpy(Np->inout_p, pattern);
+ Np->inout_p += pattern_len - 1;
}
}
else
if (IS_FILLMODE(Np->Num))
continue;
}
- Np->inout_p += strlen(Np->L_thousands_sep) - 1;
+
+ /*
+ * Because L_thousands_sep typically contains data
+ * characters (either '.' or ','), we can't use
+ * NUM_eat_non_data_chars here. Instead skip only if
+ * the input matches L_thousands_sep.
+ */
+ if (AMOUNT_TEST(pattern_len) &&
+ strncmp(Np->inout_p, pattern, pattern_len) == 0)
+ Np->inout_p += pattern_len - 1;
+ else
+ continue;
}
break;
case NUM_L:
+ pattern = Np->L_currency_symbol;
if (Np->is_to_char)
{
- strcpy(Np->inout_p, Np->L_currency_symbol);
- Np->inout_p += strlen(Np->inout_p) - 1;
+ strcpy(Np->inout_p, pattern);
+ Np->inout_p += strlen(pattern) - 1;
}
else
- Np->inout_p += strlen(Np->L_currency_symbol) - 1;
+ {
+ NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
+ continue;
+ }
break;
case NUM_RN:
continue;
if (Np->is_to_char)
+ {
strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
- Np->inout_p += 1;
+ Np->inout_p += 1;
+ }
+ else
+ {
+ /* All variants of 'th' occupy 2 characters */
+ NUM_eat_non_data_chars(Np, 2, input_len);
+ continue;
+ }
break;
case NUM_TH:
continue;
if (Np->is_to_char)
+ {
strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
- Np->inout_p += 1;
+ Np->inout_p += 1;
+ }
+ else
+ {
+ /* All variants of 'TH' occupy 2 characters */
+ NUM_eat_non_data_chars(Np, 2, input_len);
+ continue;
+ }
break;
case NUM_MI:
{
if (*Np->inout_p == '-')
*Np->number = '-';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
}
break;
{
if (*Np->inout_p == '+')
*Np->number = '+';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
}
break;
case NUM_SG:
if (Np->is_to_char)
*Np->inout_p = Np->sign;
-
else
{
if (*Np->inout_p == '-')
*Np->number = '-';
else if (*Np->inout_p == '+')
*Np->number = '+';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
}
break;
-
default:
continue;
break;
else
{
/*
- * Remove to output char from input in TO_CHAR
+ * In TO_CHAR, non-pattern characters in the format are copied to
+ * the output. In TO_NUMBER, we skip one input character for each
+ * non-pattern format character, whether or not it matches the
+ * format character. (Currently, that's actually implemented as
+ * skipping one input byte per non-pattern format byte, which is
+ * wrong...)
*/
if (Np->is_to_char)
*Np->inout_p = n->character;
-- TO_NUMBER()
--
+SET lc_numeric = 'C';
SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999');
to_number_1 | to_number
-------------+-----------
| -0.01
(1 row)
+SELECT '' AS to_number_14, to_number('34,50','999,99');
+ to_number_14 | to_number
+--------------+-----------
+ | 3450
+(1 row)
+
+SELECT '' AS to_number_15, to_number('123,000','999G');
+ to_number_15 | to_number
+--------------+-----------
+ | 123
+(1 row)
+
+SELECT '' AS to_number_16, to_number('123456','999G999');
+ to_number_16 | to_number
+--------------+-----------
+ | 123456
+(1 row)
+
+SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99');
+ to_number_17 | to_number
+--------------+-----------
+ | 1234.56
+(1 row)
+
+SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99');
+ to_number_18 | to_number
+--------------+-----------
+ | 1234.56
+(1 row)
+
+SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99');
+ to_number_19 | to_number
+--------------+-----------
+ | 1234.56
+(1 row)
+
+SELECT '' AS to_number_20, to_number('1234.56','L99,999.99');
+ to_number_20 | to_number
+--------------+-----------
+ | 1234.56
+(1 row)
+
+SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99');
+ to_number_21 | to_number
+--------------+-----------
+ | 1234.56
+(1 row)
+
+SELECT '' AS to_number_22, to_number('42nd', '99th');
+ to_number_22 | to_number
+--------------+-----------
+ | 42
+(1 row)
+
+RESET lc_numeric;
--
-- Input syntax
--
-- TO_NUMBER()
--
+SET lc_numeric = 'C';
SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999');
SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999');
SELECT '' AS to_number_3, to_number('<564646.654564>', '999999.999999PR');
SELECT '' AS to_number_11, to_number('.-01', 'S99.99');
SELECT '' AS to_number_12, to_number('.01-', '99.99S');
SELECT '' AS to_number_13, to_number(' . 0 1-', ' 9 9 . 9 9 S');
+SELECT '' AS to_number_14, to_number('34,50','999,99');
+SELECT '' AS to_number_15, to_number('123,000','999G');
+SELECT '' AS to_number_16, to_number('123456','999G999');
+SELECT '' AS to_number_17, to_number('$1234.56','L9,999.99');
+SELECT '' AS to_number_18, to_number('$1234.56','L99,999.99');
+SELECT '' AS to_number_19, to_number('$1,234.56','L99,999.99');
+SELECT '' AS to_number_20, to_number('1234.56','L99,999.99');
+SELECT '' AS to_number_21, to_number('1,234.56','L99,999.99');
+SELECT '' AS to_number_22, to_number('42nd', '99th');
+RESET lc_numeric;
--
-- Input syntax