2 /*-------------------------------------------------------------------------
5 * lexical scanner for PostgreSQL
9 * The rules in this file must be kept in sync with psql's lexer!!!
11 * The rules are designed so that the scanner never has to backtrack,
12 * in the sense that there is always a rule that can match the input
13 * consumed so far (the rule action may internally throw back some input
14 * with yyless(), however). As explained in the flex manual, this makes
15 * for a useful speed increase --- about a third faster than a plain -CF
16 * lexer, in simple testing. The extra complexity is mostly in the rules
17 * for handling float numbers and continued string literals. If you change
18 * the lexical rules, verify that you haven't broken the no-backtrack
19 * property by running flex with the "-b" option and checking that the
20 * resulting "lex.backup" file says that no backing up is needed.
23 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
27 * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.152 2009/05/05 18:32:17 petere Exp $
29 *-------------------------------------------------------------------------
36 #include "parser/gramparse.h"
37 #include "parser/keywords.h"
38 /* Not needed now that this file is compiled as part of gram.y */
39 /* #include "parser/gram.h" */
40 #include "parser/scansup.h"
41 #include "mb/pg_wchar.h"
44 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
46 #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
48 static int xcdepth = 0; /* depth of nesting in slash-star comments */
49 static char *dolqstart; /* current $foo$ quote start string */
52 * GUC variables. This is a DIRECT violation of the warning given at the
53 * head of gram.y, i.e., flex/bison code must not depend on any GUC variables;
54 * as such, changing their values can induce very unintuitive behavior.
55 * But we shall have to live with it as a short-term thing until the switch
56 * to SQL-standard string syntax is complete.
58 int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
59 bool escape_string_warning = true;
60 bool standard_conforming_strings = false;
62 static bool warn_on_first_escape; /* an escape warning may still be issued for the current literal; cleared after the first warning so each string warns at most once */
63 static bool saw_non_ascii = false; /* set when an escape yields a NUL or high-bit-set byte; NOTE(review): presumably what triggers pg_verifymbstr on the finished literal -- confirm */
66 * literalbuf is used to accumulate literal values when multiple rules
67 * are needed to parse a single literal. Call startlit to reset buffer
68 * to empty, addlit to add text. Note that the buffer is palloc'd and
69 * starts life afresh on every parse cycle.
71 static char *literalbuf; /* expandable buffer */
72 static int literallen; /* actual current length */
73 static int literalalloc; /* current allocated buffer size */
75 #define startlit() (literalbuf[0] = '\0', literallen = 0)
76 static void addlit(char *ytext, int yleng); /* append yleng bytes to literalbuf, enlarging it if needed, and keep it null-terminated */
77 static void addlitchar(unsigned char ychar); /* append a single byte to literalbuf, enlarging it if needed */
78 static char *litbufdup(void); /* palloc a copy of literalbuf, using the known length rather than strlen */
79 static char *litbuf_udeescape(unsigned char escape); /* build a palloc'd result from literalbuf, converting Unicode escapes introduced by 'escape' to UTF-8 */
81 #define lexer_errposition() scanner_errposition(yylloc)
83 static void check_escape_warning(void);
84 static void check_string_escape_warning(unsigned char ychar);
87 * Each call to yylex must set yylloc to the location of the found token
88 * (expressed as a byte offset from the start of the input text).
89 * When we parse a token that requires multiple lexer rules to process,
90 * this should be done in the first such rule, else yylloc will point
91 * into the middle of the token.
93 #define SET_YYLLOC() (yylloc = yytext - scanbuf)
95 /* Handles to the buffer that the lexer uses internally */
96 static YY_BUFFER_STATE scanbufhandle;
99 static unsigned char unescape_single_char(unsigned char c);
104 %option never-interactive
109 %option prefix="base_yy"
112 * OK, here is a short description of lex/flex rules behavior.
113 * The longest pattern which matches an input string is always chosen.
114 * For equal-length patterns, the first occurring in the rules list is chosen.
115 * INITIAL is the starting state, to which all non-conditional rules apply.
116 * Exclusive states change parsing rules while the state is active. When in
117 * an exclusive state, only those rules defined for that state apply.
119 * We use exclusive states for quoted strings, extended comments,
120 * and to eliminate parsing troubles for numeric strings.
122 * <xb> bit string literal
123 * <xc> extended C-style comments
124 * <xd> delimited identifiers (double-quoted identifiers)
125 * <xh> hexadecimal numeric string
126 * <xq> standard quoted strings
127 * <xe> extended quoted strings (support backslash escape sequences)
128 * <xdolq> $foo$ quoted strings
129 * <xui> quoted identifier with Unicode escapes
130 * <xus> quoted string with Unicode escapes
144 * In order to make the world safe for Windows and Mac clients as well as
145 * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
146 * sequence will be seen as two successive newlines, but that doesn't cause
147 * any problems. Comments that start with -- and extend to the next
148 * newline are treated as equivalent to a single whitespace character.
150 * NOTE a fine point: if there is no newline following --, we will absorb
151 * everything to the end of the input as a comment. This is correct. Older
152 * versions of Postgres failed to recognize -- as a comment if the input
153 * did not end with a newline.
155 * XXX perhaps \f (formfeed) should be treated as a newline as well?
157 * XXX if you change the set of whitespace characters, fix scanner_isspace()
158 * to agree, and see also the plpgsql lexer.
166 comment ("--"{non_newline}*)
168 whitespace ({space}+|{comment})
171 * SQL requires at least one newline in the whitespace separating
172 * string literals that are to be concatenated. Silly, but who are we
173 * to argue? Note that {whitespace_with_newline} should not have * after
174 * it, whereas {whitespace} should generally have a * after it...
177 special_whitespace ({space}+|{comment}{newline})
178 horiz_whitespace ({horiz_space}|{comment})
179 whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
182 * To ensure that {quotecontinue} can be scanned without having to back up
183 * if the full pattern isn't matched, we include trailing whitespace in
184 * {quotestop}. This matches all cases where {quotecontinue} fails to match,
185 * except for {quote} followed by whitespace and just one "-" (not two,
186 * which would start a {comment}). To cover that we have {quotefail}.
187 * The actions for {quotestop} and {quotefail} must throw back characters
188 * beyond the quote proper.
191 quotestop {quote}{whitespace}*
192 quotecontinue {quote}{whitespace_with_newline}{quote}
193 quotefail {quote}{whitespace}*"-"
196 * It is tempting to scan the string for only those characters
197 * which are allowed. However, this leads to silently swallowed
198 * characters if illegal characters are included in the string.
199 * For example, if xbinside is [01] then B'ABCD' is interpreted
200 * as a zero-length string, and the ABCD' is lost!
201 * Better to pass the string forward and let the input routines
202 * validate the contents.
207 /* Hexadecimal number */
211 /* National character */
214 /* Quoted string that allows backslash escapes */
218 xeoctesc [\\][0-7]{1,3}
219 xehexesc [\\]x[0-9A-Fa-f]{1,2}
222 * xqdouble implements embedded quote, ''''
225 xqdouble {quote}{quote}
228 /* $foo$ style quotes ("dollar quoting")
229 * The quoted string starts with $foo$ where "foo" is an optional string
230 * in the form of an identifier, except that it may not contain "$",
231 * and extends to the first occurrence of an identical string.
232 * There is *no* processing of the quoted text.
234 * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
235 * fails to match its trailing "$".
237 dolq_start [A-Za-z\200-\377_]
238 dolq_cont [A-Za-z\200-\377_0-9]
239 dolqdelim \$({dolq_start}{dolq_cont}*)?\$
240 dolqfailed \${dolq_start}{dolq_cont}*
244 * Allows embedded spaces and other special characters in identifiers.
249 xddouble {dquote}{dquote}
252 /* Unicode escapes */
253 uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
254 /* error rule to avoid backup */
255 uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
257 /* Quoted identifier with Unicode escapes */
258 xuistart [uU]&{dquote}
259 xuistop1 {dquote}{whitespace}*{uescapefail}?
260 xuistop2 {dquote}{whitespace}*{uescape}
262 /* Quoted string with Unicode escapes */
263 xusstart [uU]&{quote}
264 xusstop1 {quote}{whitespace}*{uescapefail}?
265 xusstop2 {quote}{whitespace}*{uescape}
267 /* error rule to avoid backup */
273 * The "extended comment" syntax closely resembles allowable operator syntax.
274 * The tricky part here is to get lex to recognize a string starting with
275 * slash-star as a comment, when interpreting it as an operator would produce
276 * a longer match --- remember lex will prefer a longer match! Also, if we
277 * have something like plus-slash-star, lex will think this is a 3-character
278 * operator whereas we want to see it as a + operator and a comment start.
279 * The solution is two-fold:
280 * 1. append {op_chars}* to xcstart so that it matches as much text as
281 * {operator} would. Then the tie-breaker (first matching rule of same
282 * length) ensures xcstart wins. We put back the extra stuff with yyless()
283 * in case it contains a star-slash that should terminate the comment.
284 * 2. In the operator rule, check for slash-star within the operator, and
285 * if found throw it back with yyless(). This handles the plus-slash-star
287 * Dash-dash comments have similar interactions with the operator rule.
289 xcstart \/\*{op_chars}*
294 ident_start [A-Za-z\200-\377_]
295 ident_cont [A-Za-z\200-\377_0-9\$]
297 identifier {ident_start}{ident_cont}*
302 * "self" is the set of chars that should be returned as single-character
303 * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
304 * which can be one or more characters long (but if a single-char token
305 * appears in the "self" set, it is not to be returned as an Op). Note
306 * that the sets overlap, but each has some chars that are not in the other.
308 * If you change either set, adjust the character lists appearing in the
309 * rule for "operator"!
311 self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
312 op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
315 /* We no longer allow unary minus in numbers.
316 * Instead we pass it separately to the parser, where it gets
317 * coerced via doNegate() -- Leon aug 20 1999
319 * {realfail1} and {realfail2} are added to prevent the need for scanner
320 * backup when the {real} rule fails to match completely.
324 decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
325 real ({integer}|{decimal})[Ee][-+]?{digit}+
326 realfail1 ({integer}|{decimal})[Ee]
327 realfail2 ({integer}|{decimal})[Ee][-+]
334 * Dollar quoted strings are totally opaque, and no escaping is done on them.
335 * Other quoted strings must allow some special characters such as single-quote
337 * Embedded single-quotes are implemented both in the SQL standard
338 * style of two adjacent single quotes "''" and in the Postgres/Java style
339 * of escaped-quote "\'".
340 * Other embedded escaped characters are matched explicitly and the leading
341 * backslash is dropped from the string.
342 * Note that xcstart must appear before operator, as explained above!
343 * Also whitespace (comment) must appear before operator.
353 /* Set location in case of syntax error in comment */
357 /* Put back any characters past slash-star; see above */
363 /* Put back any characters past slash-star; see above */
386 <xc><<EOF>> { yyerror("unterminated /* comment"); }
390 * At some point we should simply pass the string
391 * forward to the parser and label it there.
392 * In the meantime, place a leading "b" on the string
393 * to mark it for the input routine as a binary string.
404 yylval.str = litbufdup();
409 addlit(yytext, yyleng);
411 <xh>{quotecontinue} |
412 <xb>{quotecontinue} {
415 <xb><<EOF>> { yyerror("unterminated bit string literal"); }
418 /* Hexadecimal bit type.
419 * At some point we should simply pass the string
420 * forward to the parser and label it there.
421 * In the meantime, place a leading "x" on the string
422 * to mark it for the input routine as a hex string.
433 yylval.str = litbufdup();
436 <xh><<EOF>> { yyerror("unterminated hexadecimal string literal"); }
439 /* National character.
440 * We will pass this along as a normal character string,
441 * but preceded with an internally-generated "NCHAR".
443 const ScanKeyword *keyword;
446 yyless(1); /* eat only 'n' this time */
447 /* nchar had better be a keyword! */
448 keyword = ScanKeywordLookup("nchar");
449 Assert(keyword != NULL);
450 yylval.keyword = keyword->name;
451 return keyword->value;
455 warn_on_first_escape = true;
456 saw_non_ascii = false;
458 if (standard_conforming_strings)
465 warn_on_first_escape = false;
466 saw_non_ascii = false;
472 if (!standard_conforming_strings)
474 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
475 errmsg("unsafe use of string constant with Unicode escapes"),
476 errdetail("String constants with Unicode escapes cannot be used when standard_conforming_strings is off.")));
486 * check that the data remains valid if it might have been
487 * made invalid by unescaping any chars.
490 pg_verifymbstr(literalbuf, literallen, false);
491 yylval.str = litbufdup();
495 /* throw back all but the quote */
498 yylval.str = litbuf_udeescape('\\');
503 yylval.str = litbuf_udeescape(yytext[yyleng-2]);
506 <xq,xe,xus>{xqdouble} {
510 addlit(yytext, yyleng);
513 addlit(yytext, yyleng);
516 if (yytext[1] == '\'')
518 if (backslash_quote == BACKSLASH_QUOTE_OFF ||
519 (backslash_quote == BACKSLASH_QUOTE_SAFE_ENCODING &&
520 PG_ENCODING_IS_CLIENT_ONLY(pg_get_client_encoding())))
522 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
523 errmsg("unsafe use of \\' in a string literal"),
524 errhint("Use '' to write quotes in strings. \\' is insecure in client-only encodings."),
525 lexer_errposition()));
527 check_string_escape_warning(yytext[1]);
528 addlitchar(unescape_single_char(yytext[1]));
531 unsigned char c = strtoul(yytext+1, NULL, 8);
533 check_escape_warning();
535 if (c == '\0' || IS_HIGHBIT_SET(c))
536 saw_non_ascii = true;
539 unsigned char c = strtoul(yytext+2, NULL, 16);
541 check_escape_warning();
543 if (c == '\0' || IS_HIGHBIT_SET(c))
544 saw_non_ascii = true;
546 <xq,xe,xus>{quotecontinue} {
550 /* This is only needed for \ just before EOF */
551 addlitchar(yytext[0]);
553 <xq,xe,xus><<EOF>> { yyerror("unterminated quoted string"); }
557 dolqstart = pstrdup(yytext);
563 /* throw back all but the initial "$" */
565 /* and treat it as {other} */
569 if (strcmp(yytext, dolqstart) == 0)
573 yylval.str = litbufdup();
579 * When we fail to match $...$ to dolqstart, transfer
580 * the $... part to the output, but put back the final
581 * $ for rescanning. Consider $delim$...$junk$delim$
583 addlit(yytext, yyleng-1);
587 <xdolq>{dolqinside} {
588 addlit(yytext, yyleng);
590 <xdolq>{dolqfailed} {
591 addlit(yytext, yyleng);
594 /* This is only needed for $ inside the quoted text */
595 addlitchar(yytext[0]);
597 <xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
614 yyerror("zero-length delimited identifier");
616 if (literallen >= NAMEDATALEN)
617 truncate_identifier(ident, literallen, true);
626 yyerror("zero-length delimited identifier");
627 ident = litbuf_udeescape('\\');
628 if (literallen >= NAMEDATALEN)
629 truncate_identifier(ident, literallen, true);
631 /* throw back all but the quote */
640 yyerror("zero-length delimited identifier");
641 ident = litbuf_udeescape(yytext[yyleng - 2]);
642 if (literallen >= NAMEDATALEN)
643 truncate_identifier(ident, literallen, true);
651 addlit(yytext, yyleng);
653 <xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); }
659 /* throw back all but the initial u/U */
661 /* and treat it as {identifier} */
662 ident = downcase_truncate_identifier(yytext, yyleng, true);
679 * Check for embedded slash-star or dash-dash; those
680 * are comment starts, so operator must stop there.
681 * Note that slash-star or dash-dash at the first
682 * character will match a prior rule, not this one.
685 char *slashstar = strstr(yytext, "/*");
686 char *dashdash = strstr(yytext, "--");
688 if (slashstar && dashdash)
690 /* if both appear, take the first one */
691 if (slashstar > dashdash)
692 slashstar = dashdash;
695 slashstar = dashdash;
697 nchars = slashstar - yytext;
700 * For SQL compatibility, '+' and '-' cannot be the
701 * last char of a multi-char operator unless the operator
702 * contains chars that are not in SQL operators.
703 * The idea is to lex '=-' as two operators, but not
704 * to forbid operator names like '?-' that could not be
705 * sequences of SQL operators.
708 (yytext[nchars-1] == '+' ||
709 yytext[nchars-1] == '-'))
713 for (ic = nchars-2; ic >= 0; ic--)
715 if (strchr("~!@#^&|`?%", yytext[ic]))
719 break; /* found a char that makes it OK */
720 nchars--; /* else remove the +/-, and check again */
727 /* Strip the unwanted chars from the token */
730 * If what we have left is only one char, and it's
731 * one of the characters matching "self", then
732 * return it as a character token the same way
733 * that the "self" rule would have.
736 strchr(",()[].;:+-*/%^<>=", yytext[0]))
741 * Complain if operator is too long. Unlike the case
742 * for identifiers, we make this an error not a notice-
743 * and-truncate, because the odds are we are looking at
744 * a syntactic mistake anyway.
746 if (nchars >= NAMEDATALEN)
747 yyerror("operator too long");
749 /* Convert "!=" operator to "<>" for compatibility */
750 if (strcmp(yytext, "!=") == 0)
751 yylval.str = pstrdup("<>");
753 yylval.str = pstrdup(yytext);
759 yylval.ival = atol(yytext + 1);
769 val = strtol(yytext, &endptr, 10);
770 if (*endptr != '\0' || errno == ERANGE
771 #ifdef HAVE_LONG_INT_64
772 /* if long > 32 bits, check for overflow of int4 */
773 || val != (long) ((int32) val)
777 /* integer too large, treat it as a float */
778 yylval.str = pstrdup(yytext);
786 yylval.str = pstrdup(yytext);
791 yylval.str = pstrdup(yytext);
796 * throw back the [Ee], and treat as {decimal}. Note
797 * that it is possible the input is actually {integer},
798 * but since this case will almost certainly lead to a
799 * syntax error anyway, we don't bother to distinguish.
803 yylval.str = pstrdup(yytext);
807 /* throw back the [Ee][+-], and proceed as above */
810 yylval.str = pstrdup(yytext);
816 const ScanKeyword *keyword;
821 /* Is it a keyword? */
822 keyword = ScanKeywordLookup(yytext);
825 yylval.keyword = keyword->name;
826 return keyword->value;
830 * No. Convert the identifier to lower case, and truncate
833 ident = downcase_truncate_identifier(yytext, yyleng, true);
851 * scanner_errposition
852 * Report a lexer or grammar error cursor position, if possible.
854 * This is expected to be used within an ereport() call. The return value
855 * is a dummy (always 0, in fact).
857 * Note that this can only be used for messages emitted during raw parsing
858 * (essentially, scan.l and gram.y), since it requires scanbuf to still be
862 scanner_errposition(int location)
866 Assert(scanbuf != NULL); /* else called from wrong place */
868 return 0; /* no-op if location is unknown */
870 /* Convert byte offset to character number */
871 pos = pg_mbstrlen_with_len(scanbuf, location) + 1;
872 /* And pass it to the ereport mechanism */
873 return errposition(pos);
878 * Report a lexer or grammar error.
880 * The message's cursor position identifies the most recently lexed token.
881 * This is OK for syntax error messages from the Bison parser, because Bison
882 * parsers report error as soon as the first unparsable token is reached.
883 * Beware of using yyerror for other purposes, as the cursor position might
887 yyerror(const char *message)
889 const char *loc = scanbuf + yylloc;
891 if (*loc == YY_END_OF_BUFFER_CHAR)
894 (errcode(ERRCODE_SYNTAX_ERROR),
895 /* translator: %s is typically the translation of "syntax error" */
896 errmsg("%s at end of input", _(message)),
897 lexer_errposition()));
902 (errcode(ERRCODE_SYNTAX_ERROR),
903 /* translator: first %s is typically the translation of "syntax error" */
904 errmsg("%s at or near \"%s\"", _(message), loc),
905 lexer_errposition()));
911 * Called before any actual parsing is done
914 scanner_init(const char *str)
916 Size slen = strlen(str);
919 * Might be left over after ereport()
921 if (YY_CURRENT_BUFFER)
922 yy_delete_buffer(YY_CURRENT_BUFFER);
925 * Make a scan buffer with special termination needed by flex.
927 scanbuf = palloc(slen + 2);
928 memcpy(scanbuf, str, slen);
929 scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
930 scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
932 /* initialize literal buffer to a reasonable but expansible size */
934 literalbuf = (char *) palloc(literalalloc);
942 * Called after parsing is done to clean up after scanner_init()
947 yy_delete_buffer(scanbufhandle);
954 addlit(char *ytext, int yleng)
956 /* enlarge buffer if needed */
957 if ((literallen+yleng) >= literalalloc)
961 } while ((literallen+yleng) >= literalalloc);
962 literalbuf = (char *) repalloc(literalbuf, literalalloc);
964 /* append new data, add trailing null */
965 memcpy(literalbuf+literallen, ytext, yleng);
967 literalbuf[literallen] = '\0';
972 addlitchar(unsigned char ychar)
974 /* enlarge buffer if needed */
975 if ((literallen+1) >= literalalloc)
978 literalbuf = (char *) repalloc(literalbuf, literalalloc);
980 /* append new data, add trailing null */
981 literalbuf[literallen] = ychar;
983 literalbuf[literallen] = '\0';
988 * One might be tempted to write pstrdup(literalbuf) instead of this,
989 * but for long literals this is much faster because the length is
997 new = palloc(literallen + 1);
998 memcpy(new, literalbuf, literallen+1);
1003 hexval(unsigned char c)
1005 if (c >= '0' && c <= '9')
1007 if (c >= 'a' && c <= 'f')
1008 return c - 'a' + 0xA;
1009 if (c >= 'A' && c <= 'F')
1010 return c - 'A' + 0xA;
1011 elog(ERROR, "invalid hexadecimal digit");
1012 return 0; /* not reached */
1016 check_unicode_value(pg_wchar c, char * loc)
1018 if (GetDatabaseEncoding() == PG_UTF8)
1023 yylloc += (char *) loc - literalbuf + 3; /* 3 for U&" */
1024 yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
1029 litbuf_udeescape(unsigned char escape)
1034 if (isxdigit(escape)
1038 || scanner_isspace(escape))
1040 yylloc += literallen + yyleng + 1;
1041 yyerror("invalid Unicode escape character");
1045 * This relies on the subtle assumption that a UTF-8 expansion
1046 * cannot be longer than its escaped representation.
1048 new = palloc(literallen + 1);
1054 if (in[0] == escape)
1056 if (in[1] == escape)
1061 else if (isxdigit(in[1]) && isxdigit(in[2]) && isxdigit(in[3]) && isxdigit(in[4]))
1063 pg_wchar unicode = hexval(in[1]) * 16*16*16 + hexval(in[2]) * 16*16 + hexval(in[3]) * 16 + hexval(in[4]);
1064 check_unicode_value(unicode, in);
1065 unicode_to_utf8(unicode, (unsigned char *) out);
1067 out += pg_mblen(out);
1069 else if (in[1] == '+'
1070 && isxdigit(in[2]) && isxdigit(in[3])
1071 && isxdigit(in[4]) && isxdigit(in[5])
1072 && isxdigit(in[6]) && isxdigit(in[7]))
1074 pg_wchar unicode = hexval(in[2]) * 16*16*16*16*16 + hexval(in[3]) * 16*16*16*16 + hexval(in[4]) * 16*16*16
1075 + hexval(in[5]) * 16*16 + hexval(in[6]) * 16 + hexval(in[7]);
1076 check_unicode_value(unicode, in);
1077 unicode_to_utf8(unicode, (unsigned char *) out);
1079 out += pg_mblen(out);
1083 yylloc += in - literalbuf + 3; /* 3 for U&" */
1084 yyerror("invalid Unicode escape value");
1093 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
1094 * codes; but it's probably not worth the trouble, since this isn't
1095 * likely to be a performance-critical path.
1097 pg_verifymbstr(new, out - new, false);
1101 static unsigned char
1102 unescape_single_char(unsigned char c)
1117 /* check for backslash followed by non-7-bit-ASCII */
1118 if (c == '\0' || IS_HIGHBIT_SET(c))
1119 saw_non_ascii = true;
1126 check_string_escape_warning(unsigned char ychar)
1130 if (warn_on_first_escape && escape_string_warning)
1132 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1133 errmsg("nonstandard use of \\' in a string literal"),
1134 errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."),
1135 lexer_errposition()));
1136 warn_on_first_escape = false; /* warn only once per string */
1138 else if (ychar == '\\')
1140 if (warn_on_first_escape && escape_string_warning)
1142 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1143 errmsg("nonstandard use of \\\\ in a string literal"),
1144 errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."),
1145 lexer_errposition()));
1146 warn_on_first_escape = false; /* warn only once per string */
1149 check_escape_warning();
1153 check_escape_warning(void)
1155 if (warn_on_first_escape && escape_string_warning)
1157 (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
1158 errmsg("nonstandard use of escape in a string literal"),
1159 errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
1160 lexer_errposition()));
1161 warn_on_first_escape = false; /* warn only once per string */