From: Tom Lane Date: Tue, 24 Feb 2004 21:45:18 +0000 (+0000) Subject: Implement dollar-quoting in the backend lexer and psql. Documentation X-Git-Tag: REL8_0_0BETA1~1120 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=58e705320e0c9e691a3fd2bd544f375ee0ca23d6;p=postgresql Implement dollar-quoting in the backend lexer and psql. Documentation is still lacking, as is support in plpgsql and other places, but this is the basic feature. Patch by Andrew Dunstan, some tweaking by Tom Lane. Also, enable %option nodefault in these two lexers, and patch some gaps revealed thereby. --- diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index caab9a002c..b788a42bc9 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -10,7 +10,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.114 2004/02/21 00:34:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.115 2004/02/24 21:45:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ +static char *dolqstart; /* current $foo$ quote start string */ /* * literalbuf is used to accumulate literal values when multiple rules @@ -74,6 +75,7 @@ unsigned char unescape_single_char(unsigned char c); %option 8bit %option never-interactive +%option nodefault %option nounput %option noyywrap %option prefix="base_yy" @@ -94,6 +96,7 @@ unsigned char unescape_single_char(unsigned char c); * delimited identifiers (double-quoted identifiers) * hexadecimal numeric string * quoted strings + * $foo$ quoted strings */ %x xb @@ -101,6 +104,7 @@ unsigned char unescape_single_char(unsigned char c); %x xd %x xh %x xq +%x xdolq /* * In order to make the world safe for Windows and Mac clients as well as @@ -175,6 +179,17 @@ xqescape [\\][^0-7] xqoctesc 
[\\][0-7]{1,3} xqcat {quote}{whitespace_with_newline}{quote} +/* $foo$ style quotes ("dollar quoting") + * The quoted string starts with $foo$ where "foo" is an optional string + * in the form of an identifier, except that it may not contain "$", + * and extends to the first occurrence of an identical string. + * There is *no* processing of the quoted text. + */ +dolq_start [A-Za-z\200-\377_] +dolq_cont [A-Za-z\200-\377_0-9] +dolqdelim \$({dolq_start}{dolq_cont}*)?\$ +dolqinside [^$]+ + /* Double quote * Allows embedded spaces and other special characters into identifiers. */ @@ -242,7 +257,8 @@ param \${integer} other . /* - * Quoted strings must allow some special characters such as single-quote + * Dollar quoted strings are totally opaque, and no escaping is done on them. + * Other quoted strings must allow some special characters such as single-quote * and newline. * Embedded single-quotes are implemented both in the SQL standard * style of two adjacent single quotes "''" and in the Postgres/Java style @@ -388,8 +404,46 @@ other . <xq>{xqcat} { /* ignore */ } +<xq>. { + /* This is only needed for \ just before EOF */ + addlitchar(yytext[0]); + } <xq><<EOF>> { yyerror("unterminated quoted string"); } +{dolqdelim} { + token_start = yytext; + dolqstart = pstrdup(yytext); + BEGIN(xdolq); + startlit(); + } +<xdolq>{dolqdelim} { + if (strcmp(yytext, dolqstart) == 0) + { + pfree(dolqstart); + BEGIN(INITIAL); + yylval.str = litbufdup(); + return SCONST; + } + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + addlit(yytext, yyleng-1); + yyless(yyleng-1); + } + } +<xdolq>{dolqinside} { + addlit(yytext, yyleng); + } +<xdolq>. { + /* This is only needed for $ inside the quoted text */ + addlitchar(yytext[0]); + } +<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); } + {xdstart} { token_start = yytext; BEGIN(xd); @@ -407,7 +461,7 @@ other . 
yylval.str = ident; return IDENT; } -<xd>{xddouble} { +<xd>{xddouble} { addlitchar('"'); } <xd>{xdinside} { diff --git a/src/bin/psql/prompt.c b/src/bin/psql/prompt.c index a11881bc9a..0a0d317739 100644 --- a/src/bin/psql/prompt.c +++ b/src/bin/psql/prompt.c @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.34 2004/01/25 03:07:22 neilc Exp $ + * $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.35 2004/02/24 21:45:18 tgl Exp $ */ #include "postgres_fe.h" #include "prompt.h" @@ -85,6 +85,7 @@ get_prompt(promptStatus_t status) case PROMPT_CONTINUE: case PROMPT_SINGLEQUOTE: case PROMPT_DOUBLEQUOTE: + case PROMPT_DOLLARQUOTE: case PROMPT_COMMENT: case PROMPT_PAREN: prompt_name = "PROMPT2"; @@ -199,6 +200,9 @@ get_prompt(promptStatus_t status) case PROMPT_DOUBLEQUOTE: buf[0] = '"'; break; + case PROMPT_DOLLARQUOTE: + buf[0] = '$'; + break; case PROMPT_COMMENT: buf[0] = '*'; break; diff --git a/src/bin/psql/prompt.h b/src/bin/psql/prompt.h index 433f70a3af..338371a478 100644 --- a/src/bin/psql/prompt.h +++ b/src/bin/psql/prompt.h @@ -3,7 +3,7 @@ * * Copyright (c) 2000-2003, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.13 2003/11/29 19:52:07 pgsql Exp $ + * $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.14 2004/02/24 21:45:18 tgl Exp $ */ #ifndef PROMPT_H #define PROMPT_H @@ -15,6 +15,7 @@ typedef enum _promptStatus PROMPT_COMMENT, PROMPT_SINGLEQUOTE, PROMPT_DOUBLEQUOTE, + PROMPT_DOLLARQUOTE, PROMPT_PAREN, PROMPT_COPY } promptStatus_t; diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index 46886b2f94..5eb3f40fc8 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -31,7 +31,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.1 2004/02/19 19:40:09 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.2 2004/02/24 21:45:18 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -92,6 +92,7 @@ typedef struct PsqlScanStateData int start_state; /* saved YY_START */ int paren_depth; /* depth of nesting in parentheses */ int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ } PsqlScanStateData; static PsqlScanState cur_state; /* current state while active */ @@ -123,6 +124,7 @@ static void emit(const char *txt, int len); %option 8bit %option never-interactive +%option nodefault %option nounput %option noyywrap @@ -151,6 +153,7 @@ static void emit(const char *txt, int len); * delimited identifiers (double-quoted identifiers) * hexadecimal numeric string * quoted strings + * $foo$ quoted strings */ %x xb @@ -158,6 +161,7 @@ static void emit(const char *txt, int len); %x xd %x xh %x xq +%x xdolq /* Additional exclusive states for psql only: lex backslash commands */ %x xslashcmd %x xslasharg @@ -241,6 +245,17 @@ xqescape [\\][^0-7] xqoctesc [\\][0-7]{1,3} xqcat {quote}{whitespace_with_newline}{quote} +/* $foo$ style quotes ("dollar quoting") + * The quoted string starts with $foo$ where "foo" is an optional string + * in the form of an identifier, except that it may not contain "$", + * and extends to the first occurrence of an identical string. + * There is *no* processing of the quoted text. + */ +dolq_start [A-Za-z\200-\377_] +dolq_cont [A-Za-z\200-\377_0-9] +dolqdelim \$({dolq_start}{dolq_cont}*)?\$ +dolqinside [^$]+ + /* Double quote * Allows embedded spaces and other special characters into identifiers. */ @@ -308,7 +323,8 @@ param \${integer} other . /* - * Quoted strings must allow some special characters such as single-quote + * Dollar quoted strings are totally opaque, and no escaping is done on them. + * Other quoted strings must allow some special characters such as single-quote * and newline. 
* Embedded single-quotes are implemented both in the SQL standard * style of two adjacent single quotes "''" and in the Postgres/Java style @@ -427,6 +443,41 @@ other . <xq>{xqcat} { ECHO; } +<xq>. { + /* This is only needed for \ just before EOF */ + ECHO; + } + +{dolqdelim} { + cur_state->dolqstart = pg_strdup(yytext); + BEGIN(xdolq); + ECHO; + } +<xdolq>{dolqdelim} { + if (strcmp(yytext, cur_state->dolqstart) == 0) + { + free(cur_state->dolqstart); + cur_state->dolqstart = NULL; + BEGIN(INITIAL); + } + else + { + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + yyless(yyleng-1); + } + ECHO; + } +<xdolq>{dolqinside} { + ECHO; + } +<xdolq>. { + /* This is only needed for $ inside the quoted text */ + ECHO; + } {xdstart} { BEGIN(xd); @@ -436,7 +487,7 @@ other . BEGIN(INITIAL); ECHO; } -<xd>{xddouble} { +<xd>{xddouble} { ECHO; } <xd>{xdinside} { @@ -754,7 +805,7 @@ other . "\\". { emit(yytext + 1, 1); } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -766,7 +817,7 @@ other . "`" { return LEXRES_OK; } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -811,7 +862,7 @@ other . BEGIN(xslashdefaultarg); } -{other} { ECHO; } +{other}|\n { ECHO; } } @@ -833,7 +884,7 @@ other . "\\\\" { return LEXRES_OK; } -{other} { +{other}|\n { yyless(0); return LEXRES_OK; } @@ -865,6 +916,8 @@ psql_scan_destroy(PsqlScanState state) { psql_scan_finish(state); + psql_scan_reset(state); + free(state); } @@ -1008,6 +1061,10 @@ psql_scan(PsqlScanState state, result = PSCAN_INCOMPLETE; *prompt = PROMPT_SINGLEQUOTE; break; + case xdolq: + result = PSCAN_INCOMPLETE; + *prompt = PROMPT_DOLLARQUOTE; + break; default: /* can't get here */ fprintf(stderr, "invalid YY_START\n"); @@ -1082,6 +1139,9 @@ psql_scan_reset(PsqlScanState state) state->start_state = INITIAL; state->paren_depth = 0; state->xcdepth = 0; /* not really necessary */ + if (state->dolqstart) + free(state->dolqstart); + state->dolqstart = NULL; } /*