granicus.if.org Git - postgresql/commitdiff
Implement dollar-quoting in the backend lexer and psql. Documentation
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Feb 2004 21:45:18 +0000 (21:45 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Feb 2004 21:45:18 +0000 (21:45 +0000)
is still lacking, as is support in plpgsql and other places, but this is
the basic feature.  Patch by Andrew Dunstan, some tweaking by Tom Lane.
Also, enable %option nodefault in these two lexers, and patch some gaps
revealed thereby.

src/backend/parser/scan.l
src/bin/psql/prompt.c
src/bin/psql/prompt.h
src/bin/psql/psqlscan.l

index caab9a002cf075298accde359be5d39c02e1e89d..b788a42bc98db10aea20c7ec4478ca806de83a9c 100644 (file)
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.114 2004/02/21 00:34:52 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.115 2004/02/24 21:45:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -37,6 +37,7 @@
 extern YYSTYPE yylval;
 
 static int             xcdepth = 0;    /* depth of nesting in slash-star comments */
+static char    *dolqstart;      /* current $foo$ quote start string */
 
 /*
  * literalbuf is used to accumulate literal values when multiple rules
@@ -74,6 +75,7 @@ unsigned char unescape_single_char(unsigned char c);
 
 %option 8bit
 %option never-interactive
+%option nodefault
 %option nounput
 %option noyywrap
 %option prefix="base_yy"
@@ -94,6 +96,7 @@ unsigned char unescape_single_char(unsigned char c);
  *  <xd> delimited identifiers (double-quoted identifiers)
  *  <xh> hexadecimal numeric string
  *  <xq> quoted strings
+ *  <xdolq> $foo$ quoted strings
  */
 
 %x xb
@@ -101,6 +104,7 @@ unsigned char unescape_single_char(unsigned char c);
 %x xd
 %x xh
 %x xq
+%x xdolq
 
 /*
  * In order to make the world safe for Windows and Mac clients as well as
@@ -175,6 +179,17 @@ xqescape           [\\][^0-7]
 xqoctesc               [\\][0-7]{1,3}
 xqcat                  {quote}{whitespace_with_newline}{quote}
 
+/* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$", 
+ * and extends to the first occurrence of an identical string.  
+ * There is *no* processing of the quoted text.
+ */
+dolq_start             [A-Za-z\200-\377_]
+dolq_cont              [A-Za-z\200-\377_0-9]
+dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqinside             [^$]+
+
 /* Double quote
  * Allows embedded spaces and other special characters into identifiers.
  */
@@ -242,7 +257,8 @@ param                       \${integer}
 other                  .
 
 /*
- * Quoted strings must allow some special characters such as single-quote
+ * Dollar quoted strings are totally opaque, and no escaping is done on them.
+ * Other quoted strings must allow some special characters such as single-quote
  *  and newline.
  * Embedded single-quotes are implemented both in the SQL standard
  *  style of two adjacent single quotes "''" and in the Postgres/Java style
@@ -388,8 +404,46 @@ other                      .
 <xq>{xqcat}            {
                                        /* ignore */
                                }
+<xq>.                  {
+                                       /* This is only needed for \ just before EOF */
+                                       addlitchar(yytext[0]);
+                               }
 <xq><<EOF>>            { yyerror("unterminated quoted string"); }
 
+{dolqdelim}            {
+                                       token_start = yytext;
+                                       dolqstart = pstrdup(yytext);
+                                       BEGIN(xdolq);
+                                       startlit();
+                               }
+<xdolq>{dolqdelim} {
+                                       if (strcmp(yytext, dolqstart) == 0)
+                                       {
+                                               pfree(dolqstart);
+                                               BEGIN(INITIAL);
+                                               yylval.str = litbufdup();
+                                               return SCONST;
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * When we fail to match $...$ to dolqstart, transfer
+                                                * the $... part to the output, but put back the final
+                                                * $ for rescanning.  Consider $delim$...$junk$delim$
+                                                */
+                                               addlit(yytext, yyleng-1);
+                                               yyless(yyleng-1);
+                                       }
+                               }
+<xdolq>{dolqinside} {
+                                       addlit(yytext, yyleng);
+                               }
+<xdolq>.               {
+                                       /* This is only needed for $ inside the quoted text */
+                                       addlitchar(yytext[0]);
+                               }
+<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
+
 {xdstart}              {
                                        token_start = yytext;
                                        BEGIN(xd);
@@ -407,7 +461,7 @@ other                       .
                                        yylval.str = ident;
                                        return IDENT;
                                }
-<xd>{xddouble} {
+<xd>{xddouble} {
                                        addlitchar('"');
                                }
 <xd>{xdinside} {
index a11881bc9a7fdfd610168d37de79bd76fc5a4b77..0a0d317739b106c608cb147ae2a6d64f425fb865 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.34 2004/01/25 03:07:22 neilc Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/prompt.c,v 1.35 2004/02/24 21:45:18 tgl Exp $
  */
 #include "postgres_fe.h"
 #include "prompt.h"
@@ -85,6 +85,7 @@ get_prompt(promptStatus_t status)
                case PROMPT_CONTINUE:
                case PROMPT_SINGLEQUOTE:
                case PROMPT_DOUBLEQUOTE:
+               case PROMPT_DOLLARQUOTE:
                case PROMPT_COMMENT:
                case PROMPT_PAREN:
                        prompt_name = "PROMPT2";
@@ -199,6 +200,9 @@ get_prompt(promptStatus_t status)
                                                case PROMPT_DOUBLEQUOTE:
                                                        buf[0] = '"';
                                                        break;
+                                               case PROMPT_DOLLARQUOTE:
+                                                       buf[0] = '$';
+                                                       break;
                                                case PROMPT_COMMENT:
                                                        buf[0] = '*';
                                                        break;
index 433f70a3af847ae323efc00113173cf59e66f9aa..338371a478ab0df54f2ac95acf2d3e27fd1c776b 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.13 2003/11/29 19:52:07 pgsql Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/prompt.h,v 1.14 2004/02/24 21:45:18 tgl Exp $
  */
 #ifndef PROMPT_H
 #define PROMPT_H
@@ -15,6 +15,7 @@ typedef enum _promptStatus
        PROMPT_COMMENT,
        PROMPT_SINGLEQUOTE,
        PROMPT_DOUBLEQUOTE,
+       PROMPT_DOLLARQUOTE,
        PROMPT_PAREN,
        PROMPT_COPY
 } promptStatus_t;
index 46886b2f9403698db23191f8e469aedae0a0d94f..5eb3f40fc8e81e24c30c000f06e230674bc69eab 100644 (file)
@@ -31,7 +31,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.1 2004/02/19 19:40:09 tgl Exp $
+ *       $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.2 2004/02/24 21:45:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,6 +92,7 @@ typedef struct PsqlScanStateData
        int                     start_state;    /* saved YY_START */
        int                     paren_depth;    /* depth of nesting in parentheses */
        int                     xcdepth;                /* depth of nesting in slash-star comments */
+       char       *dolqstart;          /* current $foo$ quote start string */
 } PsqlScanStateData;
 
 static PsqlScanState cur_state;        /* current state while active */
@@ -123,6 +124,7 @@ static void emit(const char *txt, int len);
 
 %option 8bit
 %option never-interactive
+%option nodefault
 %option nounput
 %option noyywrap
 
@@ -151,6 +153,7 @@ static void emit(const char *txt, int len);
  *  <xd> delimited identifiers (double-quoted identifiers)
  *  <xh> hexadecimal numeric string
  *  <xq> quoted strings
+ *  <xdolq> $foo$ quoted strings
  */
 
 %x xb
@@ -158,6 +161,7 @@ static void emit(const char *txt, int len);
 %x xd
 %x xh
 %x xq
+%x xdolq
 /* Additional exclusive states for psql only: lex backslash commands */
 %x xslashcmd
 %x xslasharg
@@ -241,6 +245,17 @@ xqescape           [\\][^0-7]
 xqoctesc               [\\][0-7]{1,3}
 xqcat                  {quote}{whitespace_with_newline}{quote}
 
+/* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$", 
+ * and extends to the first occurrence of an identical string.  
+ * There is *no* processing of the quoted text.
+ */
+dolq_start             [A-Za-z\200-\377_]
+dolq_cont              [A-Za-z\200-\377_0-9]
+dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqinside             [^$]+
+
 /* Double quote
  * Allows embedded spaces and other special characters into identifiers.
  */
@@ -308,7 +323,8 @@ param                       \${integer}
 other                  .
 
 /*
- * Quoted strings must allow some special characters such as single-quote
+ * Dollar quoted strings are totally opaque, and no escaping is done on them.
+ * Other quoted strings must allow some special characters such as single-quote
  *  and newline.
  * Embedded single-quotes are implemented both in the SQL standard
  *  style of two adjacent single quotes "''" and in the Postgres/Java style
@@ -427,6 +443,41 @@ other                      .
 <xq>{xqcat}            {
                                        ECHO;
                                }
+<xq>.                  {
+                                       /* This is only needed for \ just before EOF */
+                                       ECHO;
+                               }
+
+{dolqdelim}            {
+                                       cur_state->dolqstart = pg_strdup(yytext);
+                                       BEGIN(xdolq);
+                                       ECHO;
+                               }
+<xdolq>{dolqdelim} {
+                                       if (strcmp(yytext, cur_state->dolqstart) == 0)
+                                       {
+                                               free(cur_state->dolqstart);
+                                               cur_state->dolqstart = NULL;
+                                               BEGIN(INITIAL);
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * When we fail to match $...$ to dolqstart, transfer
+                                                * the $... part to the output, but put back the final
+                                                * $ for rescanning.  Consider $delim$...$junk$delim$
+                                                */
+                                               yyless(yyleng-1);
+                                       }
+                                       ECHO;
+                               }
+<xdolq>{dolqinside} {
+                                       ECHO;
+                               }
+<xdolq>.               {
+                                       /* This is only needed for $ inside the quoted text */
+                                       ECHO;
+                               }
 
 {xdstart}              {
                                        BEGIN(xd);
@@ -436,7 +487,7 @@ other                       .
                                        BEGIN(INITIAL);
                                        ECHO;
                                }
-<xd>{xddouble} {
+<xd>{xddouble} {
                                        ECHO;
                                }
 <xd>{xdinside} {
@@ -754,7 +805,7 @@ other                       .
 
 "\\".                  { emit(yytext + 1, 1); }
 
-{other}                        { ECHO; }
+{other}|\n             { ECHO; }
 
 }
 
@@ -766,7 +817,7 @@ other                       .
 
 "`"                            { return LEXRES_OK; }
 
-{other}                        { ECHO; }
+{other}|\n             { ECHO; }
 
 }
 
@@ -811,7 +862,7 @@ other                       .
                                        BEGIN(xslashdefaultarg);
                                }
 
-{other}                        { ECHO; }
+{other}|\n             { ECHO; }
 
 }
 
@@ -833,7 +884,7 @@ other                       .
 
 "\\\\"                 { return LEXRES_OK; }
 
-{other}                        {
+{other}|\n             {
                                        yyless(0);
                                        return LEXRES_OK;
                                }
@@ -865,6 +916,8 @@ psql_scan_destroy(PsqlScanState state)
 {
        psql_scan_finish(state);
 
+       psql_scan_reset(state);
+
        free(state);
 }
 
@@ -1008,6 +1061,10 @@ psql_scan(PsqlScanState state,
                                        result = PSCAN_INCOMPLETE;
                                        *prompt = PROMPT_SINGLEQUOTE;
                                        break;
+                               case xdolq:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_DOLLARQUOTE;
+                                       break;
                                default:
                                        /* can't get here */
                                        fprintf(stderr, "invalid YY_START\n");
@@ -1082,6 +1139,9 @@ psql_scan_reset(PsqlScanState state)
        state->start_state = INITIAL;
        state->paren_depth = 0;
        state->xcdepth = 0;                     /* not really necessary */
+       if (state->dolqstart)
+               free(state->dolqstart);
+       state->dolqstart = NULL;
 }
 
 /*