granicus.if.org Git - postgresql/commitdiff
Fix lexing of standard multi-character operators in edge cases.
author Andrew Gierth <rhodiumtoad@postgresql.org>
Thu, 23 Aug 2018 17:29:18 +0000 (18:29 +0100)
committer Andrew Gierth <rhodiumtoad@postgresql.org>
Thu, 23 Aug 2018 20:42:40 +0000 (21:42 +0100)
Commits c6b3c939b (which fixed the precedence of >=, <=, <> operators)
and 865f14a2d (which added support for the standard => notation for
named arguments) created a class of lexer tokens which look like
multi-character operators but which have their own token IDs distinct
from Op. However, longest-match rules meant that following any of
these tokens with another operator character, as in (1<>-1), would
cause them to be incorrectly returned as Op.

The error here isn't immediately obvious, because the parser would
usually still find the correct operator via the Op token, but there
were more subtle problems:

1. If immediately followed by a comment or by a + or - character,
   >= <= <> would be given the old precedence of Op rather than the
   correct new precedence;

2. If followed by a comment, != would be returned as Op rather than as
   NOT_EQUALS, so it was looked up as an operator literally named !=
   (instead of being treated as <>), causing it not to be found at all;

3. If followed by a comment or by a + or - character, the => token for
   named arguments would be lexed as Op, causing the argument to be
   mis-parsed as a simple expression, usually causing an error.

Fix by explicitly checking for the operators in the {operator} code
block in addition to all the existing special cases there.

Backpatch to 9.5 where the problem was introduced.

Analysis and patch by me; review by Tom Lane.
Discussion: https://postgr.es/m/87va851ppl.fsf@news-spur.riddles.org.uk

src/backend/parser/scan.l
src/fe_utils/psqlscan.l
src/interfaces/ecpg/preproc/pgc.l
src/test/regress/expected/create_operator.out
src/test/regress/expected/polymorphism.out
src/test/regress/sql/create_operator.sql
src/test/regress/sql/polymorphism.sql

index 96f51bfd596daa17ec0e49f518fe1e02ba8c11ca..950b8b85918c131fbe39efc1a31a62110a76e119 100644 (file)
@@ -339,6 +339,15 @@ identifier         {ident_start}{ident_cont}*
 typecast               "::"
 dot_dot                        \.\.
 colon_equals   ":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater "=>"
 less_equals            "<="
 greater_equals ">="
@@ -929,6 +938,25 @@ other                      .
                                                if (nchars == 1 &&
                                                        strchr(",()[].;:+-*/%^<>=", yytext[0]))
                                                        return yytext[0];
+                                               /*
+                                                * Likewise, if what we have left is two chars, and
+                                                * those match the tokens ">=", "<=", "=>", "<>" or
+                                                * "!=", then we must return the appropriate token
+                                                * rather than the generic Op.
+                                                */
+                                               if (nchars == 2)
+                                               {
+                                                       if (yytext[0] == '=' && yytext[1] == '>')
+                                                               return EQUALS_GREATER;
+                                                       if (yytext[0] == '>' && yytext[1] == '=')
+                                                               return GREATER_EQUALS;
+                                                       if (yytext[0] == '<' && yytext[1] == '=')
+                                                               return LESS_EQUALS;
+                                                       if (yytext[0] == '<' && yytext[1] == '>')
+                                                               return NOT_EQUALS;
+                                                       if (yytext[0] == '!' && yytext[1] == '=')
+                                                               return NOT_EQUALS;
+                                               }
                                        }
 
                                        /*
index 989284dc6fe985baaa92b95aea1f1e7f141c874d..fdf49875a7296a7dec2293ab3f3538303e174199 100644 (file)
@@ -298,6 +298,15 @@ identifier         {ident_start}{ident_cont}*
 typecast               "::"
 dot_dot                        \.\.
 colon_equals   ":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater "=>"
 less_equals            "<="
 greater_equals ">="
index 9ad50b99119f248ef3fdf53ef39b86e1f36dbbed..0792118cfe3f8921de60456cec302463bb0ec19e 100644 (file)
@@ -245,6 +245,15 @@ array                      ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
 typecast               "::"
 dot_dot                        \.\.
 colon_equals   ":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater "=>"
 less_equals            "<="
 greater_equals ">="
@@ -732,6 +741,25 @@ cppline                    {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                        if (nchars == 1 &&
                                                                strchr(",()[].;:+-*/%^<>=", yytext[0]))
                                                                return yytext[0];
+                                                       /*
+                                                        * Likewise, if what we have left is two chars, and
+                                                        * those match the tokens ">=", "<=", "=>", "<>" or
+                                                        * "!=", then we must return the appropriate token
+                                                        * rather than the generic Op.
+                                                        */
+                                                       if (nchars == 2)
+                                                       {
+                                                               if (yytext[0] == '=' && yytext[1] == '>')
+                                                                       return EQUALS_GREATER;
+                                                               if (yytext[0] == '>' && yytext[1] == '=')
+                                                                       return GREATER_EQUALS;
+                                                               if (yytext[0] == '<' && yytext[1] == '=')
+                                                                       return LESS_EQUALS;
+                                                               if (yytext[0] == '<' && yytext[1] == '>')
+                                                                       return NOT_EQUALS;
+                                                               if (yytext[0] == '!' && yytext[1] == '=')
+                                                                       return NOT_EQUALS;
+                                                       }
                                                }
 
                                                base_yylval.str = mm_strdup(yytext);
index 77237f485006e58fe39a6208a79badec61832fec..54e8b791595dc10408c064cad1abdb87271b044d 100644 (file)
@@ -45,6 +45,80 @@ CREATE OPERATOR => (
 ERROR:  syntax error at or near "=>"
 LINE 1: CREATE OPERATOR => (
                         ^
+-- lexing of <=, >=, <>, != has a number of edge cases
+-- (=> is tested elsewhere)
+-- this is legal because ! is not allowed in sql ops
+CREATE OPERATOR !=- (
+   leftarg = int8,             -- right unary
+   procedure = numeric_fac
+);
+SELECT 2 !=-;
+ ?column? 
+----------
+        2
+(1 row)
+
+-- make sure lexer returns != as <> even in edge cases
+SELECT 2 !=/**/ 1, 2 !=/**/ 2;
+ ?column? | ?column? 
+----------+----------
+ t        | f
+(1 row)
+
+SELECT 2 !=-- comment to be removed by psql
+  1;
+ ?column? 
+----------
+ t
+(1 row)
+
+DO $$ -- use DO to protect -- from psql
+  declare r boolean;
+  begin
+    execute $e$ select 2 !=-- comment
+      1 $e$ into r;
+    raise info 'r = %', r;
+  end;
+$$;
+INFO:  r = t
+-- check that <= etc. followed by more operator characters are returned
+-- as the correct token with correct precedence
+SELECT true<>-1 BETWEEN 1 AND 1;  -- BETWEEN has prec. above <> but below Op
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false<>/**/1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false<=-1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false>=-1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+ t        | t        | t
+(1 row)
+
+SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+ f        | f        | f
+(1 row)
+
 -- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
 BEGIN TRANSACTION;
 CREATE ROLE regress_rol_op1;
index 67e70c8c140a5303074ab28071bbc63d4dca019b..986417a1881f362acf55ef41492bd01dbecfdb2c 100644 (file)
@@ -1478,6 +1478,42 @@ select dfunc('a'::text, 'b', flag => true); -- mixed notation
  a
 (1 row)
 
+-- this tests lexer edge cases around =>
+select dfunc(a =>-1);
+ dfunc 
+-------
+    -1
+(1 row)
+
+select dfunc(a =>+1);
+ dfunc 
+-------
+     1
+(1 row)
+
+select dfunc(a =>/**/1);
+ dfunc 
+-------
+     1
+(1 row)
+
+select dfunc(a =>--comment to be removed by psql
+  1);
+ dfunc 
+-------
+     1
+(1 row)
+
+-- need DO to protect the -- from psql
+do $$
+  declare r integer;
+  begin
+    select dfunc(a=>-- comment
+      1) into r;
+    raise info 'r = %', r;
+  end;
+$$;
+INFO:  r = 1
 -- check reverse-listing of named-arg calls
 CREATE VIEW dfview AS
    SELECT q1, q2,
index 625e9b97485160c83ad9e8f8fe9ce670474aa025..8b6fd0bb43d62764ce1add88e239b324d1ae1383 100644 (file)
@@ -45,6 +45,37 @@ CREATE OPERATOR => (
    procedure = numeric_fac
 );
 
+-- lexing of <=, >=, <>, != has a number of edge cases
+-- (=> is tested elsewhere)
+
+-- this is legal because ! is not allowed in sql ops
+CREATE OPERATOR !=- (
+   leftarg = int8,             -- right unary
+   procedure = numeric_fac
+);
+SELECT 2 !=-;
+-- make sure lexer returns != as <> even in edge cases
+SELECT 2 !=/**/ 1, 2 !=/**/ 2;
+SELECT 2 !=-- comment to be removed by psql
+  1;
+DO $$ -- use DO to protect -- from psql
+  declare r boolean;
+  begin
+    execute $e$ select 2 !=-- comment
+      1 $e$ into r;
+    raise info 'r = %', r;
+  end;
+$$;
+
+-- check that <= etc. followed by more operator characters are returned
+-- as the correct token with correct precedence
+SELECT true<>-1 BETWEEN 1 AND 1;  -- BETWEEN has prec. above <> but below Op
+SELECT false<>/**/1 BETWEEN 1 AND 1;
+SELECT false<=-1 BETWEEN 1 AND 1;
+SELECT false>=-1 BETWEEN 1 AND 1;
+SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
+SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
+
 -- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
 BEGIN TRANSACTION;
 CREATE ROLE regress_rol_op1;
index 2f65f0f97d6414ed7775a358a868e6b0d3fa83a9..03606671d92b757533208e06ac4792ffa5567f9d 100644 (file)
@@ -785,6 +785,21 @@ select dfunc('a'::text, 'b', flag => false); -- mixed notation
 select dfunc('a'::text, 'b', true); -- full positional notation
 select dfunc('a'::text, 'b', flag => true); -- mixed notation
 
+-- this tests lexer edge cases around =>
+select dfunc(a =>-1);
+select dfunc(a =>+1);
+select dfunc(a =>/**/1);
+select dfunc(a =>--comment to be removed by psql
+  1);
+-- need DO to protect the -- from psql
+do $$
+  declare r integer;
+  begin
+    select dfunc(a=>-- comment
+      1) into r;
+    raise info 'r = %', r;
+  end;
+$$;
 
 -- check reverse-listing of named-arg calls
 CREATE VIEW dfview AS